perf: pre-compile regex patterns and extract markdown stripping to utils/text.rs

This commit is contained in:
xfy 2026-06-04 16:08:01 +08:00
parent 28707d3c3a
commit c27b2d513e
2 changed files with 67 additions and 77 deletions

View File

@ -7,6 +7,7 @@ use crate::api::utils::{db_conn_error, query_error};
use crate::db::pool::get_conn;
use crate::models::post::{Post, PostStats, PostStatus, Tag};
use crate::models::user::{User, UserRole};
use crate::utils::text::{count_words, auto_summary};
// ============================================================================
// Server-side helpers (only compiled when server feature is enabled)
@ -374,82 +375,6 @@ fn slugify_heading(text: &str) -> String {
slug
}
/// Estimates the number of words in a Markdown document.
///
/// Markdown syntax is stripped first: fenced code blocks, inline code spans and
/// images are dropped, links are reduced to their link text, and heading and
/// emphasis markers are removed. Afterwards every character in
/// `U+4E00..=U+9FFF` counts as one word, and every maximal run of other
/// alphabetic characters counts as one word.
///
/// The result is never less than 1, even for empty input.
#[cfg(feature = "server")]
fn count_words(md: &str) -> u32 {
    // Compile each pattern once per process instead of once per call.
    type Lazy = std::sync::LazyLock<regex::Regex>;
    static CODE_BLOCK: Lazy =
        Lazy::new(|| regex::Regex::new(r"```[\s\S]*?```").expect("hard-coded regex is valid"));
    static INLINE_CODE: Lazy =
        Lazy::new(|| regex::Regex::new(r"`[^`]*`").expect("hard-coded regex is valid"));
    static IMAGE: Lazy = Lazy::new(|| {
        regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)").expect("hard-coded regex is valid")
    });
    static LINK: Lazy = Lazy::new(|| {
        regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").expect("hard-coded regex is valid")
    });
    // `(?m)` makes `^` match at the start of every line, not only at the start
    // of the document, so headings after the first line are handled as well.
    static HEADING: Lazy =
        Lazy::new(|| regex::Regex::new(r"(?m)^#{1,6}\s*").expect("hard-coded regex is valid"));

    let text = CODE_BLOCK.replace_all(md, "");
    let text = INLINE_CODE.replace_all(&text, "");
    // Images must go before links: the link pattern also matches the
    // `[alt](url)` tail of an image and would leave `!alt` behind.
    let text = IMAGE.replace_all(&text, "");
    let text = LINK.replace_all(&text, "$1");
    let text = HEADING.replace_all(&text, "");
    // Dropping every `*` and `_` covers `**` and `__` too.
    let plain = text.replace(['*', '_'], "");

    let mut words = 0u32;
    let mut inside_word = false;
    for ch in plain.chars() {
        if ('\u{4E00}'..='\u{9FFF}').contains(&ch) {
            // Each ideograph in this range is a word on its own and ends any
            // alphabetic run in progress.
            words += 1;
            inside_word = false;
        } else if ch.is_alphabetic() {
            if !inside_word {
                words += 1;
            }
            inside_word = true;
        } else {
            inside_word = false;
        }
    }
    words.max(1)
}
/// Builds a plain-text summary of at most 200 characters from Markdown.
///
/// Fenced code blocks, inline code spans and images are removed, links are
/// reduced to their link text, heading and emphasis markers are dropped, and
/// runs of whitespace collapse to a single space. The trimmed result is then
/// truncated to its first 200 `char`s.
#[cfg(feature = "server")]
fn auto_summary(md: &str) -> String {
    // Compile each pattern once per process instead of once per call.
    type Lazy = std::sync::LazyLock<regex::Regex>;
    static CODE_BLOCK: Lazy =
        Lazy::new(|| regex::Regex::new(r"```[\s\S]*?```").expect("hard-coded regex is valid"));
    static INLINE_CODE: Lazy =
        Lazy::new(|| regex::Regex::new(r"`[^`]*`").expect("hard-coded regex is valid"));
    static IMAGE: Lazy = Lazy::new(|| {
        regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)").expect("hard-coded regex is valid")
    });
    static LINK: Lazy = Lazy::new(|| {
        regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").expect("hard-coded regex is valid")
    });
    // `(?m)` makes `^` match at the start of every line, not only at the start
    // of the document, so headings after the first line are handled as well.
    static HEADING: Lazy =
        Lazy::new(|| regex::Regex::new(r"(?m)^#{1,6}\s*").expect("hard-coded regex is valid"));
    static WHITESPACE: Lazy =
        Lazy::new(|| regex::Regex::new(r"\s+").expect("hard-coded regex is valid"));

    let text = CODE_BLOCK.replace_all(md, "");
    let text = INLINE_CODE.replace_all(&text, "");
    // Images must go before links: the link pattern also matches the
    // `[alt](url)` tail of an image and would leave `!alt` behind.
    let text = IMAGE.replace_all(&text, "");
    let text = LINK.replace_all(&text, "$1");
    let text = HEADING.replace_all(&text, "");
    // Dropping every `*` and `_` covers `**` and `__` too.
    let text = text.replace(['*', '_'], "");
    let text = WHITESPACE.replace_all(&text, " ");

    text.trim().chars().take(200).collect()
}
// ============================================================================
// Tag helpers
// ============================================================================

View File

@ -1 +1,66 @@
// Placeholder for text utilities (will be created in a later task)
use std::sync::LazyLock;
// Pre-compiled patterns shared by the text helpers in this module. Each one is
// built on first use and reused for every later call.

// Fenced code block: from a triple-backtick fence to the nearest closing
// fence, newlines included.
static CODE_BLOCK_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r"```[\s\S]*?```").expect("CODE_BLOCK_RE pattern is valid")
});

// Inline code span delimited by single backticks.
static INLINE_CODE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r"`[^`]*`").expect("INLINE_CODE_RE pattern is valid")
});

// Inline link `[text](url)`; capture group 1 is the link text.
static LINK_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").expect("LINK_RE pattern is valid")
});

// One to six leading `#` plus following whitespace. `(?m)` makes `^` match at
// the start of every line; without it only a heading on the very first line of
// the document would match.
static HEADING_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r"(?m)^#{1,6}\s*").expect("HEADING_RE pattern is valid")
});

// Inline image `![alt](url)`.
static IMAGE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)").expect("IMAGE_RE pattern is valid")
});

// Any run of whitespace, newlines included.
static WHITESPACE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r"\s+").expect("WHITESPACE_RE pattern is valid")
});
/// Converts Markdown source into a single line of plain text.
///
/// Steps, in order:
/// 1. fenced code blocks and inline code spans are removed entirely;
/// 2. images (`![alt](url)`) are removed;
/// 3. links (`[text](url)`) are replaced by their `text`;
/// 4. heading markers matched by `HEADING_RE` are removed;
/// 5. every `*` and `_` is removed;
/// 6. runs of whitespace collapse to one space and the result is trimmed.
///
/// Images have to be handled before links: the link pattern also matches the
/// `[alt](url)` tail of an image, so running it first would leave a stray
/// `!alt` in the output and the image pattern would never match.
pub fn strip_markdown(md: &str) -> String {
    let text = CODE_BLOCK_RE.replace_all(md, "");
    let text = INLINE_CODE_RE.replace_all(&text, "");
    let text = IMAGE_RE.replace_all(&text, "");
    let text = LINK_RE.replace_all(&text, "$1");
    let text = HEADING_RE.replace_all(&text, "");
    // Dropping every `*` and `_` covers `**` and `__` too.
    let text = text.replace(['*', '_'], "");
    let collapsed = WHITESPACE_RE.replace_all(&text, " ");
    collapsed.trim().to_string()
}
/// Estimates the number of words in a Markdown document.
///
/// The input is first reduced to plain text with `strip_markdown`. Every
/// character in `U+4E00..=U+9FFF` then counts as one word, and every maximal
/// run of other alphabetic characters counts as one word. Any other character
/// only ends the current run.
///
/// The result is never less than 1, even for empty input.
pub fn count_words(md: &str) -> u32 {
    let text = strip_markdown(md);

    // Accumulator: (words so far, whether the previous char was inside an
    // alphabetic run).
    let (total, _) = text.chars().fold((0u32, false), |(total, in_run), ch| {
        if matches!(ch, '\u{4E00}'..='\u{9FFF}') {
            (total + 1, false)
        } else if ch.is_alphabetic() {
            // Only the first character of a run bumps the counter.
            (total + u32::from(!in_run), true)
        } else {
            (total, false)
        }
    });

    total.max(1)
}
/// Builds a plain-text summary from Markdown.
///
/// The input is reduced to plain text with `strip_markdown` and truncated to
/// its first 200 `char`s (not bytes); shorter text is returned whole.
pub fn auto_summary(md: &str) -> String {
    const MAX_SUMMARY_CHARS: usize = 200;
    strip_markdown(md).chars().take(MAX_SUMMARY_CHARS).collect()
}