perf: pre-compile regex patterns and extract markdown stripping to utils/text.rs

2026-06-04 16:08:01 +08:00 · 2026-06-04 16:08:01 +08:00 · c27b2d513e
commit c27b2d513e
parent 28707d3c3a
2 changed files with 67 additions and 77 deletions
--- a/src/api/posts.rs
+++ b/src/api/posts.rs
@ -7,6 +7,7 @@ use crate::api::utils::{db_conn_error, query_error};
 use crate::db::pool::get_conn;
 use crate::models::post::{Post, PostStats, PostStatus, Tag};
 use crate::models::user::{User, UserRole};
 use crate::utils::text::{count_words, auto_summary};
 // ============================================================================
 // Server-side helpers (only compiled when server feature is enabled)
@ -374,82 +375,6 @@ fn slugify_heading(text: &str) -> String {
    slug
 }
 /// Estimates the word count of a markdown document.
 ///
 /// Markdown syntax is stripped first. After that, every character in
 /// U+4E00..=U+9FFF counts as one word, and every run of consecutive other
 /// alphabetic characters counts as one word. The result is never less than 1.
 ///
 /// Every call compiles five regexes from scratch.
 #[cfg(feature = "server")]
 fn count_words(md: &str) -> u32 {
    // Remove markdown syntax
    let mut plain = md.to_string();
    // Fenced code blocks, then inline code spans, are dropped entirely.
    plain = regex::Regex::new(r"```[\s\S]*?```").unwrap().replace_all(&plain, "").to_string();
    plain = regex::Regex::new(r"`[^`]*`").unwrap().replace_all(&plain, "").to_string();
    // Links: [text](url) -> text
    plain = regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap().replace_all(&plain, "$1").to_string();
    // NOTE: `^` is not in multi-line mode here, so only a heading marker at the
    // very start of the text is removed.
    plain = regex::Regex::new(r"^#{1,6}\s*").unwrap().replace_all(&plain, "").to_string();
    // NOTE: the link rule above has already rewritten `![alt](url)` to `!alt`,
    // so this pattern normally finds nothing left to remove.
    plain = regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)").unwrap().replace_all(&plain, "").to_string();
    // Drop emphasis markers; this removes every literal `*` and `_` as well.
    plain = plain.replace("**", "").replace("*", "").replace("__", "").replace("_", "");
    // Count Chinese characters and English words
    let mut count = 0u32;
    // True while the scan is inside a run of alphabetic characters.
    let mut in_word = false;
    for c in plain.chars() {
        if c as u32 >= 0x4E00 && c as u32 <= 0x9FFF {
            // Each character in this range is a word on its own.
            count += 1;
            in_word = false;
        } else if c.is_alphabetic() {
            // Count a run of alphabetic characters once, at its first character.
            if !in_word {
                count += 1;
                in_word = true;
            }
        } else {
            // Digits, punctuation and whitespace all end the current word.
            in_word = false;
        }
    }
    // Empty or symbol-only input still reports one word.
    count.max(1)
 }
 /// Builds a plain-text summary from a markdown document.
 ///
 /// Markdown syntax is stripped roughly, whitespace is collapsed to single
 /// spaces, and the first 200 characters (not bytes) of the trimmed result are
 /// returned.
 ///
 /// Every call compiles six regexes from scratch.
 #[cfg(feature = "server")]
 fn auto_summary(md: &str) -> String {
    // Strip markdown syntax roughly: remove heading markers, bold, italic, links, code fences
    let mut plain = md.to_string();
    // Remove code blocks
    plain = regex::Regex::new(r"```[\s\S]*?```")
        .unwrap()
        .replace_all(&plain, "")
        .to_string();
    // Remove inline code
    plain = regex::Regex::new(r"`[^`]*`")
        .unwrap()
        .replace_all(&plain, "")
        .to_string();
    // Remove links: [text](url) -> text
    plain = regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)")
        .unwrap()
        .replace_all(&plain, "$1")
        .to_string();
    // Remove heading markers
    // NOTE: `^` is not in multi-line mode here, so only a marker at the very
    // start of the text is removed.
    plain = regex::Regex::new(r"^#{1,6}\s*")
        .unwrap()
        .replace_all(&plain, "")
        .to_string();
    // Remove bold/italic markers (this removes every literal `*` and `_` as well)
    plain = plain
        .replace("**", "")
        .replace("*", "")
        .replace("__", "")
        .replace("_", "");
    // Remove images
    // NOTE: the link rule above has already rewritten `![alt](url)` to `!alt`,
    // so this pattern normally finds nothing left to remove.
    plain = regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)")
        .unwrap()
        .replace_all(&plain, "")
        .to_string();
    // Collapse whitespace
    plain = regex::Regex::new(r"\s+")
        .unwrap()
        .replace_all(&plain, " ")
        .to_string();
    // Truncate by characters so a multi-byte character is never split.
    plain.trim().chars().take(200).collect()
 }
 // ============================================================================
 // Tag helpers
 // ============================================================================
--- a/src/utils/text.rs
+++ b/src/utils/text.rs
@ -1 +1,66 @@
-// Placeholder for text utilities (will be created in a later task)
+use std::sync::LazyLock;
 /// Matches a fenced code block: an opening triple backtick, the shortest
 /// possible run of any characters (newlines included), and a closing triple
 /// backtick. Compiled on first use.
 static CODE_BLOCK_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| regex::Regex::new(r"```[\s\S]*?```").unwrap());
 /// Matches an inline code span: a backtick, any number of non-backtick
 /// characters, and a closing backtick. Compiled on first use.
 static INLINE_CODE_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| regex::Regex::new(r"`[^`]*`").unwrap());
 /// Matches a markdown link `[text](url)`; capture group 1 holds the link
 /// text. Compiled on first use.
 static LINK_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap());
 /// Matches an ATX heading marker: one to six `#` at the start of a line,
 /// plus any whitespace that follows. Compiled on first use.
 ///
 /// The `(?m)` flag makes `^` match at the start of every line. Without it the
 /// anchor only matches at the very beginning of the document, so headings on
 /// later lines would keep their `#` markers.
 static HEADING_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r"(?m)^#{1,6}\s*").unwrap()
 });
 /// Matches a markdown image `![alt](url)`; capture group 1 holds the alt
 /// text. Compiled on first use.
 static IMAGE_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)").unwrap());
 /// Matches a run of one or more whitespace characters. Compiled on first use.
 static WHITESPACE_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| regex::Regex::new(r"\s+").unwrap());
 /// Roughly converts markdown to plain text.
 ///
 /// Steps, in order:
 /// 1. fenced code blocks and inline code spans are removed;
 /// 2. images `![alt](url)` are removed entirely;
 /// 3. links `[text](url)` are replaced by their text;
 /// 4. heading markers matched by `HEADING_RE` are removed;
 /// 5. emphasis markers are removed, which also removes every literal `*`
 ///    and `_`;
 /// 6. whitespace runs collapse to a single space and the result is trimmed.
 ///
 /// Returns the stripped text as an owned `String`.
 pub fn strip_markdown(md: &str) -> String {
    // `replace_all` returns a `Cow`, so a step that matches nothing does not allocate.
    let plain = CODE_BLOCK_RE.replace_all(md, "");
    let plain = INLINE_CODE_RE.replace_all(&plain, "");
    // Images must be handled before links. The link pattern also matches the
    // `[alt](url)` part of an image, which would leave a stray `!alt` behind
    // and keep the image pattern from ever matching.
    let plain = IMAGE_RE.replace_all(&plain, "");
    let plain = LINK_RE.replace_all(&plain, "$1");
    let plain = HEADING_RE.replace_all(&plain, "");
    let plain = plain
        .replace("**", "")
        .replace('*', "")
        .replace("__", "")
        .replace('_', "");
    let plain = WHITESPACE_RE.replace_all(&plain, " ");
    plain.trim().to_string()
 }
 /// Estimates the word count of a markdown document.
 ///
 /// The text is first reduced to plain text with `strip_markdown`. Each
 /// character in U+4E00..=U+9FFF then counts as one word, and each run of
 /// consecutive other alphabetic characters counts as one word. Anything else
 /// (digits, punctuation, whitespace) only separates words.
 ///
 /// Returns at least 1, even for empty input.
 pub fn count_words(md: &str) -> u32 {
    let text = strip_markdown(md);
    let mut total = 0u32;
    // True while the scan is inside a run of alphabetic characters.
    let mut inside_word = false;
    for ch in text.chars() {
        match ch {
            // Each ideograph in this range is a word on its own.
            '\u{4E00}'..='\u{9FFF}' => {
                total += 1;
                inside_word = false;
            }
            // Count an alphabetic run once, at its first character.
            _ if ch.is_alphabetic() => {
                if !inside_word {
                    total += 1;
                    inside_word = true;
                }
            }
            _ => inside_word = false,
        }
    }
    total.max(1)
 }
 /// Builds a plain-text summary: the first 200 characters (not bytes) of the
 /// markdown after it has been passed through `strip_markdown`.
 pub fn auto_summary(md: &str) -> String {
    // Truncate by characters so a multi-byte character is never split.
    strip_markdown(md).chars().take(200).collect()
 }