perf: pre-compile regex patterns and extract markdown stripping to utils/text.rs

2026-06-04 16:08:01 +08:00 · 2026-06-04 16:08:01 +08:00 · c27b2d513e
commit c27b2d513e
parent 28707d3c3a
2 changed files with 67 additions and 77 deletions
--- a/src/api/posts.rs
+++ b/src/api/posts.rs
@ -7,6 +7,7 @@ use crate::api::utils::{db_conn_error, query_error};
 use crate::db::pool::get_conn;
 use crate::models::post::{Post, PostStats, PostStatus, Tag};
 use crate::models::user::{User, UserRole};
+use crate::utils::text::{count_words, auto_summary};

 // ============================================================================
 // Server-side helpers (only compiled when server feature is enabled)
@ -374,82 +375,6 @@ fn slugify_heading(text: &str) -> String {
    slug
 }

-#[cfg(feature = "server")]
-fn count_words(md: &str) -> u32 {
-    // Remove markdown syntax
-    let mut plain = md.to_string();
-    plain = regex::Regex::new(r"```[\s\S]*?```").unwrap().replace_all(&plain, "").to_string();
-    plain = regex::Regex::new(r"`[^`]*`").unwrap().replace_all(&plain, "").to_string();
-    plain = regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap().replace_all(&plain, "$1").to_string();
-    plain = regex::Regex::new(r"^#{1,6}\s*").unwrap().replace_all(&plain, "").to_string();
-    plain = regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)").unwrap().replace_all(&plain, "").to_string();
-    plain = plain.replace("**", "").replace("*", "").replace("__", "").replace("_", "");
-
-    // Count Chinese characters and English words
-    let mut count = 0u32;
-    let mut in_word = false;
-
-    for c in plain.chars() {
-        if c as u32 >= 0x4E00 && c as u32 <= 0x9FFF {
-            count += 1;
-            in_word = false;
-        } else if c.is_alphabetic() {
-            if !in_word {
-                count += 1;
-                in_word = true;
-            }
-        } else {
-            in_word = false;
-        }
-    }
-
-    count.max(1)
-}
-
-#[cfg(feature = "server")]
-fn auto_summary(md: &str) -> String {
-    // Strip markdown syntax roughly: remove heading markers, bold, italic, links, code fences
-    let mut plain = md.to_string();
-    // Remove code blocks
-    plain = regex::Regex::new(r"```[\s\S]*?```")
-        .unwrap()
-        .replace_all(&plain, "")
-        .to_string();
-    // Remove inline code
-    plain = regex::Regex::new(r"`[^`]*`")
-        .unwrap()
-        .replace_all(&plain, "")
-        .to_string();
-    // Remove links: [text](url) -> text
-    plain = regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)")
-        .unwrap()
-        .replace_all(&plain, "$1")
-        .to_string();
-    // Remove heading markers
-    plain = regex::Regex::new(r"^#{1,6}\s*")
-        .unwrap()
-        .replace_all(&plain, "")
-        .to_string();
-    // Remove bold/italic markers
-    plain = plain
-        .replace("**", "")
-        .replace("*", "")
-        .replace("__", "")
-        .replace("_", "");
-    // Remove images
-    plain = regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)")
-        .unwrap()
-        .replace_all(&plain, "")
-        .to_string();
-    // Collapse whitespace
-    plain = regex::Regex::new(r"\s+")
-        .unwrap()
-        .replace_all(&plain, " ")
-        .to_string();
-
-    plain.trim().chars().take(200).collect()
-}
-
 // ============================================================================
 // Tag helpers
 // ============================================================================
--- a/src/utils/text.rs
+++ b/src/utils/text.rs
@ -1 +1,66 @@
-// Placeholder for text utilities (will be created in a later task)
+use std::sync::LazyLock;
+
+/// Matches a triple-backtick fenced code block, from the opening fence through the
+/// nearest closing fence. `[\s\S]*?` is non-greedy and also matches newlines.
+static CODE_BLOCK_RE: LazyLock<regex::Regex> =
+    LazyLock::new(|| regex::Regex::new(r"```[\s\S]*?```").unwrap());
+
+/// Matches an inline code span: a backtick, any run of non-backtick characters
+/// (possibly empty), and the closing backtick.
+static INLINE_CODE_RE: LazyLock<regex::Regex> =
+    LazyLock::new(|| regex::Regex::new(r"`[^`]*`").unwrap());
+
+/// Matches a Markdown inline link `[text](url)`. Capture group 1 is the link text.
+static LINK_RE: LazyLock<regex::Regex> =
+    LazyLock::new(|| regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap());
+
+/// Matches a heading marker (one to six `#` followed by optional whitespace) at the
+/// start of any line.
+///
+/// The `(?m)` flag is required: without it `^` anchors only to the start of the whole
+/// input, so only a heading on the very first line would be matched.
+static HEADING_RE: LazyLock<regex::Regex> =
+    LazyLock::new(|| regex::Regex::new(r"(?m)^#{1,6}\s*").unwrap());
+
+/// Matches a Markdown inline image `![alt](url)`. Capture group 1 is the alt text.
+static IMAGE_RE: LazyLock<regex::Regex> =
+    LazyLock::new(|| regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)").unwrap());
+
+/// Matches a run of one or more whitespace characters (including newlines).
+static WHITESPACE_RE: LazyLock<regex::Regex> =
+    LazyLock::new(|| regex::Regex::new(r"\s+").unwrap());
+
+/// Converts Markdown into rough plain text.
+///
+/// Removes fenced code blocks and inline code spans, drops images, replaces links with
+/// their link text, strips heading markers and all `*` / `_` characters, then collapses
+/// every whitespace run to a single space and trims both ends.
+///
+/// This is a regex-based approximation, not a Markdown parser: every `*` and `_` is
+/// removed, including ones that are not emphasis markers (e.g. inside `snake_case`).
+pub fn strip_markdown(md: &str) -> String {
+    let plain = CODE_BLOCK_RE.replace_all(md, "");
+    let plain = INLINE_CODE_RE.replace_all(&plain, "");
+    // Images must be removed before links: LINK_RE also matches the `[alt](url)` tail of
+    // `![alt](url)`, which would otherwise leave a stray `!alt` in the output.
+    let plain = IMAGE_RE.replace_all(&plain, "");
+    let plain = LINK_RE.replace_all(&plain, "$1");
+    let plain = HEADING_RE.replace_all(&plain, "");
+    // Dropping every single `*` / `_` also covers the doubled `**` / `__` forms.
+    let plain = plain.replace(['*', '_'], "");
+    let collapsed = WHITESPACE_RE.replace_all(&plain, " ");
+    collapsed.trim().to_string()
+}
+
+/// Counts the words in a Markdown document after stripping its markup.
+///
+/// Each character in the range U+4E00..=U+9FFF (CJK Unified Ideographs) counts as one
+/// word. Every other maximal run of alphabetic characters counts as one word; digits,
+/// punctuation and whitespace only act as separators. The result is never below 1,
+/// even for empty input.
+pub fn count_words(md: &str) -> u32 {
+    let text = strip_markdown(md);
+    let mut total = 0u32;
+    // True while the previous character belonged to a non-CJK alphabetic run.
+    let mut inside_run = false;
+
+    for ch in text.chars() {
+        match ch {
+            '\u{4E00}'..='\u{9FFF}' => {
+                total += 1;
+                inside_run = false;
+            }
+            _ if ch.is_alphabetic() => {
+                if !inside_run {
+                    total += 1;
+                }
+                inside_run = true;
+            }
+            _ => inside_run = false,
+        }
+    }
+
+    total.max(1)
+}
+
+/// Builds a plain-text summary: the first 200 characters (`char`s, not bytes) of the
+/// Markdown-stripped text. Shorter input is returned in full.
+pub fn auto_summary(md: &str) -> String {
+    strip_markdown(md).chars().take(200).collect()
+}