diff --git a/src/api/posts.rs b/src/api/posts.rs
index e105b23..92ef091 100644
--- a/src/api/posts.rs
+++ b/src/api/posts.rs
@@ -7,6 +7,7 @@ use crate::api::utils::{db_conn_error, query_error};
 use crate::db::pool::get_conn;
 use crate::models::post::{Post, PostStats, PostStatus, Tag};
 use crate::models::user::{User, UserRole};
+use crate::utils::text::{auto_summary, count_words};
 // ============================================================================
 // Server-side helpers (only compiled when server feature is enabled)
 // ============================================================================
@@ -374,82 +375,6 @@ fn slugify_heading(text: &str) -> String {
     slug
 }
 
-#[cfg(feature = "server")]
-fn count_words(md: &str) -> u32 {
-    // Remove markdown syntax
-    let mut plain = md.to_string();
-    plain = regex::Regex::new(r"```[\s\S]*?```").unwrap().replace_all(&plain, "").to_string();
-    plain = regex::Regex::new(r"`[^`]*`").unwrap().replace_all(&plain, "").to_string();
-    plain = regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap().replace_all(&plain, "$1").to_string();
-    plain = regex::Regex::new(r"^#{1,6}\s*").unwrap().replace_all(&plain, "").to_string();
-    plain = regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)").unwrap().replace_all(&plain, "").to_string();
-    plain = plain.replace("**", "").replace("*", "").replace("__", "").replace("_", "");
-
-    // Count Chinese characters and English words
-    let mut count = 0u32;
-    let mut in_word = false;
-
-    for c in plain.chars() {
-        if c as u32 >= 0x4E00 && c as u32 <= 0x9FFF {
-            count += 1;
-            in_word = false;
-        } else if c.is_alphabetic() {
-            if !in_word {
-                count += 1;
-                in_word = true;
-            }
-        } else {
-            in_word = false;
-        }
-    }
-
-    count.max(1)
-}
-
-#[cfg(feature = "server")]
-fn auto_summary(md: &str) -> String {
-    // Strip markdown syntax roughly: remove heading markers, bold, italic, links, code fences
-    let mut plain = md.to_string();
-    // Remove code blocks
-    plain = regex::Regex::new(r"```[\s\S]*?```")
-        .unwrap()
-        .replace_all(&plain, "")
-        .to_string();
-    // Remove inline code
-    plain = regex::Regex::new(r"`[^`]*`")
-        .unwrap()
-        .replace_all(&plain, "")
-        .to_string();
-    // Remove links: [text](url) -> text
-    plain = regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)")
-        .unwrap()
-        .replace_all(&plain, "$1")
-        .to_string();
-    // Remove heading markers
-    plain = regex::Regex::new(r"^#{1,6}\s*")
-        .unwrap()
-        .replace_all(&plain, "")
-        .to_string();
-    // Remove bold/italic markers
-    plain = plain
-        .replace("**", "")
-        .replace("*", "")
-        .replace("__", "")
-        .replace("_", "");
-    // Remove images
-    plain = regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)")
-        .unwrap()
-        .replace_all(&plain, "")
-        .to_string();
-    // Collapse whitespace
-    plain = regex::Regex::new(r"\s+")
-        .unwrap()
-        .replace_all(&plain, " ")
-        .to_string();
-
-    plain.trim().chars().take(200).collect()
-}
-
 // ============================================================================
 // Tag helpers
 // ============================================================================
diff --git a/src/utils/text.rs b/src/utils/text.rs
index 34ca8be..81b345f 100644
--- a/src/utils/text.rs
+++ b/src/utils/text.rs
@@ -1 +1,85 @@
-// Placeholder for text utilities (will be created in a later task)
+//! Markdown-to-plain-text helpers shared by the post API.
+//!
+//! Every pattern is compiled once, on first use, and then reused.
+//
+// NOTE(review): the helpers this module replaces in `src/api/posts.rs` were
+// gated behind `#[cfg(feature = "server")]`, while these are not. Confirm that
+// the `regex` crate is still available when that feature is disabled.
+
+use std::sync::LazyLock;
+
+use regex::Regex;
+
+/// Fenced code blocks, including their contents.
+static CODE_BLOCK_RE: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"```[\s\S]*?```").expect("valid code block regex"));
+
+/// Inline code spans, including their contents.
+static INLINE_CODE_RE: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"`[^`]*`").expect("valid inline code regex"));
+
+/// Images: `![alt](url)`.
+static IMAGE_RE: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\([^)]*\)").expect("valid image regex"));
+
+/// Links: `[text](url)`; capture group 1 is the link text.
+static LINK_RE: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"\[([^\]]*)\]\([^)]*\)").expect("valid link regex"));
+
+/// Heading markers (`#` to `######`) at the start of any line; `(?m)` makes `^` match per line.
+static HEADING_RE: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"(?m)^#{1,6}\s*").expect("valid heading regex"));
+
+/// Runs of whitespace, newlines included.
+static WHITESPACE_RE: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"\s+").expect("valid whitespace regex"));
+
+/// Roughly strips Markdown syntax from `md` and returns trimmed plain text.
+///
+/// Code blocks, inline code and images are dropped entirely; links keep only
+/// their text; heading markers and `*`/`_` emphasis characters are removed; and
+/// whitespace runs collapse to a single space.
+pub fn strip_markdown(md: &str) -> String {
+    let plain = CODE_BLOCK_RE.replace_all(md, "");
+    let plain = INLINE_CODE_RE.replace_all(&plain, "");
+    // Images go before links: `LINK_RE` also matches the `[alt](url)` tail of an
+    // image, which would otherwise leave a stray `!alt` behind.
+    let plain = IMAGE_RE.replace_all(&plain, "");
+    let plain = LINK_RE.replace_all(&plain, "$1");
+    let plain = HEADING_RE.replace_all(&plain, "");
+    // Removing every `*` and `_` also covers the `**` and `__` markers.
+    let plain = plain.replace(['*', '_'], "");
+    let plain = WHITESPACE_RE.replace_all(&plain, " ");
+    plain.trim().to_string()
+}
+
+/// Counts the words in `md` after stripping Markdown syntax.
+///
+/// Each character in U+4E00..=U+9FFF counts as one word; any other run of
+/// consecutive alphabetic characters counts as one word. The result is never
+/// less than 1, even for empty input.
+pub fn count_words(md: &str) -> u32 {
+    let plain = strip_markdown(md);
+    let mut count = 0u32;
+    let mut in_word = false;
+
+    for c in plain.chars() {
+        if matches!(c, '\u{4E00}'..='\u{9FFF}') {
+            count += 1;
+            in_word = false;
+        } else if c.is_alphabetic() {
+            if !in_word {
+                count += 1;
+                in_word = true;
+            }
+        } else {
+            in_word = false;
+        }
+    }
+    count.max(1)
+}
+
+/// Returns the first 200 characters of the Markdown-stripped text of `md`.
+pub fn auto_summary(md: &str) -> String {
+    strip_markdown(md).chars().take(200).collect()
+}