perf: pre-compile regex patterns and extract markdown stripping to utils/text.rs
This commit is contained in:
parent
28707d3c3a
commit
c27b2d513e
@ -7,6 +7,7 @@ use crate::api::utils::{db_conn_error, query_error};
|
||||
use crate::db::pool::get_conn;
|
||||
use crate::models::post::{Post, PostStats, PostStatus, Tag};
|
||||
use crate::models::user::{User, UserRole};
|
||||
use crate::utils::text::{count_words, auto_summary};
|
||||
|
||||
// ============================================================================
|
||||
// Server-side helpers (only compiled when server feature is enabled)
|
||||
@ -374,82 +375,6 @@ fn slugify_heading(text: &str) -> String {
|
||||
slug
|
||||
}
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
fn count_words(md: &str) -> u32 {
|
||||
// Remove markdown syntax
|
||||
let mut plain = md.to_string();
|
||||
plain = regex::Regex::new(r"```[\s\S]*?```").unwrap().replace_all(&plain, "").to_string();
|
||||
plain = regex::Regex::new(r"`[^`]*`").unwrap().replace_all(&plain, "").to_string();
|
||||
plain = regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap().replace_all(&plain, "$1").to_string();
|
||||
plain = regex::Regex::new(r"^#{1,6}\s*").unwrap().replace_all(&plain, "").to_string();
|
||||
plain = regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)").unwrap().replace_all(&plain, "").to_string();
|
||||
plain = plain.replace("**", "").replace("*", "").replace("__", "").replace("_", "");
|
||||
|
||||
// Count Chinese characters and English words
|
||||
let mut count = 0u32;
|
||||
let mut in_word = false;
|
||||
|
||||
for c in plain.chars() {
|
||||
if c as u32 >= 0x4E00 && c as u32 <= 0x9FFF {
|
||||
count += 1;
|
||||
in_word = false;
|
||||
} else if c.is_alphabetic() {
|
||||
if !in_word {
|
||||
count += 1;
|
||||
in_word = true;
|
||||
}
|
||||
} else {
|
||||
in_word = false;
|
||||
}
|
||||
}
|
||||
|
||||
count.max(1)
|
||||
}
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
fn auto_summary(md: &str) -> String {
|
||||
// Strip markdown syntax roughly: remove heading markers, bold, italic, links, code fences
|
||||
let mut plain = md.to_string();
|
||||
// Remove code blocks
|
||||
plain = regex::Regex::new(r"```[\s\S]*?```")
|
||||
.unwrap()
|
||||
.replace_all(&plain, "")
|
||||
.to_string();
|
||||
// Remove inline code
|
||||
plain = regex::Regex::new(r"`[^`]*`")
|
||||
.unwrap()
|
||||
.replace_all(&plain, "")
|
||||
.to_string();
|
||||
// Remove links: [text](url) -> text
|
||||
plain = regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)")
|
||||
.unwrap()
|
||||
.replace_all(&plain, "$1")
|
||||
.to_string();
|
||||
// Remove heading markers
|
||||
plain = regex::Regex::new(r"^#{1,6}\s*")
|
||||
.unwrap()
|
||||
.replace_all(&plain, "")
|
||||
.to_string();
|
||||
// Remove bold/italic markers
|
||||
plain = plain
|
||||
.replace("**", "")
|
||||
.replace("*", "")
|
||||
.replace("__", "")
|
||||
.replace("_", "");
|
||||
// Remove images
|
||||
plain = regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)")
|
||||
.unwrap()
|
||||
.replace_all(&plain, "")
|
||||
.to_string();
|
||||
// Collapse whitespace
|
||||
plain = regex::Regex::new(r"\s+")
|
||||
.unwrap()
|
||||
.replace_all(&plain, " ")
|
||||
.to_string();
|
||||
|
||||
plain.trim().chars().take(200).collect()
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Tag helpers
|
||||
// ============================================================================
|
||||
|
||||
@ -1 +1,66 @@
|
||||
// Placeholder for text utilities (will be created in a later task)
|
||||
use std::sync::LazyLock;
|
||||
|
||||
static CODE_BLOCK_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
|
||||
regex::Regex::new(r"```[\s\S]*?```").unwrap()
|
||||
});
|
||||
|
||||
static INLINE_CODE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
|
||||
regex::Regex::new(r"`[^`]*`").unwrap()
|
||||
});
|
||||
|
||||
static LINK_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
|
||||
regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap()
|
||||
});
|
||||
|
||||
static HEADING_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
|
||||
regex::Regex::new(r"^#{1,6}\s*").unwrap()
|
||||
});
|
||||
|
||||
static IMAGE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
|
||||
regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)").unwrap()
|
||||
});
|
||||
|
||||
static WHITESPACE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
|
||||
regex::Regex::new(r"\s+").unwrap()
|
||||
});
|
||||
|
||||
pub fn strip_markdown(md: &str) -> String {
|
||||
let mut plain = CODE_BLOCK_RE.replace_all(md, "").to_string();
|
||||
plain = INLINE_CODE_RE.replace_all(&plain, "").to_string();
|
||||
plain = LINK_RE.replace_all(&plain, "$1").to_string();
|
||||
plain = HEADING_RE.replace_all(&plain, "").to_string();
|
||||
plain = IMAGE_RE.replace_all(&plain, "").to_string();
|
||||
plain = plain
|
||||
.replace("**", "")
|
||||
.replace('*', "")
|
||||
.replace("__", "")
|
||||
.replace('_', "");
|
||||
plain = WHITESPACE_RE.replace_all(&plain, " ").to_string();
|
||||
plain.trim().to_string()
|
||||
}
|
||||
|
||||
pub fn count_words(md: &str) -> u32 {
|
||||
let plain = strip_markdown(md);
|
||||
let mut count = 0u32;
|
||||
let mut in_word = false;
|
||||
|
||||
for c in plain.chars() {
|
||||
if c as u32 >= 0x4E00 && c as u32 <= 0x9FFF {
|
||||
count += 1;
|
||||
in_word = false;
|
||||
} else if c.is_alphabetic() {
|
||||
if !in_word {
|
||||
count += 1;
|
||||
in_word = true;
|
||||
}
|
||||
} else {
|
||||
in_word = false;
|
||||
}
|
||||
}
|
||||
count.max(1)
|
||||
}
|
||||
|
||||
pub fn auto_summary(md: &str) -> String {
|
||||
let plain = strip_markdown(md);
|
||||
plain.chars().take(200).collect()
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user