perf: pre-compile regex patterns and extract markdown stripping to utils/text.rs
This commit is contained in:
parent
28707d3c3a
commit
c27b2d513e
@ -7,6 +7,7 @@ use crate::api::utils::{db_conn_error, query_error};
|
|||||||
use crate::db::pool::get_conn;
|
use crate::db::pool::get_conn;
|
||||||
use crate::models::post::{Post, PostStats, PostStatus, Tag};
|
use crate::models::post::{Post, PostStats, PostStatus, Tag};
|
||||||
use crate::models::user::{User, UserRole};
|
use crate::models::user::{User, UserRole};
|
||||||
|
use crate::utils::text::{count_words, auto_summary};
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Server-side helpers (only compiled when server feature is enabled)
|
// Server-side helpers (only compiled when server feature is enabled)
|
||||||
@ -374,82 +375,6 @@ fn slugify_heading(text: &str) -> String {
|
|||||||
slug
|
slug
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Counts the words in a Markdown document after stripping its markup.
///
/// Fenced and inline code and images are dropped, links are reduced to their
/// text, and heading and emphasis markers are removed. Each character in the
/// range U+4E00..=U+9FFF counts as one word; every other maximal run of
/// alphabetic characters counts as one word.
///
/// Returns at least 1, even when the input contains no countable text.
#[cfg(feature = "server")]
fn count_words(md: &str) -> u32 {
    // Each pattern is compiled once per process instead of on every call.
    static CODE_BLOCK_RE: std::sync::LazyLock<regex::Regex> =
        std::sync::LazyLock::new(|| regex::Regex::new(r"```[\s\S]*?```").unwrap());
    static INLINE_CODE_RE: std::sync::LazyLock<regex::Regex> =
        std::sync::LazyLock::new(|| regex::Regex::new(r"`[^`]*`").unwrap());
    static IMAGE_RE: std::sync::LazyLock<regex::Regex> =
        std::sync::LazyLock::new(|| regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)").unwrap());
    static LINK_RE: std::sync::LazyLock<regex::Regex> =
        std::sync::LazyLock::new(|| regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap());
    // `(?m)` lets `^` match at every line start, not only at the start of the input.
    static HEADING_RE: std::sync::LazyLock<regex::Regex> =
        std::sync::LazyLock::new(|| regex::Regex::new(r"(?m)^#{1,6}\s*").unwrap());

    let plain = CODE_BLOCK_RE.replace_all(md, "");
    let plain = INLINE_CODE_RE.replace_all(&plain, "");
    // Images must be removed before links: the link pattern also matches the
    // `[alt](url)` tail of an image and would leave `!alt` behind to be counted.
    let plain = IMAGE_RE.replace_all(&plain, "");
    let plain = LINK_RE.replace_all(&plain, "$1");
    let plain = HEADING_RE.replace_all(&plain, "");

    // Count Chinese characters and English words.
    let mut count = 0u32;
    let mut in_word = false;

    // Emphasis markers are skipped rather than treated as separators, which is
    // equivalent to deleting every `*` and `_` before counting.
    for c in plain.chars().filter(|&c| c != '*' && c != '_') {
        if ('\u{4E00}'..='\u{9FFF}').contains(&c) {
            count += 1;
            in_word = false;
        } else if c.is_alphabetic() {
            if !in_word {
                count += 1;
                in_word = true;
            }
        } else {
            in_word = false;
        }
    }

    count.max(1)
}
|
|
||||||
|
|
||||||
/// Builds a plain-text summary from the first 200 characters of a Markdown
/// document.
///
/// Fenced and inline code and images are dropped, links are reduced to their
/// text, heading and emphasis markers are removed, and every run of whitespace
/// is collapsed to a single space before the text is trimmed and truncated.
#[cfg(feature = "server")]
fn auto_summary(md: &str) -> String {
    // Each pattern is compiled once per process instead of on every call.
    static CODE_BLOCK_RE: std::sync::LazyLock<regex::Regex> =
        std::sync::LazyLock::new(|| regex::Regex::new(r"```[\s\S]*?```").unwrap());
    static INLINE_CODE_RE: std::sync::LazyLock<regex::Regex> =
        std::sync::LazyLock::new(|| regex::Regex::new(r"`[^`]*`").unwrap());
    static IMAGE_RE: std::sync::LazyLock<regex::Regex> =
        std::sync::LazyLock::new(|| regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)").unwrap());
    static LINK_RE: std::sync::LazyLock<regex::Regex> =
        std::sync::LazyLock::new(|| regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap());
    // `(?m)` lets `^` match at every line start, not only at the start of the input.
    static HEADING_RE: std::sync::LazyLock<regex::Regex> =
        std::sync::LazyLock::new(|| regex::Regex::new(r"(?m)^#{1,6}\s*").unwrap());
    static WHITESPACE_RE: std::sync::LazyLock<regex::Regex> =
        std::sync::LazyLock::new(|| regex::Regex::new(r"\s+").unwrap());

    // Remove code blocks, then inline code.
    let plain = CODE_BLOCK_RE.replace_all(md, "");
    let plain = INLINE_CODE_RE.replace_all(&plain, "");
    // Remove images before links: the link pattern also matches the
    // `[alt](url)` tail of an image and would leave `!alt` in the summary.
    let plain = IMAGE_RE.replace_all(&plain, "");
    // Remove links: [text](url) -> text
    let plain = LINK_RE.replace_all(&plain, "$1");
    // Remove heading markers.
    let plain = HEADING_RE.replace_all(&plain, "");

    // Remove bold/italic markers in a single pass.
    let mut plain = plain.into_owned();
    plain.retain(|c| c != '*' && c != '_');

    // Collapse whitespace.
    let plain = WHITESPACE_RE.replace_all(&plain, " ");

    plain.trim().chars().take(200).collect()
}
|
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Tag helpers
|
// Tag helpers
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|||||||
@ -1 +1,66 @@
|
|||||||
// Text utilities: Markdown stripping, word counting, and summary generation.
|
use std::sync::LazyLock;
|
||||||
|
|
||||||
|
static CODE_BLOCK_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
|
||||||
|
regex::Regex::new(r"```[\s\S]*?```").unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
static INLINE_CODE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
|
||||||
|
regex::Regex::new(r"`[^`]*`").unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
static LINK_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
|
||||||
|
regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
static HEADING_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
|
||||||
|
regex::Regex::new(r"^#{1,6}\s*").unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
static IMAGE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
|
||||||
|
regex::Regex::new(r"!\[([^\]]*)\]\([^)]*\)").unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
static WHITESPACE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
|
||||||
|
regex::Regex::new(r"\s+").unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
pub fn strip_markdown(md: &str) -> String {
|
||||||
|
let mut plain = CODE_BLOCK_RE.replace_all(md, "").to_string();
|
||||||
|
plain = INLINE_CODE_RE.replace_all(&plain, "").to_string();
|
||||||
|
plain = LINK_RE.replace_all(&plain, "$1").to_string();
|
||||||
|
plain = HEADING_RE.replace_all(&plain, "").to_string();
|
||||||
|
plain = IMAGE_RE.replace_all(&plain, "").to_string();
|
||||||
|
plain = plain
|
||||||
|
.replace("**", "")
|
||||||
|
.replace('*', "")
|
||||||
|
.replace("__", "")
|
||||||
|
.replace('_', "");
|
||||||
|
plain = WHITESPACE_RE.replace_all(&plain, " ").to_string();
|
||||||
|
plain.trim().to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn count_words(md: &str) -> u32 {
|
||||||
|
let plain = strip_markdown(md);
|
||||||
|
let mut count = 0u32;
|
||||||
|
let mut in_word = false;
|
||||||
|
|
||||||
|
for c in plain.chars() {
|
||||||
|
if c as u32 >= 0x4E00 && c as u32 <= 0x9FFF {
|
||||||
|
count += 1;
|
||||||
|
in_word = false;
|
||||||
|
} else if c.is_alphabetic() {
|
||||||
|
if !in_word {
|
||||||
|
count += 1;
|
||||||
|
in_word = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
in_word = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
count.max(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn auto_summary(md: &str) -> String {
|
||||||
|
let plain = strip_markdown(md);
|
||||||
|
plain.chars().take(200).collect()
|
||||||
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user