From 19e5a0be4110fa973f4d9c3b497a5ea0187a073e Mon Sep 17 00:00:00 2001 From: xfy Date: Wed, 3 Jun 2026 10:25:38 +0800 Subject: [PATCH] fix(api): correct count_words for CJK characters Chinese characters (U+4E00-U+9FFF) also return true for is_alphabetic(), so they were being counted as part of English words instead of individually. Fix: check CJK range before is_alphabetic(). --- src/api/posts.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/api/posts.rs b/src/api/posts.rs index 31d8f74..644989f 100644 --- a/src/api/posts.rs +++ b/src/api/posts.rs @@ -418,15 +418,14 @@ fn count_words(md: &str) -> u32 { let mut in_word = false; for c in plain.chars() { - if c.is_alphabetic() { + if c as u32 >= 0x4E00 && c as u32 <= 0x9FFF { + count += 1; + in_word = false; + } else if c.is_alphabetic() { if !in_word { count += 1; in_word = true; } - } else if c as u32 >= 0x4E00 && c as u32 <= 0x9FFF { - // Chinese character - count += 1; - in_word = false; } else { in_word = false; }