fix(api): correct count_words for CJK characters

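Chinese characters (CJK Unified Ideographs, U+4E00-U+9FFF) also return true
for char::is_alphabetic(), so the alphabetic branch matched first and the CJK
branch was never reached: a run of Chinese characters was counted as a single
word (or merged into an adjacent English word) instead of one per character.
Fix: check the CJK range before is_alphabetic().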
This commit is contained in:
xfy 2026-06-03 10:25:38 +08:00
parent e6c3cacf12
commit 19e5a0be41

View File

@ -418,15 +418,14 @@ fn count_words(md: &str) -> u32 {
let mut in_word = false;
for c in plain.chars() {
if c.is_alphabetic() {
if c as u32 >= 0x4E00 && c as u32 <= 0x9FFF {
count += 1;
in_word = false;
} else if c.is_alphabetic() {
if !in_word {
count += 1;
in_word = true;
}
} else if c as u32 >= 0x4E00 && c as u32 <= 0x9FFF {
// Chinese character
count += 1;
in_word = false;
} else {
in_word = false;
}