refactor(sanitizer): extract shared sanitizer module and migrate from ammonia to lol_html

This commit is contained in:
xfy 2026-06-12 11:15:42 +08:00
parent b898b55308
commit a10bf8737c
6 changed files with 341 additions and 202 deletions

226
Cargo.lock generated
View File

@ -23,19 +23,6 @@ version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "ammonia"
version = "4.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17e913097e1a2124b46746c980134e8c954bc17a6a59bb3fde96f088d126dde6"
dependencies = [
"cssparser",
"html5ever",
"maplit",
"tendril",
"url",
]
[[package]] [[package]]
name = "android_system_properties" name = "android_system_properties"
version = "0.1.5" version = "0.1.5"
@ -721,14 +708,14 @@ dependencies = [
[[package]] [[package]]
name = "cssparser" name = "cssparser"
version = "0.35.0" version = "0.36.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e901edd733a1472f944a45116df3f846f54d37e67e68640ac8bb69689aca2aa" checksum = "dae61cf9c0abb83bd659dab65b7e4e38d8236824c85f0f804f173567bda257d2"
dependencies = [ dependencies = [
"cssparser-macros", "cssparser-macros",
"dtoa-short", "dtoa-short",
"itoa", "itoa",
"phf 0.11.3", "phf",
"smallvec", "smallvec",
] ]
@ -1636,6 +1623,12 @@ dependencies = [
"regex-syntax", "regex-syntax",
] ]
[[package]]
name = "fastrand"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6"
[[package]] [[package]]
name = "fdeflate" name = "fdeflate"
version = "0.3.7" version = "0.3.7"
@ -1688,16 +1681,6 @@ dependencies = [
"percent-encoding", "percent-encoding",
] ]
[[package]]
name = "futf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]] [[package]]
name = "futures" name = "futures"
version = "0.3.32" version = "0.3.32"
@ -2002,6 +1985,11 @@ name = "hashbrown"
version = "0.17.1" version = "0.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash 0.2.0",
]
[[package]] [[package]]
name = "headers" name = "headers"
@ -2054,17 +2042,6 @@ dependencies = [
"digest 0.11.3", "digest 0.11.3",
] ]
[[package]]
name = "html5ever"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4"
dependencies = [
"log",
"markup5ever",
"match_token",
]
[[package]] [[package]]
name = "http" name = "http"
version = "1.4.1" version = "1.4.1"
@ -2556,6 +2533,25 @@ version = "0.4.32"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a"
[[package]]
name = "lol_html"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00aad58f6ec3990e795943872f13651e7a5fa59dca2c8f31a74faf8a0e0fb652"
dependencies = [
"bitflags",
"cfg-if",
"cssparser",
"encoding_rs",
"foldhash 0.2.0",
"hashbrown 0.17.1",
"memchr",
"mime",
"precomputed-hash",
"selectors",
"thiserror 2.0.18",
]
[[package]] [[package]]
name = "longest-increasing-subsequence" name = "longest-increasing-subsequence"
version = "0.1.0" version = "0.1.0"
@ -2577,12 +2573,6 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]] [[package]]
name = "macro-string" name = "macro-string"
version = "0.1.4" version = "0.1.4"
@ -2647,34 +2637,6 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "maplit"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]]
name = "markup5ever"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "311fe69c934650f8f19652b3946075f0fc41ad8757dbb68f1ca14e7900ecc1c3"
dependencies = [
"log",
"tendril",
"web_atoms",
]
[[package]]
name = "match_token"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac84fd3f360fcc43dc5f5d186f02a94192761a080e8bc58621ad4d12296a58cf"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "matchers" name = "matchers"
version = "0.2.0" version = "0.2.0"
@ -3007,68 +2969,50 @@ version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "phf"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [
"phf_macros",
"phf_shared 0.11.3",
]
[[package]] [[package]]
name = "phf" name = "phf"
version = "0.13.1" version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
dependencies = [ dependencies = [
"phf_shared 0.13.1", "phf_macros",
"phf_shared",
"serde", "serde",
] ]
[[package]] [[package]]
name = "phf_codegen" name = "phf_codegen"
version = "0.11.3" version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1"
dependencies = [ dependencies = [
"phf_generator", "phf_generator",
"phf_shared 0.11.3", "phf_shared",
] ]
[[package]] [[package]]
name = "phf_generator" name = "phf_generator"
version = "0.11.3" version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737"
dependencies = [ dependencies = [
"phf_shared 0.11.3", "fastrand",
"rand 0.8.6", "phf_shared",
] ]
[[package]] [[package]]
name = "phf_macros" name = "phf_macros"
version = "0.11.3" version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef"
dependencies = [ dependencies = [
"phf_generator", "phf_generator",
"phf_shared 0.11.3", "phf_shared",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn", "syn",
] ]
[[package]]
name = "phf_shared"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
dependencies = [
"siphasher",
]
[[package]] [[package]]
name = "phf_shared" name = "phf_shared"
version = "0.13.1" version = "0.13.1"
@ -3675,6 +3619,25 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "selectors"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2cfaaa6035167f0e604e42723c7650d59ee269ef220d7bbe0565602c8a0173b9"
dependencies = [
"bitflags",
"cssparser",
"derive_more",
"log",
"new_debug_unreachable",
"phf",
"phf_codegen",
"precomputed-hash",
"rustc-hash 2.1.2",
"servo_arc",
"smallvec",
]
[[package]] [[package]]
name = "semver" name = "semver"
version = "1.0.28" version = "1.0.28"
@ -3789,6 +3752,15 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "servo_arc"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "170fb83ab34de17dc69aa7c67482b22218ddb85da56546f9bd6b929e32a05930"
dependencies = [
"stable_deref_trait",
]
[[package]] [[package]]
name = "sha1" name = "sha1"
version = "0.10.6" version = "0.10.6"
@ -3931,31 +3903,6 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
[[package]]
name = "string_cache"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
dependencies = [
"new_debug_unreachable",
"parking_lot",
"phf_shared 0.11.3",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
dependencies = [
"phf_generator",
"phf_shared 0.11.3",
"proc-macro2",
"quote",
]
[[package]] [[package]]
name = "stringprep" name = "stringprep"
version = "0.1.5" version = "0.1.5"
@ -4080,17 +4027,6 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
[[package]]
name = "tendril"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.69" version = "1.0.69"
@ -4237,7 +4173,7 @@ dependencies = [
"log", "log",
"parking_lot", "parking_lot",
"percent-encoding", "percent-encoding",
"phf 0.13.1", "phf",
"pin-project-lite", "pin-project-lite",
"postgres-protocol", "postgres-protocol",
"postgres-types", "postgres-types",
@ -4848,18 +4784,6 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "web_atoms"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57ffde1dc01240bdf9992e3205668b235e59421fd085e8a317ed98da0178d414"
dependencies = [
"phf 0.11.3",
"phf_codegen",
"string_cache",
"string_cache_codegen",
]
[[package]] [[package]]
name = "webpki-roots" name = "webpki-roots"
version = "1.0.7" version = "1.0.7"
@ -5354,7 +5278,6 @@ dependencies = [
name = "yggdrasil" name = "yggdrasil"
version = "0.2.0" version = "0.2.0"
dependencies = [ dependencies = [
"ammonia",
"argon2", "argon2",
"axum", "axum",
"chrono", "chrono",
@ -5367,6 +5290,7 @@ dependencies = [
"http", "http",
"image", "image",
"js-sys", "js-sys",
"lol_html",
"md-5 0.10.6", "md-5 0.10.6",
"moka", "moka",
"pulldown-cmark", "pulldown-cmark",

View File

@ -25,7 +25,7 @@ axum = { version = "0.8", optional = true, features = ["multipart"] }
serde_json = "1.0" serde_json = "1.0"
sha2 = "0.10" sha2 = "0.10"
hex = "0.4" hex = "0.4"
ammonia = { version = "4", optional = true } lol_html = { version = "2", optional = true }
syntect = { version = "5", default-features = false, features = ["default-syntaxes", "default-themes", "default-fancy", "html", "parsing", "dump-load", "yaml-load"], optional = true } syntect = { version = "5", default-features = false, features = ["default-syntaxes", "default-themes", "default-fancy", "html", "parsing", "dump-load", "yaml-load"], optional = true }
# NOTE: WebP decoder is intentionally excluded from the image crate. # NOTE: WebP decoder is intentionally excluded from the image crate.
# We use zenwebp for both encoding and decoding to ensure consistency. # We use zenwebp for both encoding and decoding to ensure consistency.
@ -63,7 +63,7 @@ server = [
"dep:tracing", "dep:tracing",
"dep:tracing-subscriber", "dep:tracing-subscriber",
"dep:tower-http", "dep:tower-http",
"dep:ammonia", "dep:lol_html",
"dep:syntect", "dep:syntect",
"dep:axum", "dep:axum",
"dep:image", "dep:image",

View File

@ -8,29 +8,9 @@ fn html_escape(s: &str) -> String {
.replace('"', """) .replace('"', """)
} }
#[cfg(feature = "server")]
static COMMENT_AMMONIA_BUILDER: std::sync::LazyLock<ammonia::Builder> = std::sync::LazyLock::new(|| {
let mut builder = ammonia::Builder::default();
builder
.rm_tags(["img", "details", "summary"])
.add_generic_attributes(&[
"class",
"title",
"aria-hidden",
"aria-label",
"role",
"accesskey",
])
.url_relative(ammonia::UrlRelative::PassThrough)
.add_tag_attributes("a", &["class", "aria-hidden", "aria-label"])
.add_tag_attributes("span", &["class"])
.link_rel(Some("nofollow noopener"));
builder
});
#[cfg(feature = "server")] #[cfg(feature = "server")]
pub fn clean_comment_html(input: &str) -> String { pub fn clean_comment_html(input: &str) -> String {
COMMENT_AMMONIA_BUILDER.clean(input).to_string() crate::api::sanitizer::clean_comment_html(input)
} }
#[cfg(feature = "server")] #[cfg(feature = "server")]

View File

@ -1,35 +1,8 @@
#![allow(clippy::unused_unit, deprecated, unused_imports)] #![allow(clippy::unused_unit, deprecated, unused_imports)]
#[cfg(feature = "server")]
static AMMONIA_BUILDER: std::sync::LazyLock<ammonia::Builder> = std::sync::LazyLock::new(|| {
let mut builder = ammonia::Builder::default();
builder
.add_generic_attributes(&[
"class",
"aria-hidden",
"aria-label",
"id",
"role",
"accesskey",
"title",
])
.add_tags(&["details", "summary"])
.url_relative(ammonia::UrlRelative::PassThrough)
.add_url_schemes(&["data"])
.add_tag_attributes("a", &["class", "aria-hidden", "aria-label"])
.add_tag_attributes("span", &["class"])
.add_tag_attributes("h1", &["id", "class"])
.add_tag_attributes("h2", &["id", "class"])
.add_tag_attributes("h3", &["id", "class"])
.add_tag_attributes("h4", &["id", "class"])
.add_tag_attributes("h5", &["id", "class"])
.add_tag_attributes("h6", &["id", "class"]);
builder
});
#[cfg(feature = "server")] #[cfg(feature = "server")]
pub fn clean_html(input: &str) -> String { pub fn clean_html(input: &str) -> String {
AMMONIA_BUILDER.clean(input).to_string() crate::api::sanitizer::clean_html(input)
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]

View File

@ -5,5 +5,6 @@ pub mod image;
pub mod markdown; pub mod markdown;
pub mod posts; pub mod posts;
pub mod rate_limit; pub mod rate_limit;
pub mod sanitizer;
pub mod slug; pub mod slug;
pub mod upload; pub mod upload;

261
src/api/sanitizer.rs Normal file
View File

@ -0,0 +1,261 @@
#![allow(clippy::unused_unit, deprecated, unused_imports)]
#[cfg(feature = "server")]
use std::collections::HashSet;
#[cfg(feature = "server")]
/// Returns the baseline set of element names that the sanitizer keeps.
///
/// Callers may add to or remove from the returned set before placing it in a
/// `SanitizerConfig`.
fn default_allowed_tags() -> HashSet<&'static str> {
    // Kept in alphabetical order to make additions easy to review.
    const TAGS: &[&str] = &[
        "a", "abbr", "acronym", "area", "article", "aside",
        "b", "bdi", "bdo", "blockquote", "br",
        "caption", "center", "cite", "code", "col", "colgroup",
        "data", "dd", "del", "details", "dfn", "div", "dl", "dt",
        "em",
        "figcaption", "figure", "footer",
        "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr",
        "i", "img", "ins",
        "kbd",
        "li",
        "map", "mark",
        "nav",
        "ol",
        "p", "pre",
        "q",
        "rp", "rt", "rtc", "ruby",
        "s", "samp", "small", "span", "strike", "strong", "sub", "summary", "sup",
        "table", "tbody", "td", "th", "thead", "time", "tr", "tt",
        "u", "ul",
        "var",
        "wbr",
    ];
    TAGS.iter().copied().collect()
}
#[cfg(feature = "server")]
/// Returns the element names that are removed together with everything inside
/// them (as opposed to merely having their tags stripped).
fn clean_content_tags() -> HashSet<&'static str> {
    ["script", "style"].iter().copied().collect()
}
#[cfg(feature = "server")]
/// Returns the URL schemes (lowercase, without the trailing `:`) that are
/// accepted in URL-bearing attributes by default.
fn default_allowed_schemes() -> HashSet<&'static str> {
    const SCHEMES: &[&str] = &[
        "bitcoin",
        "ftp", "ftps",
        "geo",
        "http", "https",
        "im", "irc", "ircs",
        "magnet", "mailto", "mms", "mx",
        "news", "nntp",
        "openpgp4fpr",
        "sip", "sms", "smsto", "ssh",
        "tel",
        "url",
        "webcal", "wtai",
        "xmpp",
    ];
    SCHEMES.iter().copied().collect()
}
#[cfg(feature = "server")]
/// Decides whether the value of a URL-bearing attribute may be kept.
///
/// The check is an allowlist:
/// * an empty value or a relative reference (no scheme before the first `/`,
///   `?` or `#`) is accepted;
/// * a value with a scheme is accepted only when the scheme, compared
///   ASCII-case-insensitively, is in `allowed_schemes`, or is `data` and
///   `allow_data_uri` is `true`;
/// * everything else is rejected, including unknown schemes, schemes that
///   contain whitespace or control characters, and values whose leading segment
///   contains `&`.
///
/// The `&` rule exists because the value may still be entity-encoded when it
/// reaches this function (NOTE(review): confirm whether the HTML rewriter
/// decodes character references). `java&#115;cript:` or `javascript&colon;`
/// would otherwise look like a harmless relative path here and become a
/// `javascript:` URL once the browser decodes it.
///
/// # Parameters
/// * `url` - raw attribute value.
/// * `allowed_schemes` - lowercase scheme names without the trailing `:`.
/// * `allow_data_uri` - whether `data:` URLs are acceptable.
fn is_safe_url(url: &str, allowed_schemes: &HashSet<&str>, allow_data_uri: bool) -> bool {
    // Browsers discard leading and trailing C0 controls and spaces before
    // parsing a URL, so a value such as "\x01javascript:..." must be judged
    // without that prefix.
    let trimmed = url.trim_matches(|c: char| c.is_whitespace() || c.is_control());
    if trimmed.is_empty() {
        return true;
    }

    // The text before the first delimiter is a scheme when the delimiter is
    // ':'; otherwise it is the first segment of a relative reference.
    let is_delimiter = |c: char| matches!(c, ':' | '/' | '?' | '#');
    let (head, has_scheme) = match trimmed.find(is_delimiter) {
        Some(pos) => (&trimmed[..pos], trimmed[pos..].starts_with(':')),
        None => (trimmed, false),
    };

    // A character reference in this position could decode to scheme characters
    // or to ':' itself. Query strings containing `&amp;` are unaffected because
    // they come after a delimiter.
    if head.contains('&') {
        return false;
    }

    if !has_scheme {
        return true;
    }

    let scheme = head.to_ascii_lowercase();
    allowed_schemes.contains(scheme.as_str()) || (allow_data_uri && scheme == "data")
}
#[cfg(feature = "server")]
/// Policy consumed by `sanitize`: which elements, attributes and URL schemes
/// are allowed to remain in the output.
struct SanitizerConfig {
    /// Element names that may appear in the output.
    allowed_tags: HashSet<&'static str>,
    /// Attribute names allowed on every kept element, in addition to the
    /// built-in `lang` and `title`.
    extra_generic_attrs: Vec<&'static str>,
    /// Per-element attribute names, merged into the built-in per-element table
    /// inside `sanitize`.
    extra_tag_attrs: Vec<(&'static str, Vec<&'static str>)>,
    /// URL schemes handed to `is_safe_url` when checking `href`, `src` and
    /// `cite` values.
    allowed_schemes: HashSet<&'static str>,
    /// Whether `data:` URLs are accepted by `is_safe_url`.
    allow_data_uri: bool,
    /// When set, the `rel` value applied to every kept `<a>` element.
    link_rel: Option<&'static str>,
    /// Element names that are removed together with their content.
    remove_tags: HashSet<&'static str>,
}
#[cfg(feature = "server")]
/// Rewrites `input` so that only the markup permitted by `config` remains.
///
/// For every element:
/// * a tag in `config.remove_tags` is dropped together with its content;
/// * a tag outside `config.allowed_tags` is unwrapped (tags stripped, content
///   kept), unless it is a text-only element, in which case it is dropped
///   together with its content;
/// * attributes that are neither generic nor allowed for that tag are removed,
///   as are `href` / `src` / `cite` values rejected by `is_safe_url`;
/// * a kept `<a>` gets `config.link_rel` as its `rel` value when one is set.
///
/// HTML comments are removed.
///
/// # Returns
/// The sanitized markup. If the rewriter reports an error an empty string is
/// returned, so a failure never lets unsanitized markup through.
fn sanitize(input: &str, config: &SanitizerConfig) -> String {
    // Elements whose content the HTML tokenizer treats as text rather than as
    // markup. Unwrapping one of these would emit that text as live markup
    // (e.g. `<textarea><script>...</script></textarea>` would become a real
    // `<script>` element), so they are removed wholesale unless explicitly
    // allowed.
    const TEXT_ONLY_TAGS: [&str; 10] = [
        "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style",
        "textarea", "title", "xmp",
    ];

    let allowed_tags = config.allowed_tags.clone();
    let remove_tags = config.remove_tags.clone();
    let generic_attrs: HashSet<&str> = config
        .extra_generic_attrs
        .iter()
        .copied()
        .chain(["lang", "title"])
        .collect();

    // Built-in per-tag attribute table, extended with the caller's additions.
    let tag_attrs_map: std::collections::HashMap<&str, HashSet<&str>> = {
        let mut m: std::collections::HashMap<&str, HashSet<&str>> =
            std::collections::HashMap::new();
        let base = [
            ("a", vec!["href", "hreflang"]),
            ("bdo", vec!["dir"]),
            ("blockquote", vec!["cite"]),
            ("col", vec!["align", "char", "charoff", "span"]),
            ("colgroup", vec!["align", "char", "charoff", "span"]),
            ("del", vec!["cite", "datetime"]),
            ("hr", vec!["align", "size", "width"]),
            ("img", vec!["align", "alt", "height", "src", "width"]),
            ("ins", vec!["cite", "datetime"]),
            ("ol", vec!["start"]),
            ("q", vec!["cite"]),
            ("table", vec!["align", "char", "charoff", "summary"]),
            ("tbody", vec!["align", "char", "charoff"]),
            ("td", vec!["align", "char", "charoff", "colspan", "headers", "rowspan"]),
            ("tfoot", vec!["align", "char", "charoff"]),
            ("th", vec!["align", "char", "charoff", "colspan", "headers", "rowspan", "scope"]),
            ("thead", vec!["align", "char", "charoff"]),
            ("tr", vec!["align", "char", "charoff"]),
        ];
        for (tag, attrs) in &base {
            m.insert(*tag, attrs.iter().copied().collect());
        }
        for (tag, attrs) in &config.extra_tag_attrs {
            m.entry(*tag)
                .or_insert_with(HashSet::new)
                .extend(attrs.iter().copied());
        }
        m
    };

    let allowed_schemes = config.allowed_schemes.clone();
    let allow_data_uri = config.allow_data_uri;
    let link_rel = config.link_rel;

    let element_handler = move |el: &mut lol_html::html_content::Element| {
        let tag = el.tag_name().to_lowercase();

        if remove_tags.contains(tag.as_str()) {
            el.remove();
            return Ok(());
        }

        if !allowed_tags.contains(tag.as_str()) {
            if TEXT_ONLY_TAGS.contains(&tag.as_str()) {
                // Keeping the content here would turn inert text into markup.
                el.remove();
            } else {
                el.remove_and_keep_content();
            }
            return Ok(());
        }

        // Look the attribute up in both tables instead of building a merged
        // set for every element.
        let tag_specific = tag_attrs_map.get(tag.as_str());
        let is_allowed_attr = |name: &str| {
            generic_attrs.contains(name) || tag_specific.is_some_and(|set| set.contains(name))
        };

        // Collect first: the attribute list is borrowed while iterating, so the
        // element cannot be mutated until the iteration is finished.
        let attrs_to_remove: Vec<String> = el
            .attributes()
            .iter()
            .filter_map(|attr| {
                let name = attr.name();
                let name_lower = name.to_lowercase();
                if !is_allowed_attr(name_lower.as_str()) {
                    return Some(name);
                }
                let is_url_attr = matches!(name_lower.as_str(), "href" | "src" | "cite");
                if is_url_attr && !is_safe_url(&attr.value(), &allowed_schemes, allow_data_uri) {
                    return Some(name);
                }
                None
            })
            .collect();
        for attr_name in attrs_to_remove {
            el.remove_attribute(&attr_name);
        }

        if tag == "a" {
            if let Some(rel) = link_rel {
                let existing = el.get_attribute("rel").unwrap_or_default();
                if existing != rel {
                    // Failing to set `rel` must not abort sanitization.
                    el.set_attribute("rel", rel).ok();
                }
            }
        }

        Ok(())
    };

    lol_html::rewrite_str(
        input,
        lol_html::RewriteStrSettings {
            element_content_handlers: vec![lol_html::element!("*", element_handler)],
            document_content_handlers: vec![lol_html::doc_comments!(|c| {
                c.remove();
                Ok(())
            })],
            ..lol_html::RewriteStrSettings::new()
        },
    )
    // Fail closed: an empty result is preferable to unsanitized markup.
    .unwrap_or_default()
}
#[cfg(feature = "server")]
/// Sanitizes HTML with the policy built below.
///
/// The policy keeps every default tag, allows `id`/`class` on headings,
/// accepts `data:` URLs, and sets `rel="noopener noreferrer"` on links.
pub fn clean_html(input: &str) -> String {
    let mut extra_tag_attrs = vec![
        ("a", vec!["class", "aria-hidden", "aria-label"]),
        ("span", vec!["class"]),
    ];
    // Every heading level gets the same pair of attributes.
    for heading in ["h1", "h2", "h3", "h4", "h5", "h6"] {
        extra_tag_attrs.push((heading, vec!["id", "class"]));
    }

    let extra_generic_attrs = vec![
        "class",
        "aria-hidden",
        "aria-label",
        "id",
        "role",
        "accesskey",
        "title",
    ];

    sanitize(
        input,
        &SanitizerConfig {
            allowed_tags: default_allowed_tags(),
            extra_generic_attrs,
            extra_tag_attrs,
            allowed_schemes: default_allowed_schemes(),
            allow_data_uri: true,
            link_rel: Some("noopener noreferrer"),
            remove_tags: clean_content_tags(),
        },
    )
}
#[cfg(feature = "server")]
/// Sanitizes HTML with a stricter policy than `clean_html`.
///
/// Compared with `clean_html`, `img`, `details` and `summary` are not allowed,
/// `id` is not a permitted attribute, `data:` URLs are rejected, and links get
/// `rel="nofollow noopener"`.
pub fn clean_comment_html(input: &str) -> String {
    let extra_generic_attrs = vec![
        "class",
        "title",
        "aria-hidden",
        "aria-label",
        "role",
        "accesskey",
    ];
    let extra_tag_attrs = vec![
        ("a", vec!["class", "aria-hidden", "aria-label"]),
        ("span", vec!["class"]),
    ];

    sanitize(
        input,
        &SanitizerConfig {
            // Default tags minus the ones not permitted under this policy.
            allowed_tags: default_allowed_tags()
                .into_iter()
                .filter(|tag| !matches!(*tag, "img" | "details" | "summary"))
                .collect(),
            extra_generic_attrs,
            extra_tag_attrs,
            allowed_schemes: default_allowed_schemes(),
            allow_data_uri: false,
            link_rel: Some("nofollow noopener"),
            remove_tags: clean_content_tags(),
        },
    )
}