refactor(sanitizer): extract shared sanitizer module and migrate from ammonia to lol_html
This commit is contained in:
parent
b898b55308
commit
a10bf8737c
226
Cargo.lock
generated
226
Cargo.lock
generated
@ -23,19 +23,6 @@ version = "0.2.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
|
||||
|
||||
[[package]]
|
||||
name = "ammonia"
|
||||
version = "4.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17e913097e1a2124b46746c980134e8c954bc17a6a59bb3fde96f088d126dde6"
|
||||
dependencies = [
|
||||
"cssparser",
|
||||
"html5ever",
|
||||
"maplit",
|
||||
"tendril",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "android_system_properties"
|
||||
version = "0.1.5"
|
||||
@ -721,14 +708,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cssparser"
|
||||
version = "0.35.0"
|
||||
version = "0.36.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e901edd733a1472f944a45116df3f846f54d37e67e68640ac8bb69689aca2aa"
|
||||
checksum = "dae61cf9c0abb83bd659dab65b7e4e38d8236824c85f0f804f173567bda257d2"
|
||||
dependencies = [
|
||||
"cssparser-macros",
|
||||
"dtoa-short",
|
||||
"itoa",
|
||||
"phf 0.11.3",
|
||||
"phf",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
@ -1636,6 +1623,12 @@ dependencies = [
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6"
|
||||
|
||||
[[package]]
|
||||
name = "fdeflate"
|
||||
version = "0.3.7"
|
||||
@ -1688,16 +1681,6 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futf"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
|
||||
dependencies = [
|
||||
"mac",
|
||||
"new_debug_unreachable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures"
|
||||
version = "0.3.32"
|
||||
@ -2002,6 +1985,11 @@ name = "hashbrown"
|
||||
version = "0.17.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
|
||||
dependencies = [
|
||||
"allocator-api2",
|
||||
"equivalent",
|
||||
"foldhash 0.2.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "headers"
|
||||
@ -2054,17 +2042,6 @@ dependencies = [
|
||||
"digest 0.11.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.35.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4"
|
||||
dependencies = [
|
||||
"log",
|
||||
"markup5ever",
|
||||
"match_token",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "1.4.1"
|
||||
@ -2556,6 +2533,25 @@ version = "0.4.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a"
|
||||
|
||||
[[package]]
|
||||
name = "lol_html"
|
||||
version = "2.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "00aad58f6ec3990e795943872f13651e7a5fa59dca2c8f31a74faf8a0e0fb652"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cfg-if",
|
||||
"cssparser",
|
||||
"encoding_rs",
|
||||
"foldhash 0.2.0",
|
||||
"hashbrown 0.17.1",
|
||||
"memchr",
|
||||
"mime",
|
||||
"precomputed-hash",
|
||||
"selectors",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "longest-increasing-subsequence"
|
||||
version = "0.1.0"
|
||||
@ -2577,12 +2573,6 @@ version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
|
||||
|
||||
[[package]]
|
||||
name = "mac"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||
|
||||
[[package]]
|
||||
name = "macro-string"
|
||||
version = "0.1.4"
|
||||
@ -2647,34 +2637,6 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "maplit"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
|
||||
|
||||
[[package]]
|
||||
name = "markup5ever"
|
||||
version = "0.35.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "311fe69c934650f8f19652b3946075f0fc41ad8757dbb68f1ca14e7900ecc1c3"
|
||||
dependencies = [
|
||||
"log",
|
||||
"tendril",
|
||||
"web_atoms",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "match_token"
|
||||
version = "0.35.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac84fd3f360fcc43dc5f5d186f02a94192761a080e8bc58621ad4d12296a58cf"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
version = "0.2.0"
|
||||
@ -3007,68 +2969,50 @@ version = "2.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
|
||||
dependencies = [
|
||||
"phf_macros",
|
||||
"phf_shared 0.11.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
|
||||
dependencies = [
|
||||
"phf_shared 0.13.1",
|
||||
"phf_macros",
|
||||
"phf_shared",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.11.3"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
|
||||
checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared 0.11.3",
|
||||
"phf_shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.11.3"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
|
||||
checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737"
|
||||
dependencies = [
|
||||
"phf_shared 0.11.3",
|
||||
"rand 0.8.6",
|
||||
"fastrand",
|
||||
"phf_shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_macros"
|
||||
version = "0.11.3"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
|
||||
checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared 0.11.3",
|
||||
"phf_shared",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
|
||||
dependencies = [
|
||||
"siphasher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.13.1"
|
||||
@ -3675,6 +3619,25 @@ version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "selectors"
|
||||
version = "0.37.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2cfaaa6035167f0e604e42723c7650d59ee269ef220d7bbe0565602c8a0173b9"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cssparser",
|
||||
"derive_more",
|
||||
"log",
|
||||
"new_debug_unreachable",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
"precomputed-hash",
|
||||
"rustc-hash 2.1.2",
|
||||
"servo_arc",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.28"
|
||||
@ -3789,6 +3752,15 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "servo_arc"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "170fb83ab34de17dc69aa7c67482b22218ddb85da56546f9bd6b929e32a05930"
|
||||
dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha1"
|
||||
version = "0.10.6"
|
||||
@ -3931,31 +3903,6 @@ version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
|
||||
|
||||
[[package]]
|
||||
name = "string_cache"
|
||||
version = "0.8.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
|
||||
dependencies = [
|
||||
"new_debug_unreachable",
|
||||
"parking_lot",
|
||||
"phf_shared 0.11.3",
|
||||
"precomputed-hash",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "string_cache_codegen"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared 0.11.3",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stringprep"
|
||||
version = "0.1.5"
|
||||
@ -4080,17 +4027,6 @@ version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
|
||||
|
||||
[[package]]
|
||||
name = "tendril"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
|
||||
dependencies = [
|
||||
"futf",
|
||||
"mac",
|
||||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.69"
|
||||
@ -4237,7 +4173,7 @@ dependencies = [
|
||||
"log",
|
||||
"parking_lot",
|
||||
"percent-encoding",
|
||||
"phf 0.13.1",
|
||||
"phf",
|
||||
"pin-project-lite",
|
||||
"postgres-protocol",
|
||||
"postgres-types",
|
||||
@ -4848,18 +4784,6 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "web_atoms"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57ffde1dc01240bdf9992e3205668b235e59421fd085e8a317ed98da0178d414"
|
||||
dependencies = [
|
||||
"phf 0.11.3",
|
||||
"phf_codegen",
|
||||
"string_cache",
|
||||
"string_cache_codegen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "webpki-roots"
|
||||
version = "1.0.7"
|
||||
@ -5354,7 +5278,6 @@ dependencies = [
|
||||
name = "yggdrasil"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"ammonia",
|
||||
"argon2",
|
||||
"axum",
|
||||
"chrono",
|
||||
@ -5367,6 +5290,7 @@ dependencies = [
|
||||
"http",
|
||||
"image",
|
||||
"js-sys",
|
||||
"lol_html",
|
||||
"md-5 0.10.6",
|
||||
"moka",
|
||||
"pulldown-cmark",
|
||||
|
||||
@ -25,7 +25,7 @@ axum = { version = "0.8", optional = true, features = ["multipart"] }
|
||||
serde_json = "1.0"
|
||||
sha2 = "0.10"
|
||||
hex = "0.4"
|
||||
ammonia = { version = "4", optional = true }
|
||||
lol_html = { version = "2", optional = true }
|
||||
syntect = { version = "5", default-features = false, features = ["default-syntaxes", "default-themes", "default-fancy", "html", "parsing", "dump-load", "yaml-load"], optional = true }
|
||||
# NOTE: WebP decoder is intentionally excluded from the image crate.
|
||||
# We use zenwebp for both encoding and decoding to ensure consistency.
|
||||
@ -63,7 +63,7 @@ server = [
|
||||
"dep:tracing",
|
||||
"dep:tracing-subscriber",
|
||||
"dep:tower-http",
|
||||
"dep:ammonia",
|
||||
"dep:lol_html",
|
||||
"dep:syntect",
|
||||
"dep:axum",
|
||||
"dep:image",
|
||||
|
||||
@ -8,29 +8,9 @@ fn html_escape(s: &str) -> String {
|
||||
.replace('"', "&quot;")
|
||||
}
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
static COMMENT_AMMONIA_BUILDER: std::sync::LazyLock<ammonia::Builder> = std::sync::LazyLock::new(|| {
|
||||
let mut builder = ammonia::Builder::default();
|
||||
builder
|
||||
.rm_tags(["img", "details", "summary"])
|
||||
.add_generic_attributes(&[
|
||||
"class",
|
||||
"title",
|
||||
"aria-hidden",
|
||||
"aria-label",
|
||||
"role",
|
||||
"accesskey",
|
||||
])
|
||||
.url_relative(ammonia::UrlRelative::PassThrough)
|
||||
.add_tag_attributes("a", &["class", "aria-hidden", "aria-label"])
|
||||
.add_tag_attributes("span", &["class"])
|
||||
.link_rel(Some("nofollow noopener"));
|
||||
builder
|
||||
});
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
pub fn clean_comment_html(input: &str) -> String {
|
||||
COMMENT_AMMONIA_BUILDER.clean(input).to_string()
|
||||
crate::api::sanitizer::clean_comment_html(input)
|
||||
}
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
|
||||
@ -1,35 +1,8 @@
|
||||
#![allow(clippy::unused_unit, deprecated, unused_imports)]
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
static AMMONIA_BUILDER: std::sync::LazyLock<ammonia::Builder> = std::sync::LazyLock::new(|| {
|
||||
let mut builder = ammonia::Builder::default();
|
||||
builder
|
||||
.add_generic_attributes(&[
|
||||
"class",
|
||||
"aria-hidden",
|
||||
"aria-label",
|
||||
"id",
|
||||
"role",
|
||||
"accesskey",
|
||||
"title",
|
||||
])
|
||||
.add_tags(&["details", "summary"])
|
||||
.url_relative(ammonia::UrlRelative::PassThrough)
|
||||
.add_url_schemes(&["data"])
|
||||
.add_tag_attributes("a", &["class", "aria-hidden", "aria-label"])
|
||||
.add_tag_attributes("span", &["class"])
|
||||
.add_tag_attributes("h1", &["id", "class"])
|
||||
.add_tag_attributes("h2", &["id", "class"])
|
||||
.add_tag_attributes("h3", &["id", "class"])
|
||||
.add_tag_attributes("h4", &["id", "class"])
|
||||
.add_tag_attributes("h5", &["id", "class"])
|
||||
.add_tag_attributes("h6", &["id", "class"]);
|
||||
builder
|
||||
});
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
pub fn clean_html(input: &str) -> String {
|
||||
AMMONIA_BUILDER.clean(input).to_string()
|
||||
crate::api::sanitizer::clean_html(input)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
|
||||
@ -5,5 +5,6 @@ pub mod image;
|
||||
pub mod markdown;
|
||||
pub mod posts;
|
||||
pub mod rate_limit;
|
||||
pub mod sanitizer;
|
||||
pub mod slug;
|
||||
pub mod upload;
|
||||
|
||||
261
src/api/sanitizer.rs
Normal file
261
src/api/sanitizer.rs
Normal file
@ -0,0 +1,261 @@
|
||||
#![allow(clippy::unused_unit, deprecated, unused_imports)]
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
use std::collections::HashSet;
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
/// Returns the baseline set of element names that are kept by the sanitizer.
///
/// Both `clean_html` and `clean_comment_html` start from this set; the
/// comment profile removes a few entries before use.
fn default_allowed_tags() -> HashSet<&'static str> {
    // Kept in alphabetical order so additions are easy to review.
    const TAGS: &[&str] = &[
        "a", "abbr", "acronym", "area", "article", "aside",
        "b", "bdi", "bdo", "blockquote", "br",
        "caption", "center", "cite", "code", "col", "colgroup",
        "data", "dd", "del", "details", "dfn", "div", "dl", "dt",
        "em",
        "figcaption", "figure", "footer",
        "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr",
        "i", "img", "ins",
        "kbd",
        "li",
        "map", "mark",
        "nav",
        "ol",
        "p", "pre",
        "q",
        "rp", "rt", "rtc", "ruby",
        "s", "samp", "small", "span", "strike", "strong", "sub", "summary", "sup",
        "table", "tbody", "td", "th", "thead", "time", "tr", "tt",
        "u", "ul",
        "var",
        "wbr",
    ];
    TAGS.iter().copied().collect()
}
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
/// Returns the element names that are dropped together with everything
/// inside them (as opposed to being unwrapped with their content kept).
fn clean_content_tags() -> HashSet<&'static str> {
    ["script", "style"].iter().copied().collect()
}
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
/// Returns the URL schemes accepted in URL-carrying attributes by default.
///
/// `data` is deliberately absent: it is governed separately by the
/// `allow_data_uri` flag passed to `is_safe_url`.
fn default_allowed_schemes() -> HashSet<&'static str> {
    const SCHEMES: &[&str] = &[
        "bitcoin",
        "ftp", "ftps",
        "geo",
        "http", "https",
        "im", "irc", "ircs",
        "magnet", "mailto", "mms", "mx",
        "news", "nntp",
        "openpgp4fpr",
        "sip", "sms", "smsto", "ssh",
        "tel",
        "url",
        "webcal", "wtai",
        "xmpp",
    ];
    SCHEMES.iter().copied().collect()
}
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
/// Decides whether the value of a URL-carrying attribute may be kept.
///
/// The check is a strict allow-list:
///
/// * Empty values and relative references (no scheme in front of the first
///   `/`, `?` or `#`) are accepted.
/// * A value with a scheme is accepted only if the ASCII-lowercased scheme is
///   in `allowed_schemes`, or it is `data` and `allow_data_uri` is set.
/// * Everything else is rejected, including unknown schemes, schemes with
///   embedded tabs/newlines, and values whose scheme position contains `&`.
///
/// # Parameters
/// * `url` - raw attribute value as found in the markup.
/// * `allowed_schemes` - lowercase scheme names that are permitted.
/// * `allow_data_uri` - whether `data:` URLs are permitted.
///
/// # Returns
/// `true` when the attribute may stay, `false` when it must be removed.
fn is_safe_url(url: &str, allowed_schemes: &HashSet<&str>, allow_data_uri: bool) -> bool {
    // URL parsers in browsers ignore leading/trailing C0 control characters
    // and spaces, so "\x01javascript:..." must be judged as "javascript:...".
    let trimmed = url.trim_matches(|c: char| c <= ' ');
    if trimmed.is_empty() {
        return true;
    }

    // Whichever of these characters comes first tells us what kind of
    // reference this is: ':' ends a scheme, '/', '?' and '#' start a
    // path/query/fragment (so no scheme can follow), and '&' may start a
    // character reference.
    let first_special =
        trimmed.find(|c: char| matches!(c, ':' | '/' | '?' | '#' | '&'));

    let pos = match first_special {
        Some(pos) => pos,
        // No delimiter at all: a plain relative reference such as "page.html".
        None => return true,
    };

    match trimmed.as_bytes()[pos] {
        b':' => {
            // Schemes are ASCII case-insensitive. Anything that is not an
            // exact allow-list match is rejected, which also covers schemes
            // obfuscated with embedded tabs or newlines.
            let scheme = trimmed[..pos].to_ascii_lowercase();
            allowed_schemes.contains(scheme.as_str()) || (allow_data_uri && scheme == "data")
        }
        // NOTE(review): this assumes the caller may hand us attribute values
        // whose character references are still encoded (e.g. "&#106;avascript:"
        // or "javascript&colon;") - confirm against the HTML rewriter in use.
        // Rejecting '&' in the scheme position is safe either way; '&' after
        // the first '/', '?' or ':' (ordinary query strings) is unaffected.
        b'&' => false,
        // '/', '?' or '#' first: relative path, query or fragment.
        _ => true,
    }
}
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
/// Policy consumed by `sanitize`: which elements, attributes and URL schemes
/// are allowed to survive.
struct SanitizerConfig {
    /// Element names that are kept. Elements outside this set (and outside
    /// `remove_tags`) are stripped by `sanitize`.
    allowed_tags: HashSet<&'static str>,
    /// Attribute names permitted on every kept element, in addition to the
    /// `lang` and `title` attributes that `sanitize` always permits.
    extra_generic_attrs: Vec<&'static str>,
    /// Per-element attribute names merged into the built-in per-element table
    /// inside `sanitize`. Each entry is `(tag, attributes)`.
    extra_tag_attrs: Vec<(&'static str, Vec<&'static str>)>,
    /// Schemes accepted in `href`, `src` and `cite` values.
    allowed_schemes: HashSet<&'static str>,
    /// Whether `data:` URLs are accepted in those attributes.
    allow_data_uri: bool,
    /// When set, the `rel` value forced onto every kept `<a>` element.
    link_rel: Option<&'static str>,
    /// Element names removed together with their entire content.
    remove_tags: HashSet<&'static str>,
}
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
fn sanitize(input: &str, config: &SanitizerConfig) -> String {
|
||||
let allowed_tags = config.allowed_tags.clone();
|
||||
let remove_tags = config.remove_tags.clone();
|
||||
let generic_attrs: HashSet<&str> = config
|
||||
.extra_generic_attrs
|
||||
.iter()
|
||||
.copied()
|
||||
.chain(["lang", "title"])
|
||||
.collect();
|
||||
let tag_attrs_map: std::collections::HashMap<&str, HashSet<&str>> = {
|
||||
let mut m = std::collections::HashMap::new();
|
||||
let base = [
|
||||
("a", vec!["href", "hreflang"]),
|
||||
("bdo", vec!["dir"]),
|
||||
("blockquote", vec!["cite"]),
|
||||
("col", vec!["align", "char", "charoff", "span"]),
|
||||
("colgroup", vec!["align", "char", "charoff", "span"]),
|
||||
("del", vec!["cite", "datetime"]),
|
||||
("hr", vec!["align", "size", "width"]),
|
||||
("img", vec!["align", "alt", "height", "src", "width"]),
|
||||
("ins", vec!["cite", "datetime"]),
|
||||
("ol", vec!["start"]),
|
||||
("q", vec!["cite"]),
|
||||
("table", vec!["align", "char", "charoff", "summary"]),
|
||||
("tbody", vec!["align", "char", "charoff"]),
|
||||
("td", vec!["align", "char", "charoff", "colspan", "headers", "rowspan"]),
|
||||
("tfoot", vec!["align", "char", "charoff"]),
|
||||
("th", vec!["align", "char", "charoff", "colspan", "headers", "rowspan", "scope"]),
|
||||
("thead", vec!["align", "char", "charoff"]),
|
||||
("tr", vec!["align", "char", "charoff"]),
|
||||
];
|
||||
for (tag, attrs) in &base {
|
||||
m.insert(*tag, attrs.iter().copied().collect());
|
||||
}
|
||||
for (tag, attrs) in &config.extra_tag_attrs {
|
||||
m.entry(tag)
|
||||
.or_insert_with(HashSet::new)
|
||||
.extend(attrs.iter().copied());
|
||||
}
|
||||
m
|
||||
};
|
||||
let allowed_schemes = config.allowed_schemes.clone();
|
||||
let allow_data_uri = config.allow_data_uri;
|
||||
let link_rel = config.link_rel;
|
||||
|
||||
let element_handler = move |el: &mut lol_html::html_content::Element| {
|
||||
let tag = el.tag_name().to_lowercase();
|
||||
|
||||
if remove_tags.contains(tag.as_str()) {
|
||||
el.remove();
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if !allowed_tags.contains(tag.as_str()) {
|
||||
el.remove_and_keep_content();
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let allowed_for_tag: HashSet<&str> = {
|
||||
let mut s = generic_attrs.clone();
|
||||
if let Some(tag_specific) = tag_attrs_map.get(tag.as_str()) {
|
||||
s.extend(tag_specific.iter().copied());
|
||||
}
|
||||
s
|
||||
};
|
||||
|
||||
let attrs_to_remove: Vec<String> = el
|
||||
.attributes()
|
||||
.iter()
|
||||
.filter_map(|attr| {
|
||||
let name = attr.name();
|
||||
let name_lower = name.to_lowercase();
|
||||
if allowed_for_tag.contains(name_lower.as_str()) {
|
||||
if name_lower == "href" || name_lower == "src" || name_lower == "cite" {
|
||||
let val = attr.value();
|
||||
if !is_safe_url(&val, &allowed_schemes, allow_data_uri) {
|
||||
return Some(name);
|
||||
}
|
||||
}
|
||||
None
|
||||
} else {
|
||||
Some(name)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
for attr_name in attrs_to_remove {
|
||||
el.remove_attribute(&attr_name);
|
||||
}
|
||||
|
||||
if link_rel.is_some() && tag == "a" {
|
||||
if let Some(rel) = link_rel {
|
||||
let existing = el.get_attribute("rel").unwrap_or_default();
|
||||
if existing != rel {
|
||||
el.set_attribute("rel", rel).ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
};
|
||||
|
||||
lol_html::rewrite_str(
|
||||
input,
|
||||
lol_html::RewriteStrSettings {
|
||||
element_content_handlers: vec![lol_html::element!("*", element_handler)],
|
||||
document_content_handlers: vec![lol_html::doc_comments!(|c| {
|
||||
c.remove();
|
||||
Ok(())
|
||||
})],
|
||||
..lol_html::RewriteStrSettings::new()
|
||||
},
|
||||
)
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
pub fn clean_html(input: &str) -> String {
|
||||
let config = SanitizerConfig {
|
||||
allowed_tags: default_allowed_tags(),
|
||||
extra_generic_attrs: vec![
|
||||
"class",
|
||||
"aria-hidden",
|
||||
"aria-label",
|
||||
"id",
|
||||
"role",
|
||||
"accesskey",
|
||||
"title",
|
||||
],
|
||||
extra_tag_attrs: vec![
|
||||
("a", vec!["class", "aria-hidden", "aria-label"]),
|
||||
("span", vec!["class"]),
|
||||
("h1", vec!["id", "class"]),
|
||||
("h2", vec!["id", "class"]),
|
||||
("h3", vec!["id", "class"]),
|
||||
("h4", vec!["id", "class"]),
|
||||
("h5", vec!["id", "class"]),
|
||||
("h6", vec!["id", "class"]),
|
||||
],
|
||||
allowed_schemes: default_allowed_schemes(),
|
||||
allow_data_uri: true,
|
||||
link_rel: Some("noopener noreferrer"),
|
||||
remove_tags: clean_content_tags(),
|
||||
};
|
||||
sanitize(input, &config)
|
||||
}
|
||||
|
||||
#[cfg(feature = "server")]
|
||||
pub fn clean_comment_html(input: &str) -> String {
|
||||
let mut tags = default_allowed_tags();
|
||||
tags.remove("img");
|
||||
tags.remove("details");
|
||||
tags.remove("summary");
|
||||
|
||||
let config = SanitizerConfig {
|
||||
allowed_tags: tags,
|
||||
extra_generic_attrs: vec![
|
||||
"class",
|
||||
"title",
|
||||
"aria-hidden",
|
||||
"aria-label",
|
||||
"role",
|
||||
"accesskey",
|
||||
],
|
||||
extra_tag_attrs: vec![
|
||||
("a", vec!["class", "aria-hidden", "aria-label"]),
|
||||
("span", vec!["class"]),
|
||||
],
|
||||
allowed_schemes: default_allowed_schemes(),
|
||||
allow_data_uri: false,
|
||||
link_rel: Some("nofollow noopener"),
|
||||
remove_tags: clean_content_tags(),
|
||||
};
|
||||
sanitize(input, &config)
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user