Manually remove images instead of parsing
Instead of using all kinds of difficult libraries, just replace the string "src" with "data-source". This covers most cases of removing images. This also removes the previously inlined kuchiki and sanitize-html-rs libraries. Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
This commit is contained in:
parent
9d41fcd463
commit
abf4c787ab
|
@ -62,12 +62,6 @@ version = "3.10.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.73"
|
||||
|
@ -93,12 +87,6 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "convert_case"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.2.2"
|
||||
|
@ -118,46 +106,6 @@ dependencies = [
|
|||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cssparser"
|
||||
version = "0.27.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a"
|
||||
dependencies = [
|
||||
"cssparser-macros",
|
||||
"dtoa-short",
|
||||
"itoa",
|
||||
"matches",
|
||||
"phf 0.8.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"smallvec",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cssparser-macros"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_more"
|
||||
version = "0.99.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
|
||||
dependencies = [
|
||||
"convert_case",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustc_version",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.3"
|
||||
|
@ -168,21 +116,6 @@ dependencies = [
|
|||
"crypto-common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dtoa"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0"
|
||||
|
||||
[[package]]
|
||||
name = "dtoa-short"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bde03329ae10e79ede66c9ce4dc930aa8599043b0743008548680f25b91502d6"
|
||||
dependencies = [
|
||||
"dtoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.31"
|
||||
|
@ -192,34 +125,6 @@ dependencies = [
|
|||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "1.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf"
|
||||
dependencies = [
|
||||
"instant",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futf"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
|
||||
dependencies = [
|
||||
"mac",
|
||||
"new_debug_unreachable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fxhash"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.5"
|
||||
|
@ -230,48 +135,6 @@ dependencies = [
|
|||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.1.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi 0.9.0+wasi-snapshot-preview1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi 0.10.0+wasi-snapshot-preview1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7"
|
||||
dependencies = [
|
||||
"log",
|
||||
"mac",
|
||||
"markup5ever",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "imap"
|
||||
version = "2.4.1"
|
||||
|
@ -296,31 +159,6 @@ dependencies = [
|
|||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "1.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6012d540c5baa3589337a98ce73408de9b5a25ec9fc2c6fd6be8f0d39e0ca5a"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "instant"
|
||||
version = "0.1.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.57"
|
||||
|
@ -330,18 +168,6 @@ dependencies = [
|
|||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kuchiki"
|
||||
version = "0.8.1"
|
||||
dependencies = [
|
||||
"cssparser",
|
||||
"html5ever",
|
||||
"indexmap",
|
||||
"matches",
|
||||
"selectors",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
|
@ -367,16 +193,6 @@ version = "0.2.126"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.17"
|
||||
|
@ -386,12 +202,6 @@ dependencies = [
|
|||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mac"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||
|
||||
[[package]]
|
||||
name = "mail-parser"
|
||||
version = "0.5.0"
|
||||
|
@ -402,38 +212,12 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "markup5ever"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016"
|
||||
dependencies = [
|
||||
"log",
|
||||
"phf 0.10.1",
|
||||
"phf_codegen 0.10.0",
|
||||
"string_cache",
|
||||
"string_cache_codegen",
|
||||
"tendril",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matches"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||
|
||||
[[package]]
|
||||
name = "new_debug_unreachable"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
|
||||
|
||||
[[package]]
|
||||
name = "newsletter-to-web"
|
||||
version = "0.1.0"
|
||||
|
@ -442,16 +226,9 @@ dependencies = [
|
|||
"imap",
|
||||
"mail-parser",
|
||||
"rustls-connector",
|
||||
"sanitize_html",
|
||||
"sha2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nodrop"
|
||||
version = "0.1.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "5.1.2"
|
||||
|
@ -488,139 +265,6 @@ version = "1.12.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225"
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
|
||||
dependencies = [
|
||||
"lock_api",
|
||||
"parking_lot_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot_core"
|
||||
version = "0.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
"smallvec",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
|
||||
dependencies = [
|
||||
"phf_macros",
|
||||
"phf_shared 0.8.0",
|
||||
"proc-macro-hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
|
||||
dependencies = [
|
||||
"phf_shared 0.10.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815"
|
||||
dependencies = [
|
||||
"phf_generator 0.8.0",
|
||||
"phf_shared 0.8.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
|
||||
dependencies = [
|
||||
"phf_generator 0.10.0",
|
||||
"phf_shared 0.10.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526"
|
||||
dependencies = [
|
||||
"phf_shared 0.8.0",
|
||||
"rand 0.7.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
|
||||
dependencies = [
|
||||
"phf_shared 0.10.0",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_macros"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c"
|
||||
dependencies = [
|
||||
"phf_generator 0.8.0",
|
||||
"phf_shared 0.8.0",
|
||||
"proc-macro-hack",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7"
|
||||
dependencies = [
|
||||
"siphasher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
|
||||
dependencies = [
|
||||
"siphasher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
|
||||
|
||||
[[package]]
|
||||
name = "precomputed-hash"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-hack"
|
||||
version = "0.5.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.39"
|
||||
|
@ -639,96 +283,6 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
|
||||
dependencies = [
|
||||
"getrandom 0.1.16",
|
||||
"libc",
|
||||
"rand_chacha 0.2.2",
|
||||
"rand_core 0.5.1",
|
||||
"rand_hc",
|
||||
"rand_pcg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"rand_chacha 0.3.1",
|
||||
"rand_core 0.6.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core 0.5.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core 0.6.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
|
||||
dependencies = [
|
||||
"getrandom 0.1.16",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
|
||||
dependencies = [
|
||||
"getrandom 0.2.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_hc"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
|
||||
dependencies = [
|
||||
"rand_core 0.5.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_pcg"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
|
||||
dependencies = [
|
||||
"rand_core 0.5.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.2.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.5.6"
|
||||
|
@ -746,15 +300,6 @@ version = "0.6.26"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64"
|
||||
|
||||
[[package]]
|
||||
name = "remove_dir_all"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.16.20"
|
||||
|
@ -770,15 +315,6 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
|
||||
dependencies = [
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.20.6"
|
||||
|
@ -809,22 +345,6 @@ version = "1.0.10"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
|
||||
|
||||
[[package]]
|
||||
name = "sanitize_html"
|
||||
version = "0.7.0"
|
||||
dependencies = [
|
||||
"html5ever",
|
||||
"kuchiki",
|
||||
"lazy_static",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||
|
||||
[[package]]
|
||||
name = "sct"
|
||||
version = "0.7.0"
|
||||
|
@ -835,32 +355,6 @@ dependencies = [
|
|||
"untrusted",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "selectors"
|
||||
version = "0.22.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cssparser",
|
||||
"derive_more",
|
||||
"fxhash",
|
||||
"log",
|
||||
"matches",
|
||||
"phf 0.8.0",
|
||||
"phf_codegen 0.8.0",
|
||||
"precomputed-hash",
|
||||
"servo_arc",
|
||||
"smallvec",
|
||||
"thin-slice",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a41d061efea015927ac527063765e73601444cdc344ba855bc7bd44578b25e1c"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.137"
|
||||
|
@ -881,16 +375,6 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "servo_arc"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432"
|
||||
dependencies = [
|
||||
"nodrop",
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha2"
|
||||
version = "0.10.2"
|
||||
|
@ -902,62 +386,18 @@ dependencies = [
|
|||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.3.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
|
||||
|
||||
[[package]]
|
||||
name = "spin"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
|
||||
|
||||
[[package]]
|
||||
name = "stable_deref_trait"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
||||
|
||||
[[package]]
|
||||
name = "static_assertions"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||
|
||||
[[package]]
|
||||
name = "string_cache"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "213494b7a2b503146286049378ce02b482200519accc31872ee8be91fa820a08"
|
||||
dependencies = [
|
||||
"new_debug_unreachable",
|
||||
"once_cell",
|
||||
"parking_lot",
|
||||
"phf_shared 0.10.0",
|
||||
"precomputed-hash",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "string_cache_codegen"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988"
|
||||
dependencies = [
|
||||
"phf_generator 0.10.0",
|
||||
"phf_shared 0.10.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.96"
|
||||
|
@ -969,37 +409,6 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"fastrand",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
"remove_dir_all",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tendril"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
|
||||
dependencies = [
|
||||
"futf",
|
||||
"mac",
|
||||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thin-slice"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.44"
|
||||
|
@ -1007,7 +416,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"wasi 0.10.0+wasi-snapshot-preview1",
|
||||
"wasi",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
|
@ -1029,24 +438,12 @@ version = "0.7.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
|
||||
|
||||
[[package]]
|
||||
name = "utf-8"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.9.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.10.0+wasi-snapshot-preview1"
|
||||
|
@ -1157,46 +554,3 @@ name = "winapi-x86_64-pc-windows-gnu"
|
|||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
|
||||
dependencies = [
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
|
||||
|
|
|
@ -2,6 +2,4 @@
|
|||
|
||||
members = [
|
||||
"bin",
|
||||
"sanitize-html-rs",
|
||||
"kuchiki",
|
||||
]
|
||||
|
|
|
@ -11,5 +11,4 @@ base16ct = { version = "^0.1.0", features = [ "alloc" ] }
|
|||
imap = { version = "^2.4.1", default-features = false }
|
||||
mail-parser = "^0.5.0"
|
||||
rustls-connector = { version = "^0.16.1", default-features = false, features = [ "webpki-roots-certs", "quic" ] }
|
||||
sanitize_html = { path = "../sanitize-html-rs" }
|
||||
sha2 = "^0.10.2"
|
||||
|
|
|
@ -8,13 +8,11 @@ use std::{
|
|||
|
||||
use mail_parser::Message as MpMessage;
|
||||
|
||||
use sanitize_html::{rules::Element, sanitize_str};
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
extern crate imap;
|
||||
extern crate mail_parser;
|
||||
extern crate rustls_connector;
|
||||
extern crate sanitize_html;
|
||||
extern crate sha2;
|
||||
|
||||
use message_reader::{EmailReader, TestMessagesReader};
|
||||
|
@ -50,9 +48,6 @@ fn main() {
|
|||
println!("Processing message {}", msg.get_uid());
|
||||
|
||||
let parsed = msg.get_parsed().expect("A parsed messsage.");
|
||||
let title = parsed.get_subject().expect("Expected a subject");
|
||||
|
||||
println!("{}", &title);
|
||||
|
||||
let html_body = parsed.get_html_body(0).expect("Could not read html body");
|
||||
let processed_html = process_html(&html_body).expect("Could not process the HTML");
|
||||
|
@ -92,27 +87,8 @@ fn get_path(parsed: &MpMessage, msg: &Message) -> String {
|
|||
format!("{:05}_{}_{}.html", uid, date_str, &hash).to_owned()
|
||||
}
|
||||
|
||||
fn process_html(input: &str) -> Result<String, sanitize_html::errors::SanitizeError> {
|
||||
let mut rules = sanitize_html::rules::predefined::relaxed().delete("style");
|
||||
|
||||
rules
|
||||
.allowed_elements
|
||||
.get_mut("img")
|
||||
.unwrap()
|
||||
.attribute_rules
|
||||
.rename("src", "data-source");
|
||||
|
||||
let mut span = Element::new("span");
|
||||
|
||||
span.attribute_rules
|
||||
.modify("style", Box::new(|_i| "".to_string()));
|
||||
|
||||
let rules = rules.element(span);
|
||||
|
||||
//rules.allowed_elements.remove_entry("img");
|
||||
|
||||
sanitize_str(&rules, input)
|
||||
//Ok(input.to_owned())
|
||||
fn process_html(input: &str) -> Result<String, ()> {
|
||||
Ok(input.replace("src", "data-source"))
|
||||
}
|
||||
|
||||
fn write_to_test_path(msg: &Message) {
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
target
|
||||
Cargo.lock
|
||||
.cargo/config
|
|
@ -1,6 +0,0 @@
|
|||
sudo: false
|
||||
language: rust
|
||||
rust:
|
||||
- nightly
|
||||
- beta
|
||||
- stable
|
|
@ -1,22 +0,0 @@
|
|||
[package]
|
||||
name = "kuchiki"
|
||||
version = "0.8.1"
|
||||
authors = ["Simon Sapin <simon.sapin@exyr.org>"]
|
||||
license = "MIT"
|
||||
description = "(朽木) HTML/XML tree manipulation library"
|
||||
repository = "https://github.com/kuchiki-rs/kuchiki"
|
||||
edition = "2018"
|
||||
|
||||
[lib]
|
||||
name = "kuchiki"
|
||||
doctest = false
|
||||
|
||||
[dependencies]
|
||||
cssparser = "^0.27"
|
||||
matches = "^0.1.4"
|
||||
html5ever = "^0.26"
|
||||
selectors = "^0.22"
|
||||
indexmap = "^1.6.0"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
|
@ -1,23 +0,0 @@
|
|||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
|
@ -1,10 +0,0 @@
|
|||
Kuchiki (朽木)
|
||||
==============
|
||||
|
||||
HTML/XML¹ tree manipulation library for Rust.
|
||||
|
||||
[Documentation](https://docs.rs/kuchiki/)
|
||||
|
||||
See [users.rust-lang.org discussion](http://users.rust-lang.org/t/kuchiki-a-vaporware-html-xml-tree-manipulation-library/435).
|
||||
|
||||
¹ There is no support for XML syntax yet. The plan is to integrate with an existing parser.
|
|
@ -1,3 +0,0 @@
|
|||
<meta http-equiv="refresh" content="0; url=https://docs.rs/kuchiki/">
|
||||
<link rel="canonical" href="https://docs.rs/kuchiki/">
|
||||
<a href="https://docs.rs/kuchiki/">Moved to docs.rs</a>
|
|
@ -1,3 +0,0 @@
|
|||
<meta http-equiv="refresh" content="0; url=https://docs.rs/kuchiki/">
|
||||
<link rel="canonical" href="https://docs.rs/kuchiki/">
|
||||
<a href="https://docs.rs/kuchiki/">Moved to docs.rs</a>
|
|
@ -1,48 +0,0 @@
|
|||
extern crate kuchiki;
|
||||
|
||||
use kuchiki::traits::*;
|
||||
|
||||
fn main() {
|
||||
let html = r"
|
||||
<DOCTYPE html>
|
||||
<html>
|
||||
<head></head>
|
||||
<body>
|
||||
<h1>Example</h1>
|
||||
<p class='foo'>Hello, world!</p>
|
||||
<p class='foo'>I love HTML</p>
|
||||
</body>
|
||||
</html>
|
||||
";
|
||||
let css_selector = ".foo";
|
||||
|
||||
let document = kuchiki::parse_html().one(html);
|
||||
|
||||
for css_match in document.select(css_selector).unwrap() {
|
||||
// css_match is a NodeDataRef, but most of the interesting methods are
|
||||
// on NodeRef. Let's get the underlying NodeRef.
|
||||
let as_node = css_match.as_node();
|
||||
|
||||
// In this example, as_node represents an HTML node like
|
||||
//
|
||||
// <p class='foo'>Hello world!</p>"
|
||||
//
|
||||
// Which is distinct from just 'Hello world!'. To get rid of that <p>
|
||||
// tag, we're going to get each element's first child, which will be
|
||||
// a "text" node.
|
||||
//
|
||||
// There are other kinds of nodes, of course. The possibilities are all
|
||||
// listed in the `NodeData` enum in this crate.
|
||||
let text_node = as_node.first_child().unwrap();
|
||||
|
||||
// Let's get the actual text in this text node. A text node wraps around
|
||||
// a RefCell<String>, so we need to call borrow() to get a &str out.
|
||||
let text = text_node.as_text().unwrap().borrow();
|
||||
|
||||
// Prints:
|
||||
//
|
||||
// "Hello, world!"
|
||||
// "I love HTML"
|
||||
println!("{:?}", text);
|
||||
}
|
||||
}
|
|
@ -1,22 +0,0 @@
|
|||
extern crate kuchiki;
|
||||
|
||||
fn main() {
|
||||
let mut depth = 2;
|
||||
// 20 M nodes is a few GB of memory.
|
||||
while depth <= 20_000_000 {
|
||||
let mut node = kuchiki::NodeRef::new_text("");
|
||||
for _ in 0..depth {
|
||||
let parent = kuchiki::NodeRef::new_text("");
|
||||
parent.append(node);
|
||||
node = parent;
|
||||
}
|
||||
|
||||
println!("Trying to drop {} nodes...", depth);
|
||||
// Without an explicit `impl Drop for Node`,
|
||||
// depth = 20_000 causes "thread '<main>' has overflowed its stack"
|
||||
// on my machine (Linux x86_64).
|
||||
::std::mem::drop(node);
|
||||
|
||||
depth *= 10;
|
||||
}
|
||||
}
|
|
@ -1,83 +0,0 @@
|
|||
use html5ever::{LocalName, Namespace, Prefix};
|
||||
use indexmap::{map::Entry, IndexMap};
|
||||
|
||||
/// Convenience wrapper around a indexmap that adds method for attributes in the null namespace.
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct Attributes {
|
||||
/// A map of attributes whose name can have namespaces.
|
||||
pub map: IndexMap<ExpandedName, Attribute>,
|
||||
}
|
||||
|
||||
/// <https://www.w3.org/TR/REC-xml-names/#dt-expname>
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
|
||||
pub struct ExpandedName {
|
||||
/// Namespace URL
|
||||
pub ns: Namespace,
|
||||
/// "Local" part of the name
|
||||
pub local: LocalName,
|
||||
}
|
||||
|
||||
impl ExpandedName {
|
||||
/// Trivial constructor
|
||||
pub fn new<N: Into<Namespace>, L: Into<LocalName>>(ns: N, local: L) -> Self {
|
||||
ExpandedName {
|
||||
ns: ns.into(),
|
||||
local: local.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The non-identifying parts of an attribute
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct Attribute {
|
||||
/// The namespace prefix, if any
|
||||
pub prefix: Option<Prefix>,
|
||||
/// The attribute value
|
||||
pub value: String,
|
||||
}
|
||||
|
||||
impl Attributes {
|
||||
/// Like IndexMap::contains
|
||||
pub fn contains<A: Into<LocalName>>(&self, local_name: A) -> bool {
|
||||
self.map.contains_key(&ExpandedName::new(ns!(), local_name))
|
||||
}
|
||||
|
||||
/// Like IndexMap::get
|
||||
pub fn get<A: Into<LocalName>>(&self, local_name: A) -> Option<&str> {
|
||||
self.map
|
||||
.get(&ExpandedName::new(ns!(), local_name))
|
||||
.map(|attr| &*attr.value)
|
||||
}
|
||||
|
||||
/// Like IndexMap::get_mut
|
||||
pub fn get_mut<A: Into<LocalName>>(&mut self, local_name: A) -> Option<&mut String> {
|
||||
self.map
|
||||
.get_mut(&ExpandedName::new(ns!(), local_name))
|
||||
.map(|attr| &mut attr.value)
|
||||
}
|
||||
|
||||
/// Like IndexMap::entry
|
||||
pub fn entry<A: Into<LocalName>>(&mut self, local_name: A) -> Entry<ExpandedName, Attribute> {
|
||||
self.map.entry(ExpandedName::new(ns!(), local_name))
|
||||
}
|
||||
|
||||
/// Like IndexMap::insert
|
||||
pub fn insert<A: Into<LocalName>>(
|
||||
&mut self,
|
||||
local_name: A,
|
||||
value: String,
|
||||
) -> Option<Attribute> {
|
||||
self.map.insert(
|
||||
ExpandedName::new(ns!(), local_name),
|
||||
Attribute {
|
||||
prefix: None,
|
||||
value,
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Like IndexMap::remove
|
||||
pub fn remove<A: Into<LocalName>>(&mut self, local_name: A) -> Option<Attribute> {
|
||||
self.map.remove(&ExpandedName::new(ns!(), local_name))
|
||||
}
|
||||
}
|
|
@ -1,113 +0,0 @@
|
|||
//! Specialized methods for `Cell` of some specific `!Copy` types,
|
||||
//! allowing limited access to a value without moving it of the cell.
|
||||
//!
|
||||
//!
|
||||
//! # Soundness
|
||||
//!
|
||||
//! These methods use and `Cell::as_ptr` and `unsafe`.
|
||||
//! Their soundness lies in that:
|
||||
//!
|
||||
//! * `Cell<T>: !Sync` for any `T`, so no other thread is accessing this cell.
|
||||
//! * For the duration of the raw pointer access,
|
||||
//! this thread only runs code that is known to not access the same cell again.
|
||||
//! In particular, no method of a type paramater is called.
|
||||
//! For example, `clone_inner` would be unsound to generalize to any `Cell<T>`
|
||||
//! because it would involve running arbitrary code through `T::clone`
|
||||
//! and provide that code with a reference to the inside of the cell.
|
||||
//!
|
||||
//! ```rust
|
||||
//! struct Evil(Box<u32>, Rc<Cell<Option<Evil>>>);
|
||||
//! impl Clone for Evil {
|
||||
//! fn clone(&self) -> Self {
|
||||
//! mem::drop(self.1.take()); // Mess with the "other" node, which might be `self`.
|
||||
//! Evil(
|
||||
//! self.0.clone(), // possible use after free!
|
||||
//! Rc::new(Cell::new(None))
|
||||
//! )
|
||||
//! }
|
||||
//! }
|
||||
//! let a = Rc::new(Cell::new(None));
|
||||
//! a.set(Some(Evil(Box::new(5), a.clone()))); // Make a reference cycle.
|
||||
//! a.clone_inner();
|
||||
//! ```
|
||||
//!
|
||||
//! `Rc<T>::clone` and `Weak<T>::clone` do not have this problem
|
||||
//! as they only increment reference counts and never call `T::clone`.
|
||||
//!
|
||||
//!
|
||||
//! # Alternative
|
||||
//!
|
||||
//! To avoid using `unsafe` entirely, operating on a `T: !Copy` value inside a `Cell<T>`
|
||||
//! would require temporarily replacing it with a default value:
|
||||
//!
|
||||
//! ```rust
|
||||
//! fn option_dance<T, F, R>(cell: &Cell<T>, f: F) -> R
|
||||
//! where T: Default, F: FnOnce(&mut T) -> R
|
||||
//! {
|
||||
//! let mut value = cell.take();
|
||||
//! let result = f(&mut value);
|
||||
//! cell.set(value);
|
||||
//! result
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! It would be worth exploring whether LLVM can reliably optimize away these extra moves
|
||||
//! and compile the `Option` dance to assembly similar to that of the `unsafe` operation.
|
||||
|
||||
use std::cell::Cell;
|
||||
use std::rc::{Rc, Weak};
|
||||
|
||||
pub trait CellOption {
|
||||
fn is_none(&self) -> bool;
|
||||
}
|
||||
|
||||
impl<T> CellOption for Cell<Option<T>> {
|
||||
#[inline]
|
||||
fn is_none(&self) -> bool {
|
||||
unsafe { (*self.as_ptr()).is_none() }
|
||||
}
|
||||
}
|
||||
|
||||
pub trait CellOptionWeak<T> {
|
||||
fn upgrade(&self) -> Option<Rc<T>>;
|
||||
fn clone_inner(&self) -> Option<Weak<T>>;
|
||||
}
|
||||
|
||||
impl<T> CellOptionWeak<T> for Cell<Option<Weak<T>>> {
|
||||
#[inline]
|
||||
fn upgrade(&self) -> Option<Rc<T>> {
|
||||
unsafe { (*self.as_ptr()).as_ref().and_then(Weak::upgrade) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn clone_inner(&self) -> Option<Weak<T>> {
|
||||
unsafe { (*self.as_ptr()).clone() }
|
||||
}
|
||||
}
|
||||
|
||||
pub trait CellOptionRc<T> {
|
||||
/// Return `Some` if this `Rc` is the only strong reference count,
|
||||
/// even if there are weak references.
|
||||
fn take_if_unique_strong(&self) -> Option<Rc<T>>;
|
||||
fn clone_inner(&self) -> Option<Rc<T>>;
|
||||
}
|
||||
|
||||
impl<T> CellOptionRc<T> for Cell<Option<Rc<T>>> {
|
||||
#[inline]
|
||||
fn take_if_unique_strong(&self) -> Option<Rc<T>> {
|
||||
unsafe {
|
||||
match *self.as_ptr() {
|
||||
None => None,
|
||||
Some(ref rc) if Rc::strong_count(rc) > 1 => None,
|
||||
// Not borrowing the `Rc<T>` here
|
||||
// as we would be invalidating that borrow while it is outstanding:
|
||||
Some(_) => self.take(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn clone_inner(&self) -> Option<Rc<T>> {
|
||||
unsafe { (*self.as_ptr()).clone() }
|
||||
}
|
||||
}
|
|
@ -1,452 +0,0 @@
|
|||
//! Node iterators
|
||||
|
||||
use std::borrow::Borrow;
|
||||
use std::cell::RefCell;
|
||||
use std::iter::Rev;
|
||||
|
||||
use crate::node_data_ref::NodeDataRef;
|
||||
use crate::select::Selectors;
|
||||
use crate::tree::{ElementData, NodeRef};
|
||||
|
||||
impl NodeRef {
|
||||
/// Return an iterator of references to this node and its ancestors.
|
||||
#[inline]
|
||||
pub fn inclusive_ancestors(&self) -> Ancestors {
|
||||
Ancestors(Some(self.clone()))
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node’s ancestors.
|
||||
#[inline]
|
||||
pub fn ancestors(&self) -> Ancestors {
|
||||
Ancestors(self.parent())
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node and the siblings before it.
|
||||
#[inline]
|
||||
pub fn inclusive_preceding_siblings(&self) -> Rev<Siblings> {
|
||||
match self.parent() {
|
||||
Some(parent) => {
|
||||
let first_sibling = parent.first_child().unwrap();
|
||||
debug_assert!(self.previous_sibling().is_some() || *self == first_sibling);
|
||||
Siblings(Some(State {
|
||||
next: first_sibling,
|
||||
next_back: self.clone(),
|
||||
}))
|
||||
}
|
||||
None => {
|
||||
debug_assert!(self.previous_sibling().is_none());
|
||||
Siblings(Some(State {
|
||||
next: self.clone(),
|
||||
next_back: self.clone(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
.rev()
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node’s siblings before it.
|
||||
#[inline]
|
||||
pub fn preceding_siblings(&self) -> Rev<Siblings> {
|
||||
match (self.parent(), self.previous_sibling()) {
|
||||
(Some(parent), Some(previous_sibling)) => {
|
||||
let first_sibling = parent.first_child().unwrap();
|
||||
Siblings(Some(State {
|
||||
next: first_sibling,
|
||||
next_back: previous_sibling,
|
||||
}))
|
||||
}
|
||||
_ => Siblings(None),
|
||||
}
|
||||
.rev()
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node and the siblings after it.
|
||||
#[inline]
|
||||
pub fn inclusive_following_siblings(&self) -> Siblings {
|
||||
match self.parent() {
|
||||
Some(parent) => {
|
||||
let last_sibling = parent.last_child().unwrap();
|
||||
debug_assert!(self.next_sibling().is_some() || *self == last_sibling);
|
||||
Siblings(Some(State {
|
||||
next: self.clone(),
|
||||
next_back: last_sibling,
|
||||
}))
|
||||
}
|
||||
None => {
|
||||
debug_assert!(self.next_sibling().is_none());
|
||||
Siblings(Some(State {
|
||||
next: self.clone(),
|
||||
next_back: self.clone(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node’s siblings after it.
|
||||
#[inline]
|
||||
pub fn following_siblings(&self) -> Siblings {
|
||||
match (self.parent(), self.next_sibling()) {
|
||||
(Some(parent), Some(next_sibling)) => {
|
||||
let last_sibling = parent.last_child().unwrap();
|
||||
Siblings(Some(State {
|
||||
next: next_sibling,
|
||||
next_back: last_sibling,
|
||||
}))
|
||||
}
|
||||
_ => Siblings(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node’s children.
|
||||
#[inline]
|
||||
pub fn children(&self) -> Siblings {
|
||||
match (self.first_child(), self.last_child()) {
|
||||
(Some(first_child), Some(last_child)) => Siblings(Some(State {
|
||||
next: first_child,
|
||||
next_back: last_child,
|
||||
})),
|
||||
(None, None) => Siblings(None),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node and its descendants, in tree order.
|
||||
///
|
||||
/// Parent nodes appear before the descendants.
|
||||
///
|
||||
/// Note: this is the `NodeEdge::Start` items from `traverse()`.
|
||||
#[inline]
|
||||
pub fn inclusive_descendants(&self) -> Descendants {
|
||||
Descendants(self.traverse_inclusive())
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node’s descendants, in tree order.
|
||||
///
|
||||
/// Parent nodes appear before the descendants.
|
||||
///
|
||||
/// Note: this is the `NodeEdge::Start` items from `traverse()`.
|
||||
#[inline]
|
||||
pub fn descendants(&self) -> Descendants {
|
||||
Descendants(self.traverse())
|
||||
}
|
||||
|
||||
/// Return an iterator of the start and end edges of this node and its descendants,
|
||||
/// in tree order.
|
||||
#[inline]
|
||||
pub fn traverse_inclusive(&self) -> Traverse {
|
||||
Traverse(Some(State {
|
||||
next: NodeEdge::Start(self.clone()),
|
||||
next_back: NodeEdge::End(self.clone()),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Return an iterator of the start and end edges of this node’s descendants,
|
||||
/// in tree order.
|
||||
#[inline]
|
||||
pub fn traverse(&self) -> Traverse {
|
||||
match (self.first_child(), self.last_child()) {
|
||||
(Some(first_child), Some(last_child)) => Traverse(Some(State {
|
||||
next: NodeEdge::Start(first_child),
|
||||
next_back: NodeEdge::End(last_child),
|
||||
})),
|
||||
(None, None) => Traverse(None),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an iterator of the inclusive descendants element that match the given selector list.
|
||||
#[inline]
|
||||
pub fn select(&self, selectors: &str) -> Result<Select<Elements<Descendants>>, ()> {
|
||||
self.inclusive_descendants().select(selectors)
|
||||
}
|
||||
|
||||
/// Return the first inclusive descendants element that match the given selector list.
|
||||
#[inline]
|
||||
pub fn select_first(&self, selectors: &str) -> Result<NodeDataRef<ElementData>, ()> {
|
||||
let mut elements = self.select(selectors)?;
|
||||
elements.next().ok_or(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct State<T> {
|
||||
next: T,
|
||||
next_back: T,
|
||||
}
|
||||
|
||||
/// A double-ended iterator of sibling nodes.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Siblings(Option<State<NodeRef>>);
|
||||
|
||||
macro_rules! siblings_next {
|
||||
($next: ident, $next_back: ident, $next_sibling: ident) => {
|
||||
fn $next(&mut self) -> Option<NodeRef> {
|
||||
#![allow(non_shorthand_field_patterns)]
|
||||
self.0.take().map(|State { $next: next, $next_back: next_back }| {
|
||||
if let Some(sibling) = next.$next_sibling() {
|
||||
if next != next_back {
|
||||
self.0 = Some(State { $next: sibling, $next_back: next_back })
|
||||
}
|
||||
}
|
||||
next
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Siblings {
|
||||
type Item = NodeRef;
|
||||
siblings_next!(next, next_back, next_sibling);
|
||||
}
|
||||
|
||||
impl DoubleEndedIterator for Siblings {
|
||||
siblings_next!(next_back, next, previous_sibling);
|
||||
}
|
||||
|
||||
/// An iterator on ancestor nodes.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Ancestors(Option<NodeRef>);
|
||||
|
||||
impl Iterator for Ancestors {
|
||||
type Item = NodeRef;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<NodeRef> {
|
||||
self.0.take().map(|node| {
|
||||
self.0 = node.parent();
|
||||
node
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator of references to a given node and its descendants, in tree order.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Descendants(Traverse);
|
||||
|
||||
macro_rules! descendants_next {
|
||||
($next: ident) => {
|
||||
#[inline]
|
||||
fn $next(&mut self) -> Option<NodeRef> {
|
||||
loop {
|
||||
match (self.0).$next() {
|
||||
Some(NodeEdge::Start(node)) => return Some(node),
|
||||
Some(NodeEdge::End(_)) => {}
|
||||
None => return None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Descendants {
|
||||
type Item = NodeRef;
|
||||
descendants_next!(next);
|
||||
}
|
||||
|
||||
impl DoubleEndedIterator for Descendants {
|
||||
descendants_next!(next_back);
|
||||
}
|
||||
|
||||
/// Marks either the start or the end of a node.
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
pub enum NodeEdge<T> {
|
||||
/// Indicates that start of a node that has children.
|
||||
/// Yielded by `Traverse::next` before the node’s descendants.
|
||||
/// In HTML or XML, this corresponds to an opening tag like `<div>`
|
||||
Start(T),
|
||||
|
||||
/// Indicates that end of a node that has children.
|
||||
/// Yielded by `Traverse::next` after the node’s descendants.
|
||||
/// In HTML or XML, this corresponds to a closing tag like `</div>`
|
||||
End(T),
|
||||
}
|
||||
|
||||
/// An iterator of the start and end edges of the nodes in a given subtree.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Traverse(Option<State<NodeEdge<NodeRef>>>);
|
||||
|
||||
macro_rules! traverse_next {
|
||||
($next: ident, $next_back: ident, $first_child: ident, $next_sibling: ident, $Start: ident, $End: ident) => {
|
||||
fn $next(&mut self) -> Option<NodeEdge<NodeRef>> {
|
||||
#![allow(non_shorthand_field_patterns)]
|
||||
self.0.take().map(|State { $next: next, $next_back: next_back }| {
|
||||
if next != next_back {
|
||||
self.0 = match next {
|
||||
NodeEdge::$Start(ref node) => {
|
||||
match node.$first_child() {
|
||||
Some(child) => {
|
||||
Some(State { $next: NodeEdge::$Start(child), $next_back: next_back })
|
||||
}
|
||||
None => Some(State { $next: NodeEdge::$End(node.clone()), $next_back: next_back })
|
||||
}
|
||||
}
|
||||
NodeEdge::$End(ref node) => {
|
||||
match node.$next_sibling() {
|
||||
Some(sibling) => {
|
||||
Some(State { $next: NodeEdge::$Start(sibling), $next_back: next_back })
|
||||
}
|
||||
None => node.parent().map(|parent| {
|
||||
State { $next: NodeEdge::$End(parent), $next_back: next_back }
|
||||
})
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
next
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Traverse {
|
||||
type Item = NodeEdge<NodeRef>;
|
||||
traverse_next!(next, next_back, first_child, next_sibling, Start, End);
|
||||
}
|
||||
|
||||
impl DoubleEndedIterator for Traverse {
|
||||
traverse_next!(next_back, next, last_child, previous_sibling, End, Start);
|
||||
}
|
||||
|
||||
macro_rules! filter_map_like_iterator {
|
||||
(#[$doc: meta] $name: ident: $f: expr, $from: ty => $to: ty) => {
|
||||
#[$doc]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct $name<I>(pub I);
|
||||
|
||||
impl<I> Iterator for $name<I>
|
||||
where
|
||||
I: Iterator<Item = $from>,
|
||||
{
|
||||
type Item = $to;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<$to> {
|
||||
for x in self.0.by_ref() {
|
||||
if let Some(y) = ($f)(x) {
|
||||
return Some(y);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl<I> DoubleEndedIterator for $name<I>
|
||||
where
|
||||
I: DoubleEndedIterator<Item = $from>,
|
||||
{
|
||||
#[inline]
|
||||
fn next_back(&mut self) -> Option<$to> {
|
||||
for x in self.0.by_ref().rev() {
|
||||
if let Some(y) = ($f)(x) {
|
||||
return Some(y);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
filter_map_like_iterator! {
|
||||
/// A node iterator adaptor that yields element nodes.
|
||||
Elements: NodeRef::into_element_ref, NodeRef => NodeDataRef<ElementData>
|
||||
}
|
||||
|
||||
filter_map_like_iterator! {
|
||||
/// A node iterator adaptor that yields comment nodes.
|
||||
Comments: NodeRef::into_comment_ref, NodeRef => NodeDataRef<RefCell<String>>
|
||||
}
|
||||
|
||||
filter_map_like_iterator! {
|
||||
/// A node iterator adaptor that yields text nodes.
|
||||
TextNodes: NodeRef::into_text_ref, NodeRef => NodeDataRef<RefCell<String>>
|
||||
}
|
||||
|
||||
/// An element iterator adaptor that yields elements maching given selectors.
|
||||
pub struct Select<I, S = Selectors>
|
||||
where
|
||||
I: Iterator<Item = NodeDataRef<ElementData>>,
|
||||
S: Borrow<Selectors>,
|
||||
{
|
||||
/// The underlying iterator.
|
||||
pub iter: I,
|
||||
|
||||
/// The selectors to be matched.
|
||||
pub selectors: S,
|
||||
}
|
||||
|
||||
impl<I, S> Iterator for Select<I, S>
|
||||
where
|
||||
I: Iterator<Item = NodeDataRef<ElementData>>,
|
||||
S: Borrow<Selectors>,
|
||||
{
|
||||
type Item = NodeDataRef<ElementData>;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<NodeDataRef<ElementData>> {
|
||||
for element in self.iter.by_ref() {
|
||||
if self.selectors.borrow().matches(&element) {
|
||||
return Some(element);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl<I, S> DoubleEndedIterator for Select<I, S>
|
||||
where
|
||||
I: DoubleEndedIterator<Item = NodeDataRef<ElementData>>,
|
||||
S: Borrow<Selectors>,
|
||||
{
|
||||
#[inline]
|
||||
fn next_back(&mut self) -> Option<NodeDataRef<ElementData>> {
|
||||
for element in self.iter.by_ref().rev() {
|
||||
if self.selectors.borrow().matches(&element) {
|
||||
return Some(element);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience methods for node iterators.
|
||||
pub trait NodeIterator: Sized + Iterator<Item = NodeRef> {
|
||||
/// Filter this element iterator to elements.
|
||||
#[inline]
|
||||
fn elements(self) -> Elements<Self> {
|
||||
Elements(self)
|
||||
}
|
||||
|
||||
/// Filter this node iterator to text nodes.
|
||||
#[inline]
|
||||
fn text_nodes(self) -> TextNodes<Self> {
|
||||
TextNodes(self)
|
||||
}
|
||||
|
||||
/// Filter this node iterator to comment nodes.
|
||||
#[inline]
|
||||
fn comments(self) -> Comments<Self> {
|
||||
Comments(self)
|
||||
}
|
||||
|
||||
/// Filter this node iterator to elements maching the given selectors.
|
||||
#[inline]
|
||||
fn select(self, selectors: &str) -> Result<Select<Elements<Self>>, ()> {
|
||||
self.elements().select(selectors)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience methods for element iterators.
|
||||
pub trait ElementIterator: Sized + Iterator<Item = NodeDataRef<ElementData>> {
|
||||
/// Filter this element iterator to elements maching the given selectors.
|
||||
#[inline]
|
||||
fn select(self, selectors: &str) -> Result<Select<Self>, ()> {
|
||||
Selectors::compile(selectors).map(|s| Select {
|
||||
iter: self,
|
||||
selectors: s,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<I> NodeIterator for I where I: Iterator<Item = NodeRef> {}
|
||||
impl<I> ElementIterator for I where I: Iterator<Item = NodeDataRef<ElementData>> {}
|
|
@ -1,40 +0,0 @@
|
|||
/*!
|
||||
|
||||
Kuchiki (朽木), a HTML/XML tree manipulation library for Rust.
|
||||
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
#[macro_use]
|
||||
extern crate html5ever;
|
||||
#[macro_use]
|
||||
extern crate matches;
|
||||
|
||||
mod attributes;
|
||||
mod cell_extras;
|
||||
pub mod iter;
|
||||
mod node_data_ref;
|
||||
mod parser;
|
||||
mod select;
|
||||
mod serializer;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
mod tree;
|
||||
|
||||
pub use attributes::{Attribute, Attributes, ExpandedName};
|
||||
pub use node_data_ref::NodeDataRef;
|
||||
pub use parser::{parse_html, parse_html_with_options, parse_fragment, ParseOpts, Sink};
|
||||
pub use select::{Selector, Selectors, Specificity};
|
||||
pub use tree::{Doctype, DocumentData, ElementData, Node, NodeData, NodeRef};
|
||||
|
||||
/// This module re-exports a number of traits that are useful when using Kuchiki.
|
||||
/// It can be used with:
|
||||
///
|
||||
/// ```rust
|
||||
/// use kuchiki::traits::*;
|
||||
/// ```
|
||||
pub mod traits {
|
||||
pub use html5ever::tendril::TendrilSink;
|
||||
pub use crate::iter::{ElementIterator, NodeIterator};
|
||||
}
|
|
@ -1,116 +0,0 @@
|
|||
use std::cell::RefCell;
|
||||
use std::fmt;
|
||||
use std::ops::Deref;
|
||||
use crate::tree::{Doctype, DocumentData, ElementData, Node, NodeRef};
|
||||
|
||||
impl NodeRef {
|
||||
/// If this node is an element, return a strong reference to element-specific data.
|
||||
#[inline]
|
||||
pub fn into_element_ref(self) -> Option<NodeDataRef<ElementData>> {
|
||||
NodeDataRef::new_opt(self, Node::as_element)
|
||||
}
|
||||
|
||||
/// If this node is a text node, return a strong reference to its contents.
|
||||
#[inline]
|
||||
pub fn into_text_ref(self) -> Option<NodeDataRef<RefCell<String>>> {
|
||||
NodeDataRef::new_opt(self, Node::as_text)
|
||||
}
|
||||
|
||||
/// If this node is a comment, return a strong reference to its contents.
|
||||
#[inline]
|
||||
pub fn into_comment_ref(self) -> Option<NodeDataRef<RefCell<String>>> {
|
||||
NodeDataRef::new_opt(self, Node::as_comment)
|
||||
}
|
||||
|
||||
/// If this node is a doctype, return a strong reference to doctype-specific data.
|
||||
#[inline]
|
||||
pub fn into_doctype_ref(self) -> Option<NodeDataRef<Doctype>> {
|
||||
NodeDataRef::new_opt(self, Node::as_doctype)
|
||||
}
|
||||
|
||||
/// If this node is a document, return a strong reference to document-specific data.
|
||||
#[inline]
|
||||
pub fn into_document_ref(self) -> Option<NodeDataRef<DocumentData>> {
|
||||
NodeDataRef::new_opt(self, Node::as_document)
|
||||
}
|
||||
}
|
||||
|
||||
/// Holds a strong reference to a node, but dereferences to some component inside of it.
|
||||
#[derive(Eq)]
|
||||
pub struct NodeDataRef<T> {
|
||||
_keep_alive: NodeRef,
|
||||
_reference: *const T,
|
||||
}
|
||||
|
||||
impl<T> NodeDataRef<T> {
|
||||
/// Create a `NodeDataRef` for a component in a given node.
|
||||
#[inline]
|
||||
pub fn new<F>(rc: NodeRef, f: F) -> NodeDataRef<T>
|
||||
where
|
||||
F: FnOnce(&Node) -> &T,
|
||||
{
|
||||
NodeDataRef {
|
||||
_reference: f(&*rc),
|
||||
_keep_alive: rc,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a `NodeDataRef` for and a component that may or may not be in a given node.
|
||||
#[inline]
|
||||
pub fn new_opt<F>(rc: NodeRef, f: F) -> Option<NodeDataRef<T>>
|
||||
where
|
||||
F: FnOnce(&Node) -> Option<&T>,
|
||||
{
|
||||
f(&*rc).map(|r| r as *const T).map(move |r| NodeDataRef {
|
||||
_reference: r,
|
||||
_keep_alive: rc,
|
||||
})
|
||||
}
|
||||
|
||||
/// Access the corresponding node.
|
||||
#[inline]
|
||||
pub fn as_node(&self) -> &NodeRef {
|
||||
&self._keep_alive
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Deref for NodeDataRef<T> {
|
||||
type Target = T;
|
||||
#[inline]
|
||||
fn deref(&self) -> &T {
|
||||
unsafe { &*self._reference }
|
||||
}
|
||||
}
|
||||
|
||||
// #[derive(PartialEq)] would compare both fields
|
||||
impl<T> PartialEq for NodeDataRef<T> {
|
||||
#[inline]
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self._keep_alive == other._keep_alive
|
||||
}
|
||||
}
|
||||
|
||||
// #[derive(Clone)] would have an unnecessary `T: Clone` bound
|
||||
impl<T> Clone for NodeDataRef<T> {
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
NodeDataRef {
|
||||
_keep_alive: self._keep_alive.clone(),
|
||||
_reference: self._reference,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: fmt::Debug> fmt::Debug for NodeDataRef<T> {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
fmt::Debug::fmt(&**self, f)
|
||||
}
|
||||
}
|
||||
|
||||
impl NodeDataRef<ElementData> {
|
||||
/// Return the concatenation of all text nodes in this subtree.
|
||||
pub fn text_contents(&self) -> String {
|
||||
self.as_node().text_contents()
|
||||
}
|
||||
}
|
|
@ -1,241 +0,0 @@
|
|||
use html5ever::tendril::StrTendril;
|
||||
use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
|
||||
use html5ever::{self, Attribute, ExpandedName, QualName};
|
||||
use std::borrow::Cow;
|
||||
|
||||
use crate::attributes;
|
||||
use crate::tree::NodeRef;
|
||||
|
||||
/// Options for the HTML parser.
|
||||
#[derive(Default)]
|
||||
pub struct ParseOpts {
|
||||
/// Options for the HTML tokenizer.
|
||||
pub tokenizer: html5ever::tokenizer::TokenizerOpts,
|
||||
|
||||
/// Options for the HTML tree builder.
|
||||
pub tree_builder: html5ever::tree_builder::TreeBuilderOpts,
|
||||
|
||||
/// A callback for HTML parse errors (which are never fatal).
|
||||
pub on_parse_error: Option<Box<dyn FnMut(Cow<'static, str>)>>,
|
||||
}
|
||||
|
||||
/// Parse an HTML document with html5ever and the default configuration.
|
||||
pub fn parse_html() -> html5ever::Parser<Sink> {
|
||||
parse_html_with_options(ParseOpts::default())
|
||||
}
|
||||
|
||||
/// Parse an HTML document with html5ever with custom configuration.
|
||||
pub fn parse_html_with_options(opts: ParseOpts) -> html5ever::Parser<Sink> {
|
||||
let sink = Sink {
|
||||
document_node: NodeRef::new_document(),
|
||||
on_parse_error: opts.on_parse_error,
|
||||
};
|
||||
let html5opts = html5ever::ParseOpts {
|
||||
tokenizer: opts.tokenizer,
|
||||
tree_builder: opts.tree_builder,
|
||||
};
|
||||
html5ever::parse_document(sink, html5opts)
|
||||
}
|
||||
|
||||
/// Parse an HTML fragment with html5ever and the default configuration.
|
||||
pub fn parse_fragment(ctx_name: QualName, ctx_attr: Vec<Attribute>) -> html5ever::Parser<Sink> {
|
||||
parse_fragment_with_options(ParseOpts::default(), ctx_name, ctx_attr)
|
||||
}
|
||||
|
||||
/// Parse an HTML fragment with html5ever with custom configuration.
|
||||
pub fn parse_fragment_with_options(opts: ParseOpts, ctx_name: QualName, ctx_attr: Vec<Attribute>) -> html5ever::Parser<Sink> {
|
||||
let sink = Sink {
|
||||
document_node: NodeRef::new_document(),
|
||||
on_parse_error: opts.on_parse_error,
|
||||
};
|
||||
let html5opts = html5ever::ParseOpts {
|
||||
tokenizer: opts.tokenizer,
|
||||
tree_builder: opts.tree_builder,
|
||||
};
|
||||
html5ever::parse_fragment(sink, html5opts, ctx_name, ctx_attr)
|
||||
}
|
||||
|
||||
/// Receives new tree nodes during parsing.
|
||||
pub struct Sink {
|
||||
document_node: NodeRef,
|
||||
on_parse_error: Option<Box<dyn FnMut(Cow<'static, str>)>>,
|
||||
}
|
||||
|
||||
impl TreeSink for Sink {
|
||||
type Output = NodeRef;
|
||||
|
||||
fn finish(self) -> NodeRef {
|
||||
self.document_node
|
||||
}
|
||||
|
||||
type Handle = NodeRef;
|
||||
|
||||
#[inline]
|
||||
fn parse_error(&mut self, message: Cow<'static, str>) {
|
||||
if let Some(ref mut handler) = self.on_parse_error {
|
||||
handler(message)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_document(&mut self) -> NodeRef {
|
||||
self.document_node.clone()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn set_quirks_mode(&mut self, mode: QuirksMode) {
|
||||
self.document_node
|
||||
.as_document()
|
||||
.unwrap()
|
||||
._quirks_mode
|
||||
.set(mode)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn same_node(&self, x: &NodeRef, y: &NodeRef) -> bool {
|
||||
x == y
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn elem_name<'a>(&self, target: &'a NodeRef) -> ExpandedName<'a> {
|
||||
target.as_element().unwrap().name.expanded()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn create_element(
|
||||
&mut self,
|
||||
name: QualName,
|
||||
attrs: Vec<Attribute>,
|
||||
_flags: ElementFlags,
|
||||
) -> NodeRef {
|
||||
NodeRef::new_element(
|
||||
name,
|
||||
attrs.into_iter().map(|attr| {
|
||||
let Attribute {
|
||||
name: QualName { prefix, ns, local },
|
||||
value,
|
||||
} = attr;
|
||||
let value = String::from(value);
|
||||
(
|
||||
attributes::ExpandedName { ns, local },
|
||||
attributes::Attribute { prefix, value },
|
||||
)
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn create_comment(&mut self, text: StrTendril) -> NodeRef {
|
||||
NodeRef::new_comment(text)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> NodeRef {
|
||||
NodeRef::new_processing_instruction(target, data)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn append(&mut self, parent: &NodeRef, child: NodeOrText<NodeRef>) {
|
||||
match child {
|
||||
NodeOrText::AppendNode(node) => parent.append(node),
|
||||
NodeOrText::AppendText(text) => {
|
||||
if let Some(last_child) = parent.last_child() {
|
||||
if let Some(existing) = last_child.as_text() {
|
||||
existing.borrow_mut().push_str(&text);
|
||||
return;
|
||||
}
|
||||
}
|
||||
parent.append(NodeRef::new_text(text))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn append_before_sibling(&mut self, sibling: &NodeRef, child: NodeOrText<NodeRef>) {
|
||||
match child {
|
||||
NodeOrText::AppendNode(node) => sibling.insert_before(node),
|
||||
NodeOrText::AppendText(text) => {
|
||||
if let Some(previous_sibling) = sibling.previous_sibling() {
|
||||
if let Some(existing) = previous_sibling.as_text() {
|
||||
existing.borrow_mut().push_str(&text);
|
||||
return;
|
||||
}
|
||||
}
|
||||
sibling.insert_before(NodeRef::new_text(text))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn append_doctype_to_document(
|
||||
&mut self,
|
||||
name: StrTendril,
|
||||
public_id: StrTendril,
|
||||
system_id: StrTendril,
|
||||
) {
|
||||
self.document_node
|
||||
.append(NodeRef::new_doctype(name, public_id, system_id))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn add_attrs_if_missing(&mut self, target: &NodeRef, attrs: Vec<Attribute>) {
|
||||
let element = target.as_element().unwrap();
|
||||
let mut attributes = element.attributes.borrow_mut();
|
||||
|
||||
for Attribute {
|
||||
name: QualName { prefix, ns, local },
|
||||
value,
|
||||
} in attrs
|
||||
{
|
||||
attributes
|
||||
.map
|
||||
.entry(attributes::ExpandedName { ns, local })
|
||||
.or_insert_with(|| {
|
||||
let value = String::from(value);
|
||||
attributes::Attribute { prefix, value }
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn remove_from_parent(&mut self, target: &NodeRef) {
|
||||
target.detach()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reparent_children(&mut self, node: &NodeRef, new_parent: &NodeRef) {
|
||||
// FIXME: Can this be done more effciently in rctree,
|
||||
// by moving the whole linked list of children at once?
|
||||
for child in node.children() {
|
||||
new_parent.append(child)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn mark_script_already_started(&mut self, _node: &NodeRef) {
|
||||
// FIXME: Is this useful outside of a browser?
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_template_contents(&mut self, target: &NodeRef) -> NodeRef {
|
||||
target
|
||||
.as_element()
|
||||
.unwrap()
|
||||
.template_contents
|
||||
.clone()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn append_based_on_parent_node(
|
||||
&mut self,
|
||||
element: &NodeRef,
|
||||
prev_element: &NodeRef,
|
||||
child: NodeOrText<NodeRef>,
|
||||
) {
|
||||
if element.parent().is_some() {
|
||||
self.append_before_sibling(element, child)
|
||||
} else {
|
||||
self.append(prev_element, child)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,433 +0,0 @@
|
|||
use crate::attributes::ExpandedName;
|
||||
use cssparser::{self, CowRcStr, ParseError, SourceLocation, ToCss};
|
||||
use html5ever::{LocalName, Namespace};
|
||||
use crate::iter::{NodeIterator, Select};
|
||||
use crate::node_data_ref::NodeDataRef;
|
||||
use selectors::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint};
|
||||
use selectors::context::QuirksMode;
|
||||
use selectors::parser::SelectorParseErrorKind;
|
||||
use selectors::parser::{
|
||||
NonTSPseudoClass, Parser, Selector as GenericSelector, SelectorImpl, SelectorList,
|
||||
};
|
||||
use selectors::{self, matching, OpaqueElement};
|
||||
use std::fmt;
|
||||
use crate::tree::{ElementData, Node, NodeData, NodeRef};
|
||||
|
||||
/// The definition of whitespace per CSS Selectors Level 3 § 4.
|
||||
///
|
||||
/// Copied from rust-selectors.
|
||||
static SELECTOR_WHITESPACE: &[char] = &[' ', '\t', '\n', '\r', '\x0C'];
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct KuchikiSelectors;
|
||||
|
||||
impl SelectorImpl for KuchikiSelectors {
|
||||
type AttrValue = String;
|
||||
type Identifier = LocalName;
|
||||
type ClassName = LocalName;
|
||||
type LocalName = LocalName;
|
||||
type PartName = LocalName;
|
||||
type NamespacePrefix = LocalName;
|
||||
type NamespaceUrl = Namespace;
|
||||
type BorrowedNamespaceUrl = Namespace;
|
||||
type BorrowedLocalName = LocalName;
|
||||
|
||||
type NonTSPseudoClass = PseudoClass;
|
||||
type PseudoElement = PseudoElement;
|
||||
|
||||
type ExtraMatchingData = ();
|
||||
}
|
||||
|
||||
struct KuchikiParser;
|
||||
|
||||
impl<'i> Parser<'i> for KuchikiParser {
|
||||
type Impl = KuchikiSelectors;
|
||||
type Error = SelectorParseErrorKind<'i>;
|
||||
|
||||
fn parse_non_ts_pseudo_class(
|
||||
&self,
|
||||
location: SourceLocation,
|
||||
name: CowRcStr<'i>,
|
||||
) -> Result<PseudoClass, ParseError<'i, SelectorParseErrorKind<'i>>> {
|
||||
use self::PseudoClass::*;
|
||||
if name.eq_ignore_ascii_case("any-link") {
|
||||
Ok(AnyLink)
|
||||
} else if name.eq_ignore_ascii_case("link") {
|
||||
Ok(Link)
|
||||
} else if name.eq_ignore_ascii_case("visited") {
|
||||
Ok(Visited)
|
||||
} else if name.eq_ignore_ascii_case("active") {
|
||||
Ok(Active)
|
||||
} else if name.eq_ignore_ascii_case("focus") {
|
||||
Ok(Focus)
|
||||
} else if name.eq_ignore_ascii_case("hover") {
|
||||
Ok(Hover)
|
||||
} else if name.eq_ignore_ascii_case("enabled") {
|
||||
Ok(Enabled)
|
||||
} else if name.eq_ignore_ascii_case("disabled") {
|
||||
Ok(Disabled)
|
||||
} else if name.eq_ignore_ascii_case("checked") {
|
||||
Ok(Checked)
|
||||
} else if name.eq_ignore_ascii_case("indeterminate") {
|
||||
Ok(Indeterminate)
|
||||
} else {
|
||||
Err(
|
||||
location.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement(
|
||||
name,
|
||||
)),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Clone, Debug, Hash)]
|
||||
pub enum PseudoClass {
|
||||
AnyLink,
|
||||
Link,
|
||||
Visited,
|
||||
Active,
|
||||
Focus,
|
||||
Hover,
|
||||
Enabled,
|
||||
Disabled,
|
||||
Checked,
|
||||
Indeterminate,
|
||||
}
|
||||
|
||||
impl NonTSPseudoClass for PseudoClass {
|
||||
type Impl = KuchikiSelectors;
|
||||
|
||||
fn is_active_or_hover(&self) -> bool {
|
||||
matches!(*self, PseudoClass::Active | PseudoClass::Hover)
|
||||
}
|
||||
|
||||
fn is_user_action_state(&self) -> bool {
|
||||
matches!(*self, PseudoClass::Active | PseudoClass::Hover | PseudoClass::Focus)
|
||||
}
|
||||
|
||||
fn has_zero_specificity(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl ToCss for PseudoClass {
|
||||
fn to_css<W>(&self, dest: &mut W) -> fmt::Result
|
||||
where
|
||||
W: fmt::Write,
|
||||
{
|
||||
dest.write_str(match *self {
|
||||
PseudoClass::AnyLink => ":any-link",
|
||||
PseudoClass::Link => ":link",
|
||||
PseudoClass::Visited => ":visited",
|
||||
PseudoClass::Active => ":active",
|
||||
PseudoClass::Focus => ":focus",
|
||||
PseudoClass::Hover => ":hover",
|
||||
PseudoClass::Enabled => ":enabled",
|
||||
PseudoClass::Disabled => ":disabled",
|
||||
PseudoClass::Checked => ":checked",
|
||||
PseudoClass::Indeterminate => ":indeterminate",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Clone, Debug, Hash)]
|
||||
pub enum PseudoElement {}
|
||||
|
||||
impl ToCss for PseudoElement {
|
||||
fn to_css<W>(&self, _dest: &mut W) -> fmt::Result
|
||||
where
|
||||
W: fmt::Write,
|
||||
{
|
||||
match *self {}
|
||||
}
|
||||
}
|
||||
|
||||
impl selectors::parser::PseudoElement for PseudoElement {
|
||||
type Impl = KuchikiSelectors;
|
||||
}
|
||||
|
||||
impl selectors::Element for NodeDataRef<ElementData> {
|
||||
type Impl = KuchikiSelectors;
|
||||
|
||||
#[inline]
|
||||
fn opaque(&self) -> OpaqueElement {
|
||||
let node: &Node = self.as_node();
|
||||
OpaqueElement::new(node)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_html_slot_element(&self) -> bool {
|
||||
false
|
||||
}
|
||||
#[inline]
|
||||
fn parent_node_is_shadow_root(&self) -> bool {
|
||||
false
|
||||
}
|
||||
#[inline]
|
||||
fn containing_shadow_host(&self) -> Option<Self> {
|
||||
None
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn parent_element(&self) -> Option<Self> {
|
||||
self.as_node().parent().and_then(NodeRef::into_element_ref)
|
||||
}
|
||||
#[inline]
|
||||
fn prev_sibling_element(&self) -> Option<Self> {
|
||||
self.as_node().preceding_siblings().elements().next()
|
||||
}
|
||||
#[inline]
|
||||
fn next_sibling_element(&self) -> Option<Self> {
|
||||
self.as_node().following_siblings().elements().next()
|
||||
}
|
||||
#[inline]
|
||||
fn is_empty(&self) -> bool {
|
||||
self.as_node().children().all(|child| match *child.data() {
|
||||
NodeData::Element(_) => false,
|
||||
NodeData::Text(ref text) => text.borrow().is_empty(),
|
||||
_ => true,
|
||||
})
|
||||
}
|
||||
#[inline]
|
||||
fn is_root(&self) -> bool {
|
||||
match self.as_node().parent() {
|
||||
None => false,
|
||||
Some(parent) => matches!(*parent.data(), NodeData::Document(_)),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_html_element_in_html_document(&self) -> bool {
|
||||
// FIXME: Have a notion of HTML document v.s. XML document?
|
||||
self.name.ns == ns!(html)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn has_local_name(&self, name: &LocalName) -> bool {
|
||||
self.name.local == *name
|
||||
}
|
||||
#[inline]
|
||||
fn has_namespace(&self, namespace: &Namespace) -> bool {
|
||||
self.name.ns == *namespace
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_part(&self, _name: &LocalName) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn exported_part(&self, _: &LocalName) -> Option<LocalName> {
|
||||
None
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn imported_part(&self, _: &LocalName) -> Option<LocalName> {
|
||||
None
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_pseudo_element(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_same_type(&self, other: &Self) -> bool {
|
||||
self.name == other.name
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_link(&self) -> bool {
|
||||
self.name.ns == ns!(html)
|
||||
&& matches!(
|
||||
self.name.local,
|
||||
local_name!("a") | local_name!("area") | local_name!("link")
|
||||
)
|
||||
&& self
|
||||
.attributes
|
||||
.borrow()
|
||||
.map
|
||||
.contains_key(&ExpandedName::new(ns!(), local_name!("href")))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn has_id(&self, id: &LocalName, case_sensitivity: CaseSensitivity) -> bool {
|
||||
self.attributes
|
||||
.borrow()
|
||||
.get(local_name!("id"))
|
||||
.map_or(false, |id_attr| {
|
||||
case_sensitivity.eq(id.as_bytes(), id_attr.as_bytes())
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn has_class(&self, name: &LocalName, case_sensitivity: CaseSensitivity) -> bool {
|
||||
let name = name.as_bytes();
|
||||
!name.is_empty()
|
||||
&& if let Some(class_attr) = self.attributes.borrow().get(local_name!("class")) {
|
||||
class_attr
|
||||
.split(SELECTOR_WHITESPACE)
|
||||
.any(|class| case_sensitivity.eq(class.as_bytes(), name))
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn attr_matches(
|
||||
&self,
|
||||
ns: &NamespaceConstraint<&Namespace>,
|
||||
local_name: &LocalName,
|
||||
operation: &AttrSelectorOperation<&String>,
|
||||
) -> bool {
|
||||
let attrs = self.attributes.borrow();
|
||||
match *ns {
|
||||
NamespaceConstraint::Any => attrs
|
||||
.map
|
||||
.iter()
|
||||
.any(|(name, attr)| name.local == *local_name && operation.eval_str(&attr.value)),
|
||||
NamespaceConstraint::Specific(ns_url) => attrs
|
||||
.map
|
||||
.get(&ExpandedName::new(ns_url, local_name.clone()))
|
||||
.map_or(false, |attr| operation.eval_str(&attr.value)),
|
||||
}
|
||||
}
|
||||
|
||||
fn match_pseudo_element(
|
||||
&self,
|
||||
pseudo: &PseudoElement,
|
||||
_context: &mut matching::MatchingContext<KuchikiSelectors>,
|
||||
) -> bool {
|
||||
match *pseudo {}
|
||||
}
|
||||
|
||||
fn match_non_ts_pseudo_class<F>(
|
||||
&self,
|
||||
pseudo: &PseudoClass,
|
||||
_context: &mut matching::MatchingContext<KuchikiSelectors>,
|
||||
_flags_setter: &mut F,
|
||||
) -> bool
|
||||
where
|
||||
F: FnMut(&Self, matching::ElementSelectorFlags),
|
||||
{
|
||||
use self::PseudoClass::*;
|
||||
match *pseudo {
|
||||
Active | Focus | Hover | Enabled | Disabled | Checked | Indeterminate | Visited => {
|
||||
false
|
||||
}
|
||||
AnyLink | Link => {
|
||||
self.name.ns == ns!(html)
|
||||
&& matches!(
|
||||
self.name.local,
|
||||
local_name!("a") | local_name!("area") | local_name!("link")
|
||||
)
|
||||
&& self.attributes.borrow().contains(local_name!("href"))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A pre-compiled list of CSS Selectors.
|
||||
pub struct Selectors(pub Vec<Selector>);
|
||||
|
||||
/// A pre-compiled CSS Selector.
|
||||
pub struct Selector(GenericSelector<KuchikiSelectors>);
|
||||
|
||||
/// The specificity of a selector.
|
||||
///
|
||||
/// Opaque, but ordered.
|
||||
///
|
||||
/// Determines precedence in the cascading algorithm.
|
||||
/// When equal, a rule later in source order takes precedence.
|
||||
#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd)]
|
||||
pub struct Specificity(u32);
|
||||
|
||||
impl Selectors {
|
||||
/// Compile a list of selectors. This may fail on syntax errors or unsupported selectors.
|
||||
#[inline]
|
||||
pub fn compile(s: &str) -> Result<Selectors, ()> {
|
||||
let mut input = cssparser::ParserInput::new(s);
|
||||
match SelectorList::parse(&KuchikiParser, &mut cssparser::Parser::new(&mut input)) {
|
||||
Ok(list) => Ok(Selectors(list.0.into_iter().map(Selector).collect())),
|
||||
Err(_) => Err(()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether the given element matches this list of selectors.
|
||||
#[inline]
|
||||
pub fn matches(&self, element: &NodeDataRef<ElementData>) -> bool {
|
||||
self.0.iter().any(|s| s.matches(element))
|
||||
}
|
||||
|
||||
/// Filter an element iterator, yielding those matching this list of selectors.
|
||||
#[inline]
|
||||
pub fn filter<I>(&self, iter: I) -> Select<I, &Selectors>
|
||||
where
|
||||
I: Iterator<Item = NodeDataRef<ElementData>>,
|
||||
{
|
||||
Select {
|
||||
iter,
|
||||
selectors: self,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Selector {
|
||||
/// Returns whether the given element matches this selector.
|
||||
#[inline]
|
||||
pub fn matches(&self, element: &NodeDataRef<ElementData>) -> bool {
|
||||
let mut context = matching::MatchingContext::new(
|
||||
matching::MatchingMode::Normal,
|
||||
None,
|
||||
None,
|
||||
QuirksMode::NoQuirks,
|
||||
);
|
||||
matching::matches_selector(&self.0, 0, None, element, &mut context, &mut |_, _| {})
|
||||
}
|
||||
|
||||
/// Return the specificity of this selector.
|
||||
pub fn specificity(&self) -> Specificity {
|
||||
Specificity(self.0.specificity())
|
||||
}
|
||||
}
|
||||
|
||||
impl ::std::str::FromStr for Selectors {
|
||||
type Err = ();
|
||||
#[inline]
|
||||
fn from_str(s: &str) -> Result<Selectors, ()> {
|
||||
Selectors::compile(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Selector {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
self.0.to_css(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Selectors {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let mut iter = self.0.iter();
|
||||
let first = iter
|
||||
.next()
|
||||
.expect("Empty Selectors, should contain at least one selector");
|
||||
first.0.to_css(f)?;
|
||||
for selector in iter {
|
||||
f.write_str(", ")?;
|
||||
selector.0.to_css(f)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Selector {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Display::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Selectors {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Display::fmt(self, f)
|
||||
}
|
||||
}
|
|
@ -1,105 +0,0 @@
|
|||
use html5ever::serialize::TraversalScope::*;
|
||||
use html5ever::serialize::{serialize, Serialize, SerializeOpts, Serializer, TraversalScope};
|
||||
use html5ever::QualName;
|
||||
use std::fs::File;
|
||||
use std::io::{Result, Write};
|
||||
use std::path::Path;
|
||||
use std::string::ToString;
|
||||
|
||||
use crate::tree::{NodeData, NodeRef};
|
||||
|
||||
impl Serialize for NodeRef {
|
||||
fn serialize<S: Serializer>(
|
||||
&self,
|
||||
serializer: &mut S,
|
||||
traversal_scope: TraversalScope,
|
||||
) -> Result<()> {
|
||||
match (traversal_scope, self.data()) {
|
||||
(ref scope, &NodeData::Element(ref element)) => {
|
||||
if *scope == IncludeNode {
|
||||
let attrs = element.attributes.borrow();
|
||||
|
||||
// Unfortunately we need to allocate something to hold these &'a QualName
|
||||
let attrs = attrs
|
||||
.map
|
||||
.iter()
|
||||
.map(|(name, attr)| {
|
||||
(
|
||||
QualName::new(
|
||||
attr.prefix.clone(),
|
||||
name.ns.clone(),
|
||||
name.local.clone(),
|
||||
),
|
||||
&attr.value,
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
serializer.start_elem(
|
||||
element.name.clone(),
|
||||
attrs.iter().map(|&(ref name, value)| (name, &**value)),
|
||||
)?
|
||||
}
|
||||
|
||||
for child in self.children() {
|
||||
Serialize::serialize(&child, serializer, IncludeNode)?
|
||||
}
|
||||
|
||||
if *scope == IncludeNode {
|
||||
serializer.end_elem(element.name.clone())?
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
(_, &NodeData::DocumentFragment) | (_, &NodeData::Document(_)) => {
|
||||
for child in self.children() {
|
||||
Serialize::serialize(&child, serializer, IncludeNode)?
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
(ChildrenOnly(_), _) => Ok(()),
|
||||
|
||||
(IncludeNode, &NodeData::Doctype(ref doctype)) => {
|
||||
serializer.write_doctype(&doctype.name)
|
||||
}
|
||||
(IncludeNode, &NodeData::Text(ref text)) => serializer.write_text(&text.borrow()),
|
||||
(IncludeNode, &NodeData::Comment(ref text)) => serializer.write_comment(&text.borrow()),
|
||||
(IncludeNode, &NodeData::ProcessingInstruction(ref contents)) => {
|
||||
let contents = contents.borrow();
|
||||
serializer.write_processing_instruction(&contents.0, &contents.1)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for NodeRef {
|
||||
#[inline]
|
||||
fn to_string(&self) -> String {
|
||||
let mut u8_vec = Vec::new();
|
||||
self.serialize(&mut u8_vec).unwrap();
|
||||
String::from_utf8(u8_vec).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl NodeRef {
|
||||
/// Serialize this node and its descendants in HTML syntax to the given stream.
|
||||
#[inline]
|
||||
pub fn serialize<W: Write>(&self, writer: &mut W) -> Result<()> {
|
||||
serialize(
|
||||
writer,
|
||||
self,
|
||||
SerializeOpts {
|
||||
traversal_scope: IncludeNode,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Serialize this node and its descendants in HTML syntax to a new file at the given path.
|
||||
#[inline]
|
||||
pub fn serialize_to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
|
||||
let mut file = File::create(&path)?;
|
||||
self.serialize(&mut file)
|
||||
}
|
||||
}
|
|
@ -1,185 +0,0 @@
|
|||
use html5ever::tree_builder::QuirksMode;
|
||||
use html5ever::QualName;
|
||||
use std::path::Path;
|
||||
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::parser::{parse_html, parse_fragment};
|
||||
use crate::select::*;
|
||||
use crate::traits::*;
|
||||
|
||||
#[test]
|
||||
fn text_nodes() {
|
||||
let html = r"
|
||||
<!doctype html>
|
||||
<title>Test case</title>
|
||||
<p>Content contains <b>Important</b> data</p>";
|
||||
let document = parse_html().one(html);
|
||||
let paragraph = document.select("p").unwrap().collect::<Vec<_>>();
|
||||
assert_eq!(paragraph.len(), 1);
|
||||
assert_eq!(
|
||||
paragraph[0].text_contents(),
|
||||
"Content contains Important data"
|
||||
);
|
||||
let texts = paragraph[0]
|
||||
.as_node()
|
||||
.descendants()
|
||||
.text_nodes()
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(texts.len(), 3);
|
||||
assert_eq!(&*texts[0].borrow(), "Content contains ");
|
||||
assert_eq!(&*texts[1].borrow(), "Important");
|
||||
assert_eq!(&*texts[2].borrow(), " data");
|
||||
{
|
||||
let mut x = texts[0].borrow_mut();
|
||||
x.truncate(0);
|
||||
x.push_str("Content doesn't contain ");
|
||||
}
|
||||
assert_eq!(&*texts[0].borrow(), "Content doesn't contain ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_and_serialize() {
|
||||
let html = r"
|
||||
<!doctype html>
|
||||
<title>Test case</title>
|
||||
<p>Content";
|
||||
let document = parse_html().one(html);
|
||||
assert_eq!(
|
||||
document.as_document().unwrap().quirks_mode(),
|
||||
QuirksMode::NoQuirks
|
||||
);
|
||||
assert_eq!(
|
||||
document.to_string(),
|
||||
r"<!DOCTYPE html><html><head><title>Test case</title>
|
||||
</head><body><p>Content</p></body></html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_and_serialize_fragment() {
|
||||
let html = r"<tbody><tr><td>Test case";
|
||||
|
||||
let ctx_name = QualName::new(None, ns!(html), local_name!("tbody"));
|
||||
let document = parse_fragment(ctx_name, vec![]).one(html);
|
||||
assert_eq!(document.as_document().unwrap().quirks_mode(), QuirksMode::NoQuirks);
|
||||
assert_eq!(document.to_string(), r"<html><tr><td>Test case</td></tr></html>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_file() {
|
||||
let mut path = Path::new(env!("CARGO_MANIFEST_DIR")).to_path_buf();
|
||||
path.push("test_data".to_string());
|
||||
path.push("foo.html");
|
||||
|
||||
let html = r"<!DOCTYPE html><html><head>
|
||||
<title>Test case</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Foo</p>
|
||||
|
||||
|
||||
</body></html>";
|
||||
let document = parse_html().from_utf8().from_file(&path).unwrap();
|
||||
assert_eq!(document.to_string(), html);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn serialize_and_read_file() {
|
||||
let tempdir = TempDir::new().unwrap();
|
||||
let mut path = tempdir.path().to_path_buf();
|
||||
path.push("temp.html");
|
||||
|
||||
let html = r"<!DOCTYPE html><html><head><title>Title</title></head><body>Body</body></html>";
|
||||
let document = parse_html().one(html);
|
||||
let _ = document.serialize_to_file(path.clone());
|
||||
|
||||
let document2 = parse_html().from_utf8().from_file(&path).unwrap();
|
||||
assert_eq!(document.to_string(), document2.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn select() {
|
||||
let html = r"
|
||||
<title>Test case</title>
|
||||
<p class=foo>Foo
|
||||
<p>Bar
|
||||
<p class=foo>Foo
|
||||
";
|
||||
|
||||
let document = parse_html().one(html);
|
||||
let matching = document.select("p.foo").unwrap().collect::<Vec<_>>();
|
||||
assert_eq!(matching.len(), 2);
|
||||
let child = matching[0].as_node().first_child().unwrap();
|
||||
assert_eq!(&**child.as_text().unwrap().borrow(), "Foo\n");
|
||||
assert_eq!(matching[0].attributes.borrow().get("class"), Some("foo"));
|
||||
assert_eq!(
|
||||
matching[0].attributes.borrow().get(local_name!("class")),
|
||||
Some("foo")
|
||||
);
|
||||
|
||||
let selectors = Selectors::compile("p.foo").unwrap();
|
||||
let matching2 = selectors
|
||||
.filter(document.descendants().elements())
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(matching, matching2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn select_first() {
|
||||
let html = r"
|
||||
<title>Test case</title>
|
||||
<p class=foo>Foo
|
||||
<p>Bar
|
||||
<p class=foo>Baz
|
||||
";
|
||||
|
||||
let document = parse_html().one(html);
|
||||
let matching = document.select_first("p.foo").unwrap();
|
||||
let child = matching.as_node().first_child().unwrap();
|
||||
assert_eq!(&**child.as_text().unwrap().borrow(), "Foo\n");
|
||||
assert_eq!(matching.attributes.borrow().get("class"), Some("foo"));
|
||||
assert_eq!(
|
||||
matching.attributes.borrow().get(local_name!("class")),
|
||||
Some("foo")
|
||||
);
|
||||
|
||||
assert!(document.select_first("p.bar").is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn to_string() {
|
||||
let html = r"<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Test case</title>
|
||||
</head>
|
||||
<body>
|
||||
<p class=foo>Foo
|
||||
</body>
|
||||
</html>";
|
||||
|
||||
let document = parse_html().one(html);
|
||||
assert_eq!(
|
||||
document
|
||||
.inclusive_descendants()
|
||||
.nth(11)
|
||||
.unwrap()
|
||||
.to_string(),
|
||||
"<p class=\"foo\">Foo\n \n</p>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn specificity() {
|
||||
let selectors = Selectors::compile(".example, :first-child, div").unwrap();
|
||||
let specificities = selectors
|
||||
.0
|
||||
.iter()
|
||||
.map(|s| s.specificity())
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(specificities.len(), 3);
|
||||
assert!(specificities[0] == specificities[1]);
|
||||
assert!(specificities[0] > specificities[2]);
|
||||
assert!(specificities[1] > specificities[2]);
|
||||
}
|
|
@ -1,489 +0,0 @@
|
|||
use html5ever::tree_builder::QuirksMode;
|
||||
use html5ever::QualName;
|
||||
use std::cell::{Cell, RefCell};
|
||||
use std::fmt;
|
||||
use std::ops::Deref;
|
||||
use std::rc::{Rc, Weak};
|
||||
|
||||
use crate::attributes::{Attribute, Attributes, ExpandedName};
|
||||
use crate::cell_extras::*;
|
||||
use crate::iter::NodeIterator;
|
||||
|
||||
/// Node data specific to the node type.
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub enum NodeData {
|
||||
/// Element node
|
||||
Element(ElementData),
|
||||
|
||||
/// Text node
|
||||
Text(RefCell<String>),
|
||||
|
||||
/// Comment node
|
||||
Comment(RefCell<String>),
|
||||
|
||||
/// Processing instruction node
|
||||
ProcessingInstruction(RefCell<(String, String)>),
|
||||
|
||||
/// Doctype node
|
||||
Doctype(Doctype),
|
||||
|
||||
/// Document node
|
||||
Document(DocumentData),
|
||||
|
||||
/// Document fragment node
|
||||
DocumentFragment,
|
||||
}
|
||||
|
||||
/// Data specific to doctype nodes.
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct Doctype {
|
||||
/// The name of the doctype
|
||||
pub name: String,
|
||||
|
||||
/// The public ID of the doctype
|
||||
pub public_id: String,
|
||||
|
||||
/// The system ID of the doctype
|
||||
pub system_id: String,
|
||||
}
|
||||
|
||||
/// Data specific to element nodes.
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct ElementData {
|
||||
/// The namespace and local name of the element, such as `ns!(html)` and `body`.
|
||||
pub name: QualName,
|
||||
|
||||
/// The attributes of the elements.
|
||||
pub attributes: RefCell<Attributes>,
|
||||
|
||||
/// If the element is an HTML `<template>` element,
|
||||
/// the document fragment node that is the root of template contents.
|
||||
pub template_contents: Option<NodeRef>,
|
||||
}
|
||||
|
||||
/// Data specific to document nodes.
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct DocumentData {
|
||||
#[doc(hidden)]
|
||||
pub _quirks_mode: Cell<QuirksMode>,
|
||||
}
|
||||
|
||||
impl DocumentData {
|
||||
/// The quirks mode of the document, as determined by the HTML parser.
|
||||
#[inline]
|
||||
pub fn quirks_mode(&self) -> QuirksMode {
|
||||
self._quirks_mode.get()
|
||||
}
|
||||
}
|
||||
|
||||
/// A strong reference to a node.
|
||||
///
|
||||
/// A node is destroyed when the last strong reference to it dropped.
|
||||
///
|
||||
/// Each node holds a strong reference to its first child and next sibling (if any),
|
||||
/// but only a weak reference to its last child, previous sibling, and parent.
|
||||
/// This is to avoid strong reference cycles, which would cause memory leaks.
|
||||
///
|
||||
/// As a result, a single `NodeRef` is sufficient to keep alive a node
|
||||
/// and nodes that are after it in tree order
|
||||
/// (its descendants, its following siblings, and their descendants)
|
||||
/// but not other nodes in a tree.
|
||||
///
|
||||
/// To avoid detroying nodes prematurely,
|
||||
/// programs typically hold a strong reference to the root of a document
|
||||
/// until they’re done with that document.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct NodeRef(pub Rc<Node>);
|
||||
|
||||
impl Deref for NodeRef {
|
||||
type Target = Node;
|
||||
#[inline]
|
||||
fn deref(&self) -> &Node {
|
||||
&*self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for NodeRef {}
|
||||
impl PartialEq for NodeRef {
|
||||
#[inline]
|
||||
fn eq(&self, other: &NodeRef) -> bool {
|
||||
let a: *const Node = &*self.0;
|
||||
let b: *const Node = &*other.0;
|
||||
a == b
|
||||
}
|
||||
}
|
||||
|
||||
/// A node inside a DOM-like tree.
|
||||
pub struct Node {
|
||||
parent: Cell<Option<Weak<Node>>>,
|
||||
previous_sibling: Cell<Option<Weak<Node>>>,
|
||||
next_sibling: Cell<Option<Rc<Node>>>,
|
||||
first_child: Cell<Option<Rc<Node>>>,
|
||||
last_child: Cell<Option<Weak<Node>>>,
|
||||
data: NodeData,
|
||||
}
|
||||
|
||||
impl fmt::Debug for Node {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
write!(f, "{:?} @ {:?}", self.data, self as *const Node)
|
||||
}
|
||||
}
|
||||
|
||||
/// Prevent implicit recursion when dropping nodes to avoid overflowing the stack.
|
||||
///
|
||||
/// The implicit drop is correct, but recursive.
|
||||
/// In the worst case (where no node has both a next sibling and a child),
|
||||
/// a tree of a few tens of thousands of nodes could cause a stack overflow.
|
||||
///
|
||||
/// This `Drop` implementations makes sure the recursion does not happen.
|
||||
/// Instead, it has an explicit `Vec<Rc<Node>>` stack to traverse the subtree,
|
||||
/// but only following `Rc<Node>` references that are "unique":
|
||||
/// that have a strong reference count of 1.
|
||||
/// Those are the nodes that would have been dropped recursively.
|
||||
///
|
||||
/// The stack holds ancestors of the current node rather than preceding siblings,
|
||||
/// on the assumption that large document trees are typically wider than deep.
|
||||
impl Drop for Node {
|
||||
fn drop(&mut self) {
|
||||
// `.take_if_unique_strong()` temporarily leaves the tree in an inconsistent state,
|
||||
// as the corresponding `Weak` reference in the other direction is not removed.
|
||||
// It is important that all `Some(_)` strong references it returns
|
||||
// are dropped by the end of this `drop` call,
|
||||
// and that no user code is invoked in-between.
|
||||
|
||||
// Sharing `stack` between these two calls is not necessary,
|
||||
// but it allows re-using memory allocations.
|
||||
let mut stack = Vec::new();
|
||||
if let Some(rc) = self.first_child.take_if_unique_strong() {
|
||||
non_recursive_drop_unique_rc(rc, &mut stack);
|
||||
}
|
||||
if let Some(rc) = self.next_sibling.take_if_unique_strong() {
|
||||
non_recursive_drop_unique_rc(rc, &mut stack);
|
||||
}
|
||||
|
||||
fn non_recursive_drop_unique_rc(mut rc: Rc<Node>, stack: &mut Vec<Rc<Node>>) {
|
||||
loop {
|
||||
if let Some(child) = rc.first_child.take_if_unique_strong() {
|
||||
stack.push(rc);
|
||||
rc = child;
|
||||
continue;
|
||||
}
|
||||
if let Some(sibling) = rc.next_sibling.take_if_unique_strong() {
|
||||
// The previous value of `rc: Rc<Node>` is dropped here.
|
||||
// Since it was unique, the corresponding `Node` is dropped as well.
|
||||
// `<Node as Drop>::drop` does not call `drop_rc`
|
||||
// as both the first child and next sibling were already taken.
|
||||
// Weak reference counts decremented here for `Cell`s that are `Some`:
|
||||
// * `rc.parent`: still has a strong reference in `stack` or elsewhere
|
||||
// * `rc.last_child`: this is the last weak ref. Deallocated now.
|
||||
// * `rc.previous_sibling`: this is the last weak ref. Deallocated now.
|
||||
rc = sibling;
|
||||
continue;
|
||||
}
|
||||
if let Some(parent) = stack.pop() {
|
||||
// Same as in the above comment.
|
||||
rc = parent;
|
||||
continue;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl NodeRef {
|
||||
/// Create a new node.
|
||||
#[inline]
|
||||
pub fn new(data: NodeData) -> NodeRef {
|
||||
NodeRef(Rc::new(Node {
|
||||
parent: Cell::new(None),
|
||||
first_child: Cell::new(None),
|
||||
last_child: Cell::new(None),
|
||||
previous_sibling: Cell::new(None),
|
||||
next_sibling: Cell::new(None),
|
||||
data,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Create a new element node.
|
||||
#[inline]
|
||||
pub fn new_element<I>(name: QualName, attributes: I) -> NodeRef
|
||||
where
|
||||
I: IntoIterator<Item = (ExpandedName, Attribute)>,
|
||||
{
|
||||
NodeRef::new(NodeData::Element(ElementData {
|
||||
template_contents: if name.expanded() == expanded_name!(html "template") {
|
||||
Some(NodeRef::new(NodeData::DocumentFragment))
|
||||
} else {
|
||||
None
|
||||
},
|
||||
name,
|
||||
attributes: RefCell::new(Attributes {
|
||||
map: attributes.into_iter().collect(),
|
||||
}),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Create a new text node.
|
||||
#[inline]
|
||||
pub fn new_text<T: Into<String>>(value: T) -> NodeRef {
|
||||
NodeRef::new(NodeData::Text(RefCell::new(value.into())))
|
||||
}
|
||||
|
||||
/// Create a new comment node.
|
||||
#[inline]
|
||||
pub fn new_comment<T: Into<String>>(value: T) -> NodeRef {
|
||||
NodeRef::new(NodeData::Comment(RefCell::new(value.into())))
|
||||
}
|
||||
|
||||
/// Create a new processing instruction node.
|
||||
#[inline]
|
||||
pub fn new_processing_instruction<T1, T2>(target: T1, data: T2) -> NodeRef
|
||||
where
|
||||
T1: Into<String>,
|
||||
T2: Into<String>,
|
||||
{
|
||||
NodeRef::new(NodeData::ProcessingInstruction(RefCell::new((
|
||||
target.into(),
|
||||
data.into(),
|
||||
))))
|
||||
}
|
||||
|
||||
/// Create a new doctype node.
|
||||
#[inline]
|
||||
pub fn new_doctype<T1, T2, T3>(name: T1, public_id: T2, system_id: T3) -> NodeRef
|
||||
where
|
||||
T1: Into<String>,
|
||||
T2: Into<String>,
|
||||
T3: Into<String>,
|
||||
{
|
||||
NodeRef::new(NodeData::Doctype(Doctype {
|
||||
name: name.into(),
|
||||
public_id: public_id.into(),
|
||||
system_id: system_id.into(),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Create a new document node.
|
||||
#[inline]
|
||||
pub fn new_document() -> NodeRef {
|
||||
NodeRef::new(NodeData::Document(DocumentData {
|
||||
_quirks_mode: Cell::new(QuirksMode::NoQuirks),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Return the concatenation of all text nodes in this subtree.
|
||||
pub fn text_contents(&self) -> String {
|
||||
let mut s = String::new();
|
||||
for text_node in self.inclusive_descendants().text_nodes() {
|
||||
s.push_str(&text_node.borrow());
|
||||
}
|
||||
s
|
||||
}
|
||||
}
|
||||
|
||||
impl Node {
    /// Return a reference to this node’s node-type-specific data.
    #[inline]
    pub fn data(&self) -> &NodeData {
        &self.data
    }

    /// If this node is an element, return a reference to element-specific data.
    #[inline]
    pub fn as_element(&self) -> Option<&ElementData> {
        match self.data {
            NodeData::Element(ref value) => Some(value),
            _ => None,
        }
    }

    /// If this node is a text node, return a reference to its contents.
    #[inline]
    pub fn as_text(&self) -> Option<&RefCell<String>> {
        match self.data {
            NodeData::Text(ref value) => Some(value),
            _ => None,
        }
    }

    /// If this node is a comment, return a reference to its contents.
    #[inline]
    pub fn as_comment(&self) -> Option<&RefCell<String>> {
        match self.data {
            NodeData::Comment(ref value) => Some(value),
            _ => None,
        }
    }

    /// If this node is a doctype, return a reference to doctype-specific data.
    #[inline]
    pub fn as_doctype(&self) -> Option<&Doctype> {
        match self.data {
            NodeData::Doctype(ref value) => Some(value),
            _ => None,
        }
    }

    /// If this node is a document, return a reference to document-specific data.
    #[inline]
    pub fn as_document(&self) -> Option<&DocumentData> {
        match self.data {
            NodeData::Document(ref value) => Some(value),
            _ => None,
        }
    }

    /// Return a reference to the parent node, unless this node is the root of the tree.
    ///
    /// `parent` is stored as a weak reference, so it must be upgraded.
    #[inline]
    pub fn parent(&self) -> Option<NodeRef> {
        self.parent.upgrade().map(NodeRef)
    }

    /// Return a reference to the first child of this node, unless it has no child.
    ///
    /// `first_child` is one of the two strong links (the other is `next_sibling`),
    /// so it is cloned rather than upgraded.
    #[inline]
    pub fn first_child(&self) -> Option<NodeRef> {
        self.first_child.clone_inner().map(NodeRef)
    }

    /// Return a reference to the last child of this node, unless it has no child.
    #[inline]
    pub fn last_child(&self) -> Option<NodeRef> {
        self.last_child.upgrade().map(NodeRef)
    }

    /// Return a reference to the previous sibling of this node, unless it is a first child.
    #[inline]
    pub fn previous_sibling(&self) -> Option<NodeRef> {
        self.previous_sibling.upgrade().map(NodeRef)
    }

    /// Return a reference to the next sibling of this node, unless it is a last child.
    #[inline]
    pub fn next_sibling(&self) -> Option<NodeRef> {
        self.next_sibling.clone_inner().map(NodeRef)
    }

    /// Detach a node from its parent and siblings. Children are not affected.
    ///
    /// To remove a node and its descendants, detach it and drop any strong reference to it.
    pub fn detach(&self) {
        // Take all three outgoing links at once; this node is fully unlinked below.
        let parent_weak = self.parent.take();
        let previous_sibling_weak = self.previous_sibling.take();
        let next_sibling_strong = self.next_sibling.take();

        let previous_sibling_opt = previous_sibling_weak
            .as_ref()
            .and_then(|weak| weak.upgrade());

        // Repair the backward link: either the next sibling's `previous_sibling`,
        // or — if we were the last child — the parent's `last_child`.
        if let Some(next_sibling_ref) = next_sibling_strong.as_ref() {
            next_sibling_ref
                .previous_sibling
                .replace(previous_sibling_weak);
        } else if let Some(parent_ref) = parent_weak.as_ref() {
            if let Some(parent_strong) = parent_ref.upgrade() {
                parent_strong.last_child.replace(previous_sibling_weak);
            }
        }

        // Repair the forward link: either the previous sibling's `next_sibling`,
        // or — if we were the first child — the parent's `first_child`.
        if let Some(previous_sibling_strong) = previous_sibling_opt {
            previous_sibling_strong
                .next_sibling
                .replace(next_sibling_strong);
        } else if let Some(parent_ref) = parent_weak.as_ref() {
            if let Some(parent_strong) = parent_ref.upgrade() {
                parent_strong.first_child.replace(next_sibling_strong);
            }
        }
    }
}
|
||||
|
||||
impl NodeRef {
    /// Append a new child to this node, after existing children.
    ///
    /// The new child is detached from its previous position.
    pub fn append(&self, new_child: NodeRef) {
        new_child.detach();
        new_child.parent.replace(Some(Rc::downgrade(&self.0)));
        // Swap ourselves in as `last_child`; the old value (if any) tells us
        // whether we had children before.
        if let Some(last_child_weak) = self.last_child.replace(Some(Rc::downgrade(&new_child.0))) {
            if let Some(last_child) = last_child_weak.upgrade() {
                new_child.previous_sibling.replace(Some(last_child_weak));
                debug_assert!(last_child.next_sibling.is_none());
                last_child.next_sibling.replace(Some(new_child.0));
                return;
            }
        }
        // No previous children: the new child is also the first child.
        debug_assert!(self.first_child.is_none());
        self.first_child.replace(Some(new_child.0));
    }

    /// Prepend a new child to this node, before existing children.
    ///
    /// The new child is detached from its previous position.
    pub fn prepend(&self, new_child: NodeRef) {
        new_child.detach();
        new_child.parent.replace(Some(Rc::downgrade(&self.0)));
        if let Some(first_child) = self.first_child.take() {
            // Link the old first child after the new one.
            debug_assert!(first_child.previous_sibling.is_none());
            first_child
                .previous_sibling
                .replace(Some(Rc::downgrade(&new_child.0)));
            new_child.next_sibling.replace(Some(first_child));
        } else {
            // No previous children: the new child is also the last child.
            debug_assert!(self.first_child.is_none());
            self.last_child.replace(Some(Rc::downgrade(&new_child.0)));
        }
        self.first_child.replace(Some(new_child.0));
    }

    /// Insert a new sibling after this node.
    ///
    /// The new sibling is detached from its previous position.
    pub fn insert_after(&self, new_sibling: NodeRef) {
        new_sibling.detach();
        // The new sibling shares our parent and points back at us.
        new_sibling.parent.replace(self.parent.clone_inner());
        new_sibling
            .previous_sibling
            .replace(Some(Rc::downgrade(&self.0)));
        if let Some(next_sibling) = self.next_sibling.take() {
            // Splice between `self` and the old next sibling.
            debug_assert!(next_sibling.previous_sibling().unwrap() == *self);
            next_sibling
                .previous_sibling
                .replace(Some(Rc::downgrade(&new_sibling.0)));
            new_sibling.next_sibling.replace(Some(next_sibling));
        } else if let Some(parent) = self.parent() {
            // We were the last child, so the new sibling becomes the last child.
            debug_assert!(parent.last_child().unwrap() == *self);
            parent
                .last_child
                .replace(Some(Rc::downgrade(&new_sibling.0)));
        }
        self.next_sibling.replace(Some(new_sibling.0));
    }

    /// Insert a new sibling before this node.
    ///
    /// The new sibling is detached from its previous position.
    pub fn insert_before(&self, new_sibling: NodeRef) {
        new_sibling.detach();
        // The new sibling shares our parent and we come right after it.
        new_sibling.parent.replace(self.parent.clone_inner());
        new_sibling.next_sibling.replace(Some(self.0.clone()));
        // Swap the new sibling in as our `previous_sibling`; the old value (if
        // any) tells us whether we were the first child.
        if let Some(previous_sibling_weak) = self
            .previous_sibling
            .replace(Some(Rc::downgrade(&new_sibling.0)))
        {
            if let Some(previous_sibling) = previous_sibling_weak.upgrade() {
                new_sibling
                    .previous_sibling
                    .replace(Some(previous_sibling_weak));
                debug_assert!(previous_sibling.next_sibling().unwrap() == *self);
                previous_sibling.next_sibling.replace(Some(new_sibling.0));
                return;
            }
        }
        // We were the first child, so the new sibling becomes the first child.
        if let Some(parent) = self.parent() {
            debug_assert!(parent.first_child().unwrap() == *self);
            parent.first_child.replace(Some(new_sibling.0));
        }
    }
}
|
|
@ -1,9 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Test case</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Foo</p>
|
||||
</body>
|
||||
</html>
|
|
@ -1,31 +0,0 @@
|
|||
name: Build
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os:
|
||||
- ubuntu-latest
|
||||
- macOS-latest
|
||||
- windows-latest
|
||||
rust:
|
||||
- stable
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: ${{ matrix.rust }}
|
||||
override: true
|
||||
- name: Build
|
||||
run: |
|
||||
cargo build --all-targets --no-default-features --verbose
|
||||
cargo build --all-targets --verbose
|
||||
- name: Run tests
|
||||
run: cargo test --all-targets --verbose
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
|
@ -1,27 +0,0 @@
|
|||
name: Coverage
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
coverage:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- uses: actions-rs/install@v0.1
|
||||
with:
|
||||
crate: cargo-tarpaulin
|
||||
use-tool-cache: true
|
||||
- name: Run coverage
|
||||
run: cargo tarpaulin -f -t 5 --out Xml -v -- --test-threads=1
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v1
|
||||
with:
|
||||
token: ${{secrets.CODECOV_TOKEN}}
|
|
@ -1,24 +0,0 @@
|
|||
name: Style check
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
clippy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Install clippy
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
components: clippy
|
||||
- uses: actions-rs/clippy-check@v1
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
args: --all --all-features
|
||||
fmt:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: Run fmt check
|
||||
run: cargo fmt --all -- --check
|
|
@ -1,4 +0,0 @@
|
|||
/target/
|
||||
**/*.rs.bk
|
||||
Cargo.lock
|
||||
/.vscode
|
|
@ -1,16 +0,0 @@
|
|||
[package]
|
||||
name = "sanitize_html"
|
||||
version = "0.7.0"
|
||||
authors = ["Andrey Kutejko <andy128k@gmail.com>"]
|
||||
description = "Rule-based HTML Sanitization library"
|
||||
keywords = ["html", "sanitize"]
|
||||
license = "MIT"
|
||||
homepage = "https://github.com/andy128k/sanitize-html-rs"
|
||||
repository = "https://github.com/andy128k/sanitize-html-rs.git"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
regex = "^1.5.6"
|
||||
lazy_static = "^1.4.0"
|
||||
html5ever = "^0.26"
|
||||
kuchiki = { path = "../kuchiki" }
|
|
@ -1,18 +0,0 @@
|
|||
Copyright (c) 2017 Andrey Kutejko
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@ -1,8 +0,0 @@
|
|||
# Sanitize HTML
|
||||
|
||||
[![Crates.io Status](https://img.shields.io/crates/v/sanitize_html.svg)](https://crates.io/crates/sanitize_html)
|
||||
[![Build](https://github.com/andy128k/sanitize-html-rs/workflows/Build/badge.svg?branch=master&event=push)](https://github.com/andy128k/sanitize-html-rs/actions?query=workflow%3ABuild)
|
||||
[![codecov](https://codecov.io/gh/andy128k/sanitize-html-rs/branch/master/graph/badge.svg)](https://codecov.io/gh/andy128k/sanitize-html-rs)
|
||||
[![dependency status](https://deps.rs/repo/github/andy128k/sanitize-html-rs/status.svg)](https://deps.rs/repo/github/andy128k/sanitize-html-rs)
|
||||
|
||||
This is a library for sanitization of HTML fragments.
|
|
@ -1,37 +0,0 @@
|
|||
//! Error types, which can be emitted by the sanitization procedure.
|
||||
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
|
||||
/// Sanitization error
#[derive(Debug)]
pub enum SanitizeError {
    /// UTF-8 decoding error while reading input bytes as a `&str`
    /// (raised by `sanitize_bytes` via `std::str::from_utf8`)
    StrUtf8Error(std::str::Utf8Error),

    /// UTF-8 decoding error while converting serialized output bytes to a `String`
    /// (raised by `sanitize_str` via `String::from_utf8`)
    Utf8Error(std::string::FromUtf8Error),

    /// Serialization error (I/O failure while writing the sanitized DOM)
    SerializeError(std::io::Error),
}
|
||||
|
||||
impl fmt::Display for SanitizeError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
SanitizeError::StrUtf8Error(e) => write!(f, "UTF-8 decode error {}", e),
|
||||
SanitizeError::Utf8Error(e) => write!(f, "UTF-8 decode error {}", e),
|
||||
SanitizeError::SerializeError(e) => write!(f, "Serialization error {}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for SanitizeError {
|
||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||
match self {
|
||||
SanitizeError::StrUtf8Error(e) => Some(e),
|
||||
SanitizeError::Utf8Error(e) => Some(e),
|
||||
SanitizeError::SerializeError(e) => Some(e),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
//! HTML Sanitization library
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! ```
|
||||
//! use sanitize_html::sanitize_str;
|
||||
//! use sanitize_html::rules::predefined::DEFAULT;
|
||||
//!
|
||||
//! let input = "<b>Lo<!-- comment -->rem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\");</script>";
|
||||
//!
|
||||
//! let sanitized_default: String = sanitize_str(&DEFAULT, input).unwrap();
|
||||
//! assert_eq!(&sanitized_default, "Lorem ipsum dolor sit amet ");
|
||||
//! ```
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
pub mod errors;
|
||||
mod parse;
|
||||
pub mod rules;
|
||||
mod sanitize;
|
||||
mod tests;
|
||||
|
||||
use crate::errors::SanitizeError;
|
||||
use crate::rules::Rules;
|
||||
|
||||
/// Sanitize HTML bytes
|
||||
pub fn sanitize_bytes(rules: &Rules, input: &[u8]) -> Result<Vec<u8>, SanitizeError> {
|
||||
let input_str = std::str::from_utf8(input).map_err(SanitizeError::StrUtf8Error)?;
|
||||
let dom = parse::parse_str(input_str);
|
||||
let new_dom = sanitize::sanitize_dom(&dom, rules);
|
||||
let result_bytes = parse::unparse_bytes(&new_dom)?;
|
||||
Ok(result_bytes)
|
||||
}
|
||||
|
||||
/// Sanitize HTML string
|
||||
pub fn sanitize_str(rules: &Rules, input: &str) -> Result<String, SanitizeError> {
|
||||
let dom = parse::parse_str(input);
|
||||
let new_dom = sanitize::sanitize_dom(&dom, rules);
|
||||
let result_bytes = parse::unparse_bytes(&new_dom)?;
|
||||
let result_string = String::from_utf8(result_bytes).map_err(SanitizeError::Utf8Error)?;
|
||||
Ok(result_string)
|
||||
}
|
|
@ -1,38 +0,0 @@
|
|||
use super::errors::SanitizeError;
|
||||
use html5ever::{
|
||||
interface::QualName,
|
||||
local_name, namespace_prefix, namespace_url, ns, serialize,
|
||||
serialize::{SerializeOpts, TraversalScope},
|
||||
tendril::TendrilSink,
|
||||
};
|
||||
use kuchiki::{parse_html_with_options, NodeRef, ParseOpts};
|
||||
use std::default::Default;
|
||||
|
||||
pub(crate) fn parse_str(input: &str) -> NodeRef {
|
||||
let mut opts = ParseOpts::default();
|
||||
opts.tree_builder.drop_doctype = true;
|
||||
|
||||
let mut parser = parse_html_with_options(opts);
|
||||
parser.process(input.into());
|
||||
parser.finish()
|
||||
}
|
||||
|
||||
pub(crate) fn unparse_bytes(dom: &NodeRef) -> Result<Vec<u8>, SanitizeError> {
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
|
||||
let parent = QualName::new(
|
||||
Some(namespace_prefix!("html")),
|
||||
ns!(html),
|
||||
local_name!("div"),
|
||||
);
|
||||
|
||||
let opts = SerializeOpts {
|
||||
scripting_enabled: false,
|
||||
traversal_scope: TraversalScope::ChildrenOnly(Some(parent)),
|
||||
create_missing_parent: false,
|
||||
};
|
||||
|
||||
serialize(&mut buf, dom, opts).map_err(SanitizeError::SerializeError)?;
|
||||
|
||||
Ok(buf)
|
||||
}
|
|
@ -1,141 +0,0 @@
|
|||
//! Structures to define sanitization rules.
|
||||
|
||||
pub mod pattern;
|
||||
pub mod predefined;
|
||||
|
||||
use self::pattern::Pattern;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// structure to describe HTML element
pub struct Element {
    /// name of an element
    pub name: String,
    /// Whitelist of allowed attributes, each with a pattern its value must match
    pub attributes: HashMap<String, Pattern>,
    /// List of mandatory attributes and their values.
    /// These attributes will be forcibly added to element.
    pub mandatory_attributes: HashMap<String, String>,
    /// Attribute rules (renames and value-modifying callbacks)
    pub attribute_rules: AttributeRules,
}
|
||||
|
||||
impl Element {
|
||||
/// Creates element descriptor
|
||||
pub fn new(name: &str) -> Self {
|
||||
Self {
|
||||
name: name.to_owned(),
|
||||
attributes: HashMap::new(),
|
||||
mandatory_attributes: HashMap::new(),
|
||||
attribute_rules: AttributeRules::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds an attribute
|
||||
pub fn attribute(mut self, attribute: &str, pattern: Pattern) -> Self {
|
||||
self.attributes.insert(attribute.to_owned(), pattern);
|
||||
self
|
||||
}
|
||||
|
||||
/// Adds mandatory attribute
|
||||
pub fn mandatory_attribute(mut self, attribute: &str, value: &str) -> Self {
|
||||
self.mandatory_attributes
|
||||
.insert(attribute.to_owned(), value.to_owned());
|
||||
self
|
||||
}
|
||||
|
||||
/// Checks if attribute is valid
|
||||
pub fn is_valid(&self, attribute: &str, value: &str) -> bool {
|
||||
match self.attributes.get(attribute) {
|
||||
None => false,
|
||||
Some(pattern) => pattern.matches(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// structure to describe sanitization rules
#[derive(Default)]
pub struct Rules {
    /// Determines if comments are kept or stripped out of a document.
    pub allow_comments: bool,
    /// Allowed elements, keyed by element name.
    pub allowed_elements: HashMap<String, Element>,
    /// Elements which will be removed together with their children.
    pub delete_elements: HashSet<String>,
    /// Elements which will be replaced by spaces (Their children will be processed recursively).
    pub space_elements: HashSet<String>,
    /// Elements which will be renamed (old name → new name).
    pub rename_elements: HashMap<String, String>,
}
|
||||
|
||||
impl Rules {
    /// Creates a new rules set.
    ///
    /// All builder methods below consume `self` and return it, so rule sets
    /// are constructed by chaining: `Rules::new().element(..).delete(..)`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets if comments are allowed
    pub fn allow_comments(mut self, allow_comments: bool) -> Self {
        self.allow_comments = allow_comments;
        self
    }

    /// Adds a rule for an allowed element, keyed by the element's name.
    pub fn element(mut self, element: Element) -> Self {
        self.allowed_elements.insert(element.name.clone(), element);
        self
    }

    /// Adds a rule to delete an element (together with its children)
    pub fn delete(mut self, element_name: &str) -> Self {
        self.delete_elements.insert(element_name.to_owned());
        self
    }

    /// Adds a rule to replace an element with space (children are kept)
    pub fn space(mut self, element_name: &str) -> Self {
        self.space_elements.insert(element_name.to_owned());
        self
    }

    /// Adds a rule to rename an element
    pub fn rename(mut self, element_name: &str, to: &str) -> Self {
        self.rename_elements
            .insert(element_name.to_owned(), to.to_owned());
        self
    }
}
|
||||
|
||||
/// Structure to define rules for attributes
#[derive(Default)]
pub struct AttributeRules {
    /// Attributes which will be renamed (old name → new name).
    pub rename_attributes: HashMap<String, String>,
    /// Functions to modify attribute contents, keyed by attribute name.
    pub modify_attributes: HashMap<String, Box<dyn Fn(String) -> String + Sync>>,
}
|
||||
|
||||
impl AttributeRules {
|
||||
/// Create a new attribute rules set.
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Adds a rule to rename an attribute
|
||||
pub fn rename(&mut self, attribute_name: &str, to: &str) -> &Self {
|
||||
self.rename_attributes
|
||||
.insert(attribute_name.to_owned(), to.to_owned());
|
||||
self
|
||||
}
|
||||
|
||||
/// Adds a rule with a function to modify the contents of an attribute
|
||||
pub fn modify(
|
||||
&mut self,
|
||||
attribute_name: &str,
|
||||
function: Box<dyn Fn(String) -> String + Sync>,
|
||||
) -> &Self {
|
||||
self.modify_attributes
|
||||
.insert(attribute_name.to_owned(), function);
|
||||
self
|
||||
}
|
||||
}
|
|
@ -1,127 +0,0 @@
|
|||
//! This module contains code dedicated to check validity of attribute's value.
|
||||
//!
|
||||
//! # Examples
|
||||
//! ```
|
||||
//! use sanitize_html::rules::pattern::Pattern;
|
||||
//! use regex::Regex;
|
||||
//!
|
||||
//! let href = Pattern::regex(Regex::new("^(ftp:|http:|https:|mailto:)").unwrap()) |
|
||||
//! !Pattern::regex(Regex::new("^[^/]+[[:space:]]*:").unwrap());
|
||||
//!
|
||||
//! assert!(href.matches("filename.xls"));
|
||||
//! assert!(href.matches("http://foo.com/"));
|
||||
//! assert!(href.matches(" filename with spaces .zip "));
|
||||
//! assert!(!href.matches(" javascript : window.location = '//example.com/'")); // Attempt to make XSS
|
||||
//! ```
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
/// Value pattern: a predicate over an attribute value.
/// The boxed closure returns `true` when the value is acceptable.
pub struct Pattern(pub Box<dyn Fn(&str) -> bool + Sync + Send>);
|
||||
|
||||
impl Pattern {
    /// Creates pattern which accepts any value.
    ///
    /// # Example
    /// ```
    /// use sanitize_html::rules::pattern::Pattern;
    /// use regex::Regex;
    ///
    /// let pattern = Pattern::any();
    /// assert!(pattern.matches(""));
    /// assert!(pattern.matches("pants"));
    /// ```
    pub fn any() -> Self {
        Pattern(Box::new(move |_value| true))
    }

    /// Creates pattern which uses regular expression to check a value.
    ///
    /// # Example
    /// ```
    /// use sanitize_html::rules::pattern::Pattern;
    /// use regex::Regex;
    ///
    /// let pattern = Pattern::regex(Regex::new("ant").unwrap());
    /// assert!(!pattern.matches(""));
    /// assert!(pattern.matches("pants"));
    /// ```
    pub fn regex(re: Regex) -> Self {
        Pattern(Box::new(move |value| re.is_match(value)))
    }

    /// Checks if a value matches to a pattern.
    pub fn matches(&self, value: &str) -> bool {
        (self.0)(value)
    }
}
|
||||
|
||||
impl ::std::ops::Not for Pattern {
|
||||
type Output = Pattern;
|
||||
|
||||
/// Negates pattern
|
||||
///
|
||||
/// # Example
|
||||
/// ```
|
||||
/// use sanitize_html::rules::pattern::Pattern;
|
||||
/// use regex::Regex;
|
||||
///
|
||||
/// let pattern = !Pattern::any();
|
||||
/// assert!(!pattern.matches(""));
|
||||
/// assert!(!pattern.matches("pants"));
|
||||
/// ```
|
||||
fn not(self) -> Self::Output {
|
||||
let cb = self.0;
|
||||
Pattern(Box::new(move |value| !cb(value)))
|
||||
}
|
||||
}
|
||||
|
||||
impl ::std::ops::BitAnd for Pattern {
|
||||
type Output = Pattern;
|
||||
|
||||
/// Combines two patterns into a pattern which matches a string iff both patterns match that string.
|
||||
///
|
||||
/// # Example
|
||||
/// ```
|
||||
/// use sanitize_html::rules::pattern::Pattern;
|
||||
/// use regex::Regex;
|
||||
///
|
||||
/// let pan = Pattern::regex(Regex::new("pan").unwrap());
|
||||
/// let ant = Pattern::regex(Regex::new("ant").unwrap());
|
||||
/// let pattern = pan & ant;
|
||||
///
|
||||
/// assert!(!pattern.matches("pan"));
|
||||
/// assert!(!pattern.matches("ant"));
|
||||
/// assert!(pattern.matches("pants"));
|
||||
/// ```
|
||||
fn bitand(self, rhs: Pattern) -> Self::Output {
|
||||
let cb1 = self.0;
|
||||
let cb2 = rhs.0;
|
||||
Pattern(Box::new(move |value| cb1(value) && cb2(value)))
|
||||
}
|
||||
}
|
||||
|
||||
impl ::std::ops::BitOr for Pattern {
|
||||
type Output = Pattern;
|
||||
|
||||
/// Combines two patterns into a pattern which matches a string if one of patterns matches that string.
|
||||
///
|
||||
/// # Example
|
||||
/// ```
|
||||
/// use sanitize_html::rules::pattern::Pattern;
|
||||
/// use regex::Regex;
|
||||
///
|
||||
/// let pan = Pattern::regex(Regex::new("pan").unwrap());
|
||||
/// let pot = Pattern::regex(Regex::new("pot").unwrap());
|
||||
/// let pattern = pan | pot;
|
||||
///
|
||||
/// assert!(pattern.matches("pants"));
|
||||
/// assert!(pattern.matches("pot"));
|
||||
/// assert!(!pattern.matches("jar"));
|
||||
/// ```
|
||||
fn bitor(self, rhs: Pattern) -> Self::Output {
|
||||
let cb1 = self.0;
|
||||
let cb2 = rhs.0;
|
||||
Pattern(Box::new(move |value| cb1(value) || cb2(value)))
|
||||
}
|
||||
}
|
|
@ -1,380 +0,0 @@
|
|||
//! Predefined rules
|
||||
//!
|
||||
//! These rules are inspired by a great Ruby gem [sanitize](https://github.com/rgrove/sanitize/).
|
||||
|
||||
use super::pattern::Pattern;
|
||||
use super::{Element, Rules};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
|
||||
/// Shorthand: compile `regex` into a [`Pattern`].
///
/// Panics on an invalid regular expression; all call sites in this module
/// pass literal patterns, so a panic indicates a programming error.
fn re(regex: &str) -> Pattern {
    Pattern::regex(Regex::new(regex).unwrap())
}
|
||||
|
||||
/// Pattern for `href`-style attribute values: allow explicit
/// `ftp:`/`http:`/`https:`/`mailto:` URLs, or any value that does not start
/// with some other scheme (which rejects e.g. `javascript:` URLs).
fn href() -> Pattern {
    re("^(ftp:|http:|https:|mailto:)") | !re("^[^/]+[[:space:]]*:")
}
|
||||
|
||||
/// Pattern for `src`-style attribute values: like [`href`] but only the
/// `http:`/`https:` schemes are allowed explicitly.
fn src() -> Pattern {
    re("^(http:|https:)") | !re("^[^/]+[[:space:]]*:")
}
|
||||
|
||||
// Lazily-constructed, shared instances of the predefined rule sets defined below.
lazy_static! {
    /// Basic rules. Allows a variety of markup including formatting elements, links, and lists.
    pub static ref BASIC: Rules = basic();

    /// Default rules. Removes all tags.
    pub static ref DEFAULT: Rules = default();

    /// Relaxed rules. Allows an even wider variety of markup, including images and tables
    pub static ref RELAXED: Rules = relaxed();

    /// Restricted rules. Allows only very simple inline markup. No links, images, or block elements.
    pub static ref RESTRICTED: Rules = restricted();

    /// Rules for document from untrusted sources. Removes all tags but text emphasizing and links.
    pub static ref UNTRUSTED: Rules = untrusted();
}
|
||||
|
||||
/// Basic rules. Allows a variety of markup including formatting elements, links, and lists.
|
||||
pub fn basic() -> Rules {
|
||||
Rules::new()
|
||||
.element(Element::new("a").attribute("href", href()))
|
||||
.element(Element::new("abbr").attribute("title", Pattern::any()))
|
||||
.element(Element::new("b"))
|
||||
.element(Element::new("blockquote").attribute("cite", src()))
|
||||
.element(Element::new("br"))
|
||||
.element(Element::new("br"))
|
||||
.element(Element::new("cite"))
|
||||
.element(Element::new("code"))
|
||||
.element(Element::new("dd"))
|
||||
.element(Element::new("dfn").attribute("title", Pattern::any()))
|
||||
.element(Element::new("dl"))
|
||||
.element(Element::new("dt"))
|
||||
.element(Element::new("em"))
|
||||
.element(Element::new("i"))
|
||||
.element(Element::new("kbd"))
|
||||
.element(Element::new("li"))
|
||||
.element(Element::new("mark"))
|
||||
.element(Element::new("ol"))
|
||||
.element(Element::new("p"))
|
||||
.element(Element::new("pre"))
|
||||
.element(Element::new("q").attribute("cite", src()))
|
||||
.element(Element::new("s"))
|
||||
.element(Element::new("samp"))
|
||||
.element(Element::new("small"))
|
||||
.element(Element::new("strike"))
|
||||
.element(Element::new("strong"))
|
||||
.element(Element::new("sub"))
|
||||
.element(Element::new("sup"))
|
||||
.element(
|
||||
Element::new("time")
|
||||
.attribute("datetime", Pattern::any())
|
||||
.attribute("pubdate", Pattern::any()),
|
||||
)
|
||||
.element(Element::new("u"))
|
||||
.element(Element::new("ul"))
|
||||
.element(Element::new("var"))
|
||||
.space("address")
|
||||
.space("article")
|
||||
.space("aside")
|
||||
.space("div")
|
||||
.space("footer")
|
||||
.space("h1")
|
||||
.space("h2")
|
||||
.space("h3")
|
||||
.space("h4")
|
||||
.space("h5")
|
||||
.space("h6")
|
||||
.space("header")
|
||||
.space("hgroup")
|
||||
.space("hr")
|
||||
.space("nav")
|
||||
.space("section")
|
||||
.delete("element_name")
|
||||
}
|
||||
|
||||
/// Default rules. Removes all tags.
|
||||
pub fn default() -> Rules {
|
||||
Rules::new()
|
||||
.space("address")
|
||||
.space("article")
|
||||
.space("aside")
|
||||
.space("blockquote")
|
||||
.space("br")
|
||||
.space("dd")
|
||||
.space("div")
|
||||
.space("dl")
|
||||
.space("dt")
|
||||
.space("footer")
|
||||
.space("h1")
|
||||
.space("h2")
|
||||
.space("h3")
|
||||
.space("h4")
|
||||
.space("h5")
|
||||
.space("h6")
|
||||
.space("header")
|
||||
.space("hgroup")
|
||||
.space("hr")
|
||||
.space("li")
|
||||
.space("nav")
|
||||
.space("ol")
|
||||
.space("p")
|
||||
.space("pre")
|
||||
.space("section")
|
||||
.space("ul")
|
||||
.delete("iframe")
|
||||
.delete("noembed")
|
||||
.delete("noframes")
|
||||
.delete("noscript")
|
||||
.delete("script")
|
||||
.delete("style")
|
||||
}
|
||||
|
||||
/// Relaxed rules. Allows an even wider variety of markup, including images and tables
|
||||
pub fn relaxed() -> Rules {
|
||||
fn relaxed_element(name: &str) -> Element {
|
||||
Element::new(name)
|
||||
.attribute("dir", Pattern::any())
|
||||
.attribute("lang", Pattern::any())
|
||||
.attribute("title", Pattern::any())
|
||||
.attribute("class", Pattern::any())
|
||||
}
|
||||
|
||||
Rules::new()
|
||||
.element(relaxed_element("a").attribute("href", href()))
|
||||
.element(relaxed_element("abbr"))
|
||||
.element(relaxed_element("b"))
|
||||
.element(relaxed_element("bdo"))
|
||||
.element(relaxed_element("blockquote").attribute("cite", src()))
|
||||
.element(relaxed_element("br"))
|
||||
.element(relaxed_element("caption"))
|
||||
.element(relaxed_element("cite"))
|
||||
.element(relaxed_element("code"))
|
||||
.element(
|
||||
relaxed_element("col")
|
||||
.attribute("span", Pattern::any())
|
||||
.attribute("width", Pattern::any()),
|
||||
)
|
||||
.element(
|
||||
relaxed_element("colgroup")
|
||||
.attribute("span", Pattern::any())
|
||||
.attribute("width", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("dd"))
|
||||
.element(
|
||||
relaxed_element("del")
|
||||
.attribute("cite", src())
|
||||
.attribute("datetime", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("dfn"))
|
||||
.element(relaxed_element("dl"))
|
||||
.element(relaxed_element("dt"))
|
||||
.element(relaxed_element("em"))
|
||||
.element(relaxed_element("figcaption"))
|
||||
.element(relaxed_element("figure"))
|
||||
.element(relaxed_element("h1"))
|
||||
.element(relaxed_element("h2"))
|
||||
.element(relaxed_element("h3"))
|
||||
.element(relaxed_element("h4"))
|
||||
.element(relaxed_element("h5"))
|
||||
.element(relaxed_element("h6"))
|
||||
.element(relaxed_element("hgroup"))
|
||||
.element(relaxed_element("i"))
|
||||
.element(
|
||||
relaxed_element("img")
|
||||
.attribute("src", src())
|
||||
.attribute("align", Pattern::any())
|
||||
.attribute("alt", Pattern::any())
|
||||
.attribute("width", Pattern::any())
|
||||
.attribute("height", Pattern::any()),
|
||||
)
|
||||
.element(
|
||||
relaxed_element("ins")
|
||||
.attribute("cite", src())
|
||||
.attribute("datetime", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("kbd"))
|
||||
.element(relaxed_element("li"))
|
||||
.element(relaxed_element("mark"))
|
||||
.element(
|
||||
relaxed_element("ol")
|
||||
.attribute("start", Pattern::any())
|
||||
.attribute("reversed", Pattern::any())
|
||||
.attribute("type", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("p"))
|
||||
.element(relaxed_element("pre"))
|
||||
.element(relaxed_element("q").attribute("cite", src()))
|
||||
.element(relaxed_element("rp"))
|
||||
.element(relaxed_element("rt"))
|
||||
.element(relaxed_element("ruby"))
|
||||
.element(relaxed_element("s"))
|
||||
.element(relaxed_element("samp"))
|
||||
.element(relaxed_element("small"))
|
||||
.element(relaxed_element("strike"))
|
||||
.element(relaxed_element("strong"))
|
||||
.element(relaxed_element("sub"))
|
||||
.element(relaxed_element("sup"))
|
||||
.element(
|
||||
relaxed_element("table")
|
||||
.attribute("summary", Pattern::any())
|
||||
.attribute("width", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("tbody"))
|
||||
.element(
|
||||
relaxed_element("td")
|
||||
.attribute("abbr", Pattern::any())
|
||||
.attribute("axis", Pattern::any())
|
||||
.attribute("colspan", Pattern::any())
|
||||
.attribute("rowspan", Pattern::any())
|
||||
.attribute("width", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("tfoot"))
|
||||
.element(
|
||||
relaxed_element("th")
|
||||
.attribute("abbr", Pattern::any())
|
||||
.attribute("axis", Pattern::any())
|
||||
.attribute("colspan", Pattern::any())
|
||||
.attribute("rowspan", Pattern::any())
|
||||
.attribute("scope", Pattern::any())
|
||||
.attribute("width", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("thead"))
|
||||
.element(
|
||||
relaxed_element("time")
|
||||
.attribute("datetime", Pattern::any())
|
||||
.attribute("pubdate", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("tr"))
|
||||
.element(relaxed_element("u"))
|
||||
.element(relaxed_element("ul").attribute("type", Pattern::any()))
|
||||
.element(relaxed_element("var"))
|
||||
.element(relaxed_element("wbr"))
|
||||
.space("address")
|
||||
.space("article")
|
||||
.space("aside")
|
||||
.space("footer")
|
||||
.space("header")
|
||||
.space("hr")
|
||||
.space("nav")
|
||||
.space("section")
|
||||
}
|
||||
|
||||
/// Restricted rules. Allows only very simple inline markup. No links, images, or block elements.
|
||||
pub fn restricted() -> Rules {
|
||||
Rules::new()
|
||||
.element(Element::new("b"))
|
||||
.element(Element::new("em"))
|
||||
.element(Element::new("i"))
|
||||
.element(Element::new("strong"))
|
||||
.element(Element::new("u"))
|
||||
.space("address")
|
||||
.space("article")
|
||||
.space("aside")
|
||||
.space("blockquote")
|
||||
.space("br")
|
||||
.space("dd")
|
||||
.space("div")
|
||||
.space("dl")
|
||||
.space("dt")
|
||||
.space("footer")
|
||||
.space("h1")
|
||||
.space("h2")
|
||||
.space("h3")
|
||||
.space("h4")
|
||||
.space("h5")
|
||||
.space("h6")
|
||||
.space("header")
|
||||
.space("hgroup")
|
||||
.space("hr")
|
||||
.space("li")
|
||||
.space("nav")
|
||||
.space("ol")
|
||||
.space("p")
|
||||
.space("pre")
|
||||
.space("section")
|
||||
.space("ul")
|
||||
}
|
||||
|
||||
/// Rules for document from untrusted sources. Removes all tags but text emphasizing and links.
|
||||
pub fn untrusted() -> Rules {
|
||||
Rules::new()
|
||||
.element(
|
||||
Element::new("a")
|
||||
.attribute("href", href())
|
||||
.mandatory_attribute("target", "_blank")
|
||||
.mandatory_attribute("rel", "noreferrer noopener"),
|
||||
)
|
||||
.element(Element::new("b"))
|
||||
.element(Element::new("em"))
|
||||
.element(Element::new("i"))
|
||||
.element(Element::new("strong"))
|
||||
.element(Element::new("u"))
|
||||
.space("address")
|
||||
.space("article")
|
||||
.space("aside")
|
||||
.space("blockquote")
|
||||
.space("br")
|
||||
.space("dd")
|
||||
.space("div")
|
||||
.space("dl")
|
||||
.space("dt")
|
||||
.space("footer")
|
||||
.space("h1")
|
||||
.space("h2")
|
||||
.space("h3")
|
||||
.space("h4")
|
||||
.space("h5")
|
||||
.space("h6")
|
||||
.space("header")
|
||||
.space("hgroup")
|
||||
.space("hr")
|
||||
.space("li")
|
||||
.space("nav")
|
||||
.space("ol")
|
||||
.space("p")
|
||||
.space("pre")
|
||||
.space("section")
|
||||
.space("ul")
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{basic, default, relaxed, restricted, untrusted};
|
||||
|
||||
#[test]
|
||||
fn basic_does_not_fail() {
|
||||
let rules = basic();
|
||||
assert_eq!(rules.allowed_elements.len(), 31);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn default_does_not_fail() {
|
||||
let rules = default();
|
||||
assert_eq!(rules.allowed_elements.len(), 0);
|
||||
assert_eq!(rules.space_elements.len(), 26);
|
||||
assert_eq!(rules.delete_elements.len(), 6);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn relaxed_does_not_fail() {
|
||||
let rules = relaxed();
|
||||
assert_eq!(rules.allowed_elements.len(), 58);
|
||||
assert_eq!(rules.space_elements.len(), 8);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn restricted_does_not_fail() {
|
||||
let rules = restricted();
|
||||
assert_eq!(rules.allowed_elements.len(), 5);
|
||||
assert_eq!(rules.space_elements.len(), 26);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn untrusted_does_not_fail() {
|
||||
let rules = untrusted();
|
||||
assert_eq!(rules.allowed_elements.len(), 6);
|
||||
assert_eq!(rules.space_elements.len(), 26);
|
||||
}
|
||||
}
|
|
@ -1,202 +0,0 @@
|
|||
use crate::rules::{Element, Rules};
|
||||
use html5ever::{interface::QualName, namespace_url, ns, LocalName};
|
||||
use kuchiki::{Attribute, ElementData, ExpandedName, NodeData, NodeRef};
|
||||
|
||||
fn simple_qual_name(name: &str) -> QualName {
|
||||
QualName::new(None, ns!(), LocalName::from(name))
|
||||
}
|
||||
|
||||
fn qual_name_to_string(name: &QualName) -> String {
|
||||
if name.ns == ns!(html) || name.ns.is_empty() {
|
||||
name.local.to_lowercase()
|
||||
} else {
|
||||
format!("{}:{}", name.ns.to_lowercase(), name.local.to_lowercase())
|
||||
}
|
||||
}
|
||||
|
||||
fn expanded_name_to_string(name: &ExpandedName) -> String {
|
||||
if name.ns == ns!(html) || name.ns.is_empty() {
|
||||
name.local.to_lowercase()
|
||||
} else {
|
||||
format!("{}:{}", name.ns.to_lowercase(), name.local.to_lowercase())
|
||||
}
|
||||
}
|
||||
|
||||
fn simple_element(
|
||||
name: QualName,
|
||||
attrs: Vec<(ExpandedName, Attribute)>,
|
||||
children: Vec<NodeRef>,
|
||||
) -> NodeRef {
|
||||
let element = NodeRef::new_element(name, attrs);
|
||||
for child in children {
|
||||
child.detach();
|
||||
element.append(child);
|
||||
}
|
||||
element
|
||||
}
|
||||
|
||||
fn create_space_text() -> NodeRef {
|
||||
NodeRef::new_text(" ")
|
||||
}
|
||||
|
||||
enum ElementAction<'t> {
|
||||
Keep(&'t Element),
|
||||
Delete,
|
||||
Space,
|
||||
Elide,
|
||||
Rename(&'t str),
|
||||
}
|
||||
|
||||
fn element_action<'t>(element_name: &QualName, rules: &'t Rules) -> ElementAction<'t> {
|
||||
let name = qual_name_to_string(element_name);
|
||||
if name == "html" || name == "body" {
|
||||
ElementAction::Elide
|
||||
} else if let Some(element_sanitizer) = rules.allowed_elements.get(&name) {
|
||||
ElementAction::Keep(element_sanitizer)
|
||||
} else if rules.delete_elements.contains(&name) {
|
||||
ElementAction::Delete
|
||||
} else if rules.space_elements.contains(&name) {
|
||||
ElementAction::Space
|
||||
} else if let Some(rename_to) = rules.rename_elements.get(&name) {
|
||||
ElementAction::Rename(rename_to)
|
||||
} else {
|
||||
ElementAction::Elide
|
||||
}
|
||||
}
|
||||
|
||||
fn clean_nodes(nodes: impl IntoIterator<Item = NodeRef>, rules: &Rules) -> Vec<NodeRef> {
|
||||
let mut result = Vec::new();
|
||||
for node in nodes {
|
||||
let subnodes = clean_node(&node, rules);
|
||||
result.extend(subnodes);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn clean_node(node: &NodeRef, rules: &Rules) -> Vec<NodeRef> {
|
||||
match node.data() {
|
||||
NodeData::Document(..) => vec![],
|
||||
NodeData::DocumentFragment => vec![], // TODO: ??
|
||||
NodeData::Doctype(..) => vec![],
|
||||
NodeData::ProcessingInstruction(..) => vec![],
|
||||
|
||||
NodeData::Text(..) => vec![node.clone()],
|
||||
|
||||
NodeData::Comment(..) => {
|
||||
if rules.allow_comments {
|
||||
vec![node.clone()]
|
||||
} else {
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
|
||||
NodeData::Element(ElementData {
|
||||
ref name,
|
||||
ref attributes,
|
||||
..
|
||||
}) => {
|
||||
match element_action(name, rules) {
|
||||
ElementAction::Keep(element_sanitizer) => {
|
||||
let mut new_attrs: Vec<(ExpandedName, Attribute)> = Vec::new();
|
||||
|
||||
/* whitelisted attributes */
|
||||
for (attr_name, attr_value) in attributes.borrow().map.iter() {
|
||||
let expanded_name = expanded_name_to_string(attr_name);
|
||||
|
||||
let new_value = if !element_sanitizer.attribute_rules.modify_attributes.contains_key(&expanded_name) {
|
||||
attr_value.clone()
|
||||
} else {
|
||||
let func = element_sanitizer.attribute_rules.modify_attributes.get(&expanded_name).unwrap();
|
||||
let new_value = func(attr_value.value.clone());
|
||||
Attribute {
|
||||
prefix: attr_value.prefix.clone(),
|
||||
value: new_value
|
||||
}
|
||||
};
|
||||
|
||||
if !element_sanitizer
|
||||
.is_valid(&expanded_name_to_string(attr_name), &new_value.value)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
let name = &attr_name.local.to_string();
|
||||
let new_name = if element_sanitizer
|
||||
.attribute_rules
|
||||
.rename_attributes
|
||||
.contains_key(name)
|
||||
{
|
||||
ExpandedName::new(
|
||||
attr_name.ns.clone(),
|
||||
String::from(
|
||||
element_sanitizer
|
||||
.attribute_rules
|
||||
.rename_attributes
|
||||
.get(name)
|
||||
.unwrap(),
|
||||
),
|
||||
)
|
||||
} else {
|
||||
attr_name.clone()
|
||||
};
|
||||
|
||||
new_attrs.push((new_name, attr_value.clone()));
|
||||
}
|
||||
|
||||
/* mandatory attributes */
|
||||
let mut mandatory_attributes: Vec<(&String, &String)> =
|
||||
element_sanitizer.mandatory_attributes.iter().collect();
|
||||
mandatory_attributes.sort();
|
||||
for &(attr_name, attr_value) in mandatory_attributes.iter() {
|
||||
new_attrs.push((
|
||||
ExpandedName::new(ns!(), LocalName::from(attr_name.as_str())),
|
||||
Attribute {
|
||||
prefix: None,
|
||||
value: attr_value.into(),
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
let children = clean_nodes(node.children(), rules);
|
||||
let element = simple_element(name.clone(), new_attrs, children);
|
||||
|
||||
vec![element]
|
||||
}
|
||||
|
||||
ElementAction::Delete => vec![],
|
||||
|
||||
ElementAction::Elide => clean_nodes(node.children(), rules),
|
||||
|
||||
ElementAction::Space => {
|
||||
let mut nodes = clean_nodes(node.children(), rules);
|
||||
if nodes.is_empty() {
|
||||
nodes.push(create_space_text());
|
||||
} else {
|
||||
nodes.insert(0, create_space_text());
|
||||
nodes.push(create_space_text());
|
||||
}
|
||||
nodes
|
||||
}
|
||||
|
||||
ElementAction::Rename(rename_to) => {
|
||||
let children = clean_nodes(node.children(), rules);
|
||||
vec![simple_element(
|
||||
simple_qual_name(rename_to),
|
||||
Vec::new(),
|
||||
children,
|
||||
)]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn sanitize_dom(dom: &NodeRef, mode: &Rules) -> NodeRef {
|
||||
let new_children = clean_nodes(dom.children(), mode);
|
||||
let new_dom = NodeRef::new_document();
|
||||
for child in new_children {
|
||||
child.detach();
|
||||
new_dom.append(child);
|
||||
}
|
||||
new_dom
|
||||
}
|
|
@ -1,645 +0,0 @@
|
|||
#![cfg(test)]
|
||||
|
||||
use super::rules::predefined::*;
|
||||
use super::rules::{Element, Rules};
|
||||
use super::sanitize_str;
|
||||
|
||||
#[test]
|
||||
fn empty() {
|
||||
assert_eq!(&sanitize_str(&BASIC, "").unwrap(), "");
|
||||
assert_eq!(&sanitize_str(&DEFAULT, "").unwrap(), "");
|
||||
assert_eq!(&sanitize_str(&RELAXED, "").unwrap(), "");
|
||||
assert_eq!(&sanitize_str(&RESTRICTED, "").unwrap(), "");
|
||||
assert_eq!(&sanitize_str(&UNTRUSTED, "").unwrap(), "");
|
||||
}
|
||||
|
||||
/* basic */
|
||||
|
||||
const BASIC_HTML: &str = "<b>Lo<!-- comment -->rem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\");</script>";
|
||||
|
||||
#[test]
|
||||
fn basic_default() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&DEFAULT, BASIC_HTML).unwrap(),
|
||||
"Lorem ipsum dolor sit amet "
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, BASIC_HTML).unwrap(),
|
||||
"<b>Lorem</b> ipsum <strong>dolor</strong> sit amet alert(\"hello world\");"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, BASIC_HTML).unwrap(),
|
||||
"<b>Lorem</b> <a href=\"pants\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, BASIC_HTML).unwrap(),
|
||||
"<b>Lorem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
|
||||
);
|
||||
}
|
||||
|
||||
/* malformed */
|
||||
|
||||
const MALFORMED_HTML: &str = "Lo<!-- comment -->rem</b> <a href=pants title=\"foo>ipsum <a href=\"http://foo.com/\"><strong>dolor</a></strong> sit<br/>amet <script>alert(\"hello world\");";
|
||||
|
||||
#[test]
|
||||
fn malformed_default() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&DEFAULT, MALFORMED_HTML).unwrap(),
|
||||
"Lorem dolor sit amet "
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn malformed_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, MALFORMED_HTML).unwrap(),
|
||||
"Lorem <strong>dolor</strong> sit amet alert(\"hello world\");"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn malformed_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, MALFORMED_HTML).unwrap(),
|
||||
"Lorem <a href=\"pants\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn malformed_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, MALFORMED_HTML).unwrap(),
|
||||
"Lorem <a href=\"pants\" title=\"foo>ipsum <a href=\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
|
||||
);
|
||||
}
|
||||
|
||||
/* unclosed */
|
||||
|
||||
const UNCLOSED_HTML: &str = "<p>a</p><blockquote>b";
|
||||
|
||||
#[test]
|
||||
fn unclosed_default() {
|
||||
assert_eq!(&sanitize_str(&DEFAULT, UNCLOSED_HTML).unwrap(), " a b ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unclosed_restricted() {
|
||||
assert_eq!(&sanitize_str(&RESTRICTED, UNCLOSED_HTML).unwrap(), " a b ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unclosed_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, UNCLOSED_HTML).unwrap(),
|
||||
"<p>a</p><blockquote>b</blockquote>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unclosed_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, UNCLOSED_HTML).unwrap(),
|
||||
"<p>a</p><blockquote>b</blockquote>"
|
||||
);
|
||||
}
|
||||
|
||||
/* malicious */
|
||||
|
||||
const MALICIOUS_HTML: &str = "<b>Lo<!-- comment -->rem</b> <a href=\"javascript:pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert(\"hello world\");</script>";
|
||||
|
||||
#[test]
|
||||
fn malicious_default() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&DEFAULT, MALICIOUS_HTML).unwrap(),
|
||||
"Lorem ipsum dolor sit amet <script>alert(\"hello world\");"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn malicious_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, MALICIOUS_HTML).unwrap(),
|
||||
"<b>Lorem</b> ipsum <strong>dolor</strong> sit amet <script>alert(\"hello world\");"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn malicious_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, MALICIOUS_HTML).unwrap(),
|
||||
"<b>Lorem</b> <a>ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet <script>alert(\"hello world\");"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn malicious_untrusted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&UNTRUSTED, MALICIOUS_HTML).unwrap(),
|
||||
"<b>Lorem</b> <a rel=\"noreferrer noopener\" target=\"_blank\">ipsum</a> <a href=\"http://foo.com/\" rel=\"noreferrer noopener\" target=\"_blank\"><strong>dolor</strong></a> sit amet <script>alert(\"hello world\");"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn malicious_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, MALICIOUS_HTML).unwrap(),
|
||||
"<b>Lorem</b> <a title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet <script>alert(\"hello world\");"
|
||||
);
|
||||
}
|
||||
|
||||
/* raw-comment */
|
||||
|
||||
const RAW_COMMENT_HTML: &str = "<!-- comment -->Hello";
|
||||
|
||||
#[test]
|
||||
fn raw_comment_default() {
|
||||
assert_eq!(&sanitize_str(&DEFAULT, RAW_COMMENT_HTML).unwrap(), "Hello");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn raw_comment_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, RAW_COMMENT_HTML).unwrap(),
|
||||
"Hello"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn raw_comment_basic() {
|
||||
assert_eq!(&sanitize_str(&BASIC, RAW_COMMENT_HTML).unwrap(), "Hello");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn raw_comment_relaxed() {
|
||||
assert_eq!(&sanitize_str(&RELAXED, RAW_COMMENT_HTML).unwrap(), "Hello");
|
||||
}
|
||||
|
||||
/* protocol-based JS injection: simple, no spaces */
|
||||
|
||||
const JS_INJECTION_HTML_1: &str = "<a href=\"javascript:alert(\'XSS\');\">foo</a>";
|
||||
|
||||
#[test]
|
||||
fn js_injection_1_default() {
|
||||
assert_eq!(&sanitize_str(&DEFAULT, JS_INJECTION_HTML_1).unwrap(), "foo");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_1_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, JS_INJECTION_HTML_1).unwrap(),
|
||||
"foo"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_1_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, JS_INJECTION_HTML_1).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_1_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, JS_INJECTION_HTML_1).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
/* protocol-based JS injection: simple, spaces before */
|
||||
|
||||
const JS_INJECTION_HTML_2: &str = "<a href=\"javascript :alert(\'XSS\');\">foo</a>";
|
||||
|
||||
#[test]
|
||||
fn js_injection_2_default() {
|
||||
assert_eq!(&sanitize_str(&DEFAULT, JS_INJECTION_HTML_2).unwrap(), "foo");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_2_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, JS_INJECTION_HTML_2).unwrap(),
|
||||
"foo"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_2_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, JS_INJECTION_HTML_2).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_2_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, JS_INJECTION_HTML_2).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
/* protocol-based JS injection: simple, spaces after */
|
||||
|
||||
const JS_INJECTION_HTML_3: &str = "<a href=\"javascript: alert(\'XSS\');\">foo</a>";
|
||||
|
||||
#[test]
|
||||
fn js_injection_3_default() {
|
||||
assert_eq!(&sanitize_str(&DEFAULT, JS_INJECTION_HTML_3).unwrap(), "foo");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_3_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, JS_INJECTION_HTML_3).unwrap(),
|
||||
"foo"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_3_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, JS_INJECTION_HTML_3).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_3_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, JS_INJECTION_HTML_3).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
/* protocol-based JS injection: simple, spaces before and after */
|
||||
|
||||
const JS_INJECTION_HTML_4: &str = "<a href=\"javascript : alert(\'XSS\');\">foo</a>";
|
||||
|
||||
#[test]
|
||||
fn js_injection_4_default() {
|
||||
assert_eq!(&sanitize_str(&DEFAULT, JS_INJECTION_HTML_4).unwrap(), "foo");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_4_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, JS_INJECTION_HTML_4).unwrap(),
|
||||
"foo"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_4_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, JS_INJECTION_HTML_4).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_4_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, JS_INJECTION_HTML_4).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
/* protocol-based JS injection: preceding colon */
|
||||
|
||||
const JS_INJECTION_HTML_5: &str = "<a href=\":javascript:alert(\'XSS\');\">foo</a>";
|
||||
|
||||
#[test]
|
||||
fn js_injection_5_default() {
|
||||
assert_eq!(&sanitize_str(&DEFAULT, JS_INJECTION_HTML_5).unwrap(), "foo");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_5_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, JS_INJECTION_HTML_5).unwrap(),
|
||||
"foo"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_5_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, JS_INJECTION_HTML_5).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_5_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, JS_INJECTION_HTML_5).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
/* protocol-based JS injection: UTF-8 encoding */
|
||||
|
||||
const JS_INJECTION_HTML_6: &str = "<a href=\"javascript:\">foo</a>";
|
||||
|
||||
#[test]
|
||||
fn js_injection_6_default() {
|
||||
assert_eq!(&sanitize_str(&DEFAULT, JS_INJECTION_HTML_6).unwrap(), "foo");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_6_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, JS_INJECTION_HTML_6).unwrap(),
|
||||
"foo"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_6_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, JS_INJECTION_HTML_6).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_6_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, JS_INJECTION_HTML_6).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
/* protocol-based JS injection: long UTF-8 encoding */
|
||||
|
||||
const JS_INJECTION_HTML_7: &str = "<a href=\"javascript:\">foo</a>";
|
||||
|
||||
#[test]
|
||||
fn js_injection_7_default() {
|
||||
assert_eq!(&sanitize_str(&DEFAULT, JS_INJECTION_HTML_7).unwrap(), "foo");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_7_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, JS_INJECTION_HTML_7).unwrap(),
|
||||
"foo"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_7_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, JS_INJECTION_HTML_7).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_7_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, JS_INJECTION_HTML_7).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
/* protocol-based JS injection: long UTF-8 encoding without semicolons */
|
||||
|
||||
const JS_INJECTION_HTML_8: &str = "<a href=javascript:alert('XSS')>foo</a>";
|
||||
|
||||
#[test]
|
||||
fn js_injection_8_default() {
|
||||
assert_eq!(&sanitize_str(&DEFAULT, JS_INJECTION_HTML_8).unwrap(), "foo");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_8_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, JS_INJECTION_HTML_8).unwrap(),
|
||||
"foo"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_8_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, JS_INJECTION_HTML_8).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_8_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, JS_INJECTION_HTML_8).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
/* protocol-based JS injection: hex encoding */
|
||||
|
||||
const JS_INJECTION_HTML_9: &str = "<a href=\"javascript:\">foo</a>";
|
||||
|
||||
#[test]
|
||||
fn js_injection_9_default() {
|
||||
assert_eq!(&sanitize_str(&DEFAULT, JS_INJECTION_HTML_9).unwrap(), "foo");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_9_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, JS_INJECTION_HTML_9).unwrap(),
|
||||
"foo"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_9_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, JS_INJECTION_HTML_9).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_9_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, JS_INJECTION_HTML_9).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
/* protocol-based JS injection: long hex encoding */
|
||||
|
||||
const JS_INJECTION_HTML_10: &str = "<a href=\"javascript:\">foo</a>";
|
||||
|
||||
#[test]
|
||||
fn js_injection_10_default() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&DEFAULT, JS_INJECTION_HTML_10).unwrap(),
|
||||
"foo"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_10_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, JS_INJECTION_HTML_10).unwrap(),
|
||||
"foo"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_10_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, JS_INJECTION_HTML_10).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_10_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, JS_INJECTION_HTML_10).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
/* protocol-based JS injection: hex encoding without semicolons */
|
||||
|
||||
const JS_INJECTION_HTML_11: &str = "<a href=javascript:alert('XSS')>foo</a>";
|
||||
|
||||
#[test]
|
||||
fn js_injection_11_default() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&DEFAULT, JS_INJECTION_HTML_11).unwrap(),
|
||||
"foo"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_11_restricted() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RESTRICTED, JS_INJECTION_HTML_11).unwrap(),
|
||||
"foo"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_11_basic() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&BASIC, JS_INJECTION_HTML_11).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_injection_11_relaxed() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&RELAXED, JS_INJECTION_HTML_11).unwrap(),
|
||||
"<a>foo</a>"
|
||||
);
|
||||
}
|
||||
|
||||
/* should translate valid HTML entities */
|
||||
|
||||
#[test]
|
||||
fn misc_1() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&DEFAULT, "Don't tasé me & bro!").unwrap(),
|
||||
"Don't tasé me & bro!"
|
||||
);
|
||||
}
|
||||
|
||||
/* should translate valid HTML entities while encoding unencoded ampersands */
|
||||
|
||||
#[test]
|
||||
fn misc_2() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&DEFAULT, "cookies² & ¼ créme").unwrap(),
|
||||
"cookies² & ¼ créme"
|
||||
);
|
||||
}
|
||||
|
||||
/* should never output ' */
|
||||
|
||||
#[test]
|
||||
fn misc_3() {
|
||||
assert_eq!(
|
||||
&sanitize_str(
|
||||
&DEFAULT,
|
||||
"<a href=''' class=\"' '\">IE6 isn't a real browser</a>"
|
||||
)
|
||||
.unwrap(),
|
||||
"IE6 isn't a real browser"
|
||||
);
|
||||
}
|
||||
|
||||
/* should not choke on several instances of the same element in a row */
|
||||
|
||||
#[test]
|
||||
fn misc_4() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&DEFAULT, "<img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\">").unwrap(),
|
||||
""
|
||||
);
|
||||
}
|
||||
|
||||
/* should surround the contents of :whitespace_elements with space characters when removing the element */
|
||||
|
||||
#[test]
|
||||
fn misc_5() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&DEFAULT, "foo<div>bar</div>baz").unwrap(),
|
||||
"foo bar baz"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn misc_6() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&DEFAULT, "foo<br>bar<br>baz").unwrap(),
|
||||
"foo bar baz"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn misc_7() {
|
||||
assert_eq!(
|
||||
&sanitize_str(&DEFAULT, "foo<hr>bar<hr>baz").unwrap(),
|
||||
"foo bar baz"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn custom_rules() {
|
||||
let rules = Rules::new()
|
||||
.allow_comments(true)
|
||||
.element(Element::new("b"))
|
||||
.element(Element::new("span"))
|
||||
.delete("script")
|
||||
.delete("style")
|
||||
.space("br")
|
||||
.rename("strong", "span");
|
||||
|
||||
let html = "<b>Lo<!-- comment -->rem</b> <a href=\"javascript:pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\")</script>";
|
||||
|
||||
assert_eq!(
|
||||
&sanitize_str(&rules, html).unwrap(),
|
||||
"<b>Lo<!-- comment -->rem</b> ipsum <span>dolor</span> sit amet "
|
||||
);
|
||||
}
|
Loading…
Reference in New Issue