From abf4c787ab4c2d7f9421a765d9a94060272fedec Mon Sep 17 00:00:00 2001 From: Jacob Kiers Date: Tue, 2 Aug 2022 23:11:55 +0200 Subject: [PATCH] Manually remove images instead of parsing Instead of using all kinds of difficult libraries, just replace the string "src" with "data-source". This covers most cases of removing images. This also removes the previously inlined kuchiki and sanitize-html-rs libraries. Signed-off-by: Jacob Kiers --- Cargo.lock | 648 +----------------- Cargo.toml | 2 - bin/Cargo.toml | 1 - bin/src/main.rs | 28 +- kuchiki/.gitignore | 3 - kuchiki/.travis.yml | 6 - kuchiki/Cargo.toml | 22 - kuchiki/LICENSE | 23 - kuchiki/README.md | 10 - kuchiki/docs/.nojekyll | 0 kuchiki/docs/404.html | 3 - kuchiki/docs/index.html | 3 - kuchiki/examples/find_matches.rs | 48 -- kuchiki/examples/stack-overflow.rs | 22 - kuchiki/src/attributes.rs | 83 --- kuchiki/src/cell_extras.rs | 113 --- kuchiki/src/iter.rs | 452 ------------ kuchiki/src/lib.rs | 40 -- kuchiki/src/node_data_ref.rs | 116 ---- kuchiki/src/parser.rs | 241 ------- kuchiki/src/select.rs | 433 ------------ kuchiki/src/serializer.rs | 105 --- kuchiki/src/tests.rs | 185 ----- kuchiki/src/tree.rs | 489 ------------- kuchiki/test_data/foo.html | 9 - sanitize-html-rs/.github/workflows/build.yml | 31 - .../.github/workflows/coverage.yml | 27 - sanitize-html-rs/.github/workflows/style.yml | 24 - sanitize-html-rs/.gitignore | 4 - sanitize-html-rs/Cargo.toml | 16 - sanitize-html-rs/LICENSE.txt | 18 - sanitize-html-rs/README.md | 8 - sanitize-html-rs/src/errors.rs | 37 - sanitize-html-rs/src/lib.rs | 42 -- sanitize-html-rs/src/parse.rs | 38 - sanitize-html-rs/src/rules/mod.rs | 141 ---- sanitize-html-rs/src/rules/pattern.rs | 127 ---- sanitize-html-rs/src/rules/predefined.rs | 380 ---------- sanitize-html-rs/src/sanitize.rs | 202 ------ sanitize-html-rs/src/tests.rs | 645 ----------------- 40 files changed, 3 insertions(+), 4822 deletions(-) delete mode 100644 kuchiki/.gitignore delete mode 100644 kuchiki/.travis.yml delete mode 100644 kuchiki/Cargo.toml delete mode 100644 kuchiki/LICENSE delete mode 100644 kuchiki/README.md delete mode 100644 kuchiki/docs/.nojekyll delete mode 100644 kuchiki/docs/404.html delete mode 100644 kuchiki/docs/index.html delete mode 100644 kuchiki/examples/find_matches.rs delete mode 100644 kuchiki/examples/stack-overflow.rs delete mode 100644 kuchiki/src/attributes.rs delete mode 100644 kuchiki/src/cell_extras.rs delete mode 100644 kuchiki/src/iter.rs delete mode 100644 kuchiki/src/lib.rs delete mode 100644 kuchiki/src/node_data_ref.rs delete mode 100644 kuchiki/src/parser.rs delete mode 100644 kuchiki/src/select.rs delete mode 100644 kuchiki/src/serializer.rs delete mode 100644 kuchiki/src/tests.rs delete mode 100644 kuchiki/src/tree.rs delete mode 100644 kuchiki/test_data/foo.html delete mode 100644 sanitize-html-rs/.github/workflows/build.yml delete mode 100644 sanitize-html-rs/.github/workflows/coverage.yml delete mode 100644 sanitize-html-rs/.github/workflows/style.yml delete mode 100644 sanitize-html-rs/.gitignore delete mode 100644 sanitize-html-rs/Cargo.toml delete mode 100644 sanitize-html-rs/LICENSE.txt delete mode 100644 sanitize-html-rs/README.md delete mode 100644 sanitize-html-rs/src/errors.rs delete mode 100644 sanitize-html-rs/src/lib.rs delete mode 100644 sanitize-html-rs/src/parse.rs delete mode 100644 sanitize-html-rs/src/rules/mod.rs delete mode 100644 sanitize-html-rs/src/rules/pattern.rs delete mode 100644 sanitize-html-rs/src/rules/predefined.rs delete mode 100644 sanitize-html-rs/src/sanitize.rs delete mode 100644 sanitize-html-rs/src/tests.rs diff --git a/Cargo.lock b/Cargo.lock index c49097f..22debc2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -62,12 +62,6 @@ version = "3.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" -[[package]] -name = "byteorder" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - [[package]] name = "cc" version = "1.0.73" @@ -93,12 +87,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "convert_case" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" - [[package]] name = "cpufeatures" version = "0.2.2" @@ -118,46 +106,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "cssparser" -version = "0.27.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" -dependencies = [ - "cssparser-macros", - "dtoa-short", - "itoa", - "matches", - "phf 0.8.0", - "proc-macro2", - "quote", - "smallvec", - "syn", -] - -[[package]] -name = "cssparser-macros" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e" -dependencies = [ - "quote", - "syn", -] - -[[package]] -name = "derive_more" -version = "0.99.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" -dependencies = [ - "convert_case", - "proc-macro2", - "quote", - "rustc_version", - "syn", -] - [[package]] name = "digest" version = "0.10.3" @@ -168,21 +116,6 @@ dependencies = [ "crypto-common", ] -[[package]] -name = "dtoa" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0" - -[[package]] -name = "dtoa-short" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bde03329ae10e79ede66c9ce4dc930aa8599043b0743008548680f25b91502d6" -dependencies = [ - "dtoa", -] - [[package]] name = "encoding_rs" version = "0.8.31" @@ -192,34 +125,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "fastrand" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" -dependencies = [ - "instant", -] - -[[package]] -name = "futf" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" -dependencies = [ - "mac", - "new_debug_unreachable", -] - -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - [[package]] name = "generic-array" version = "0.14.5" @@ -230,48 +135,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "getrandom" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.9.0+wasi-snapshot-preview1", -] - -[[package]] -name = "getrandom" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", -] - -[[package]] -name = "hashbrown" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" - -[[package]] -name = "html5ever" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" -dependencies = [ - "log", - "mac", - "markup5ever", - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "imap" version = "2.4.1" @@ -296,31 +159,6 @@ dependencies = [ "nom", ] -[[package]] -name = "indexmap" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6012d540c5baa3589337a98ce73408de9b5a25ec9fc2c6fd6be8f0d39e0ca5a" -dependencies = [ - "autocfg", - "hashbrown", -] - -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "itoa" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" - [[package]] name = "js-sys" version = "0.3.57" @@ -330,18 +168,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "kuchiki" -version = "0.8.1" -dependencies = [ - "cssparser", - "html5ever", - "indexmap", - "matches", - "selectors", - "tempfile", -] - [[package]] name = "lazy_static" version = "1.4.0" @@ -367,16 +193,6 @@ version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" -[[package]] -name = "lock_api" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" -dependencies = [ - "autocfg", - "scopeguard", -] - [[package]] name = "log" version = "0.4.17" @@ -386,12 +202,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "mac" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" - [[package]] name = "mail-parser" version = "0.5.0" @@ -402,38 +212,12 @@ dependencies = [ "serde", ] -[[package]] -name = "markup5ever" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" -dependencies = [ - "log", - "phf 0.10.1", - "phf_codegen 0.10.0", - "string_cache", - "string_cache_codegen", - "tendril", -] - -[[package]] -name = "matches" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" - [[package]] name = "memchr" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" -[[package]] -name = "new_debug_unreachable" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" - [[package]] name = "newsletter-to-web" version = "0.1.0" @@ -442,16 +226,9 @@ dependencies = [ "imap", "mail-parser", "rustls-connector", - "sanitize_html", "sha2", ] -[[package]] -name = "nodrop" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" - [[package]] name = "nom" version = "5.1.2" @@ -488,139 +265,6 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" -[[package]] -name = "parking_lot" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-sys", -] - -[[package]] -name = "phf" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" -dependencies = [ - "phf_macros", - "phf_shared 0.8.0", - "proc-macro-hack", -] - -[[package]] -name = "phf" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" -dependencies = [ - "phf_shared 0.10.0", -] - -[[package]] -name = "phf_codegen" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" -dependencies = [ - "phf_generator 0.8.0", - "phf_shared 0.8.0", -] - -[[package]] -name = "phf_codegen" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" -dependencies = [ - "phf_generator 0.10.0", - "phf_shared 0.10.0", -] - -[[package]] -name = "phf_generator" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" -dependencies = [ - "phf_shared 0.8.0", - "rand 0.7.3", -] - -[[package]] -name = "phf_generator" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" -dependencies = [ - "phf_shared 0.10.0", - "rand 0.8.5", -] - -[[package]] -name = "phf_macros" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" -dependencies = [ - "phf_generator 0.8.0", - "phf_shared 0.8.0", - "proc-macro-hack", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "phf_shared" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" -dependencies = [ - "siphasher", -] - -[[package]] -name = "phf_shared" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" -dependencies = [ - "siphasher", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" - -[[package]] -name = "precomputed-hash" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" - -[[package]] -name = "proc-macro-hack" -version = "0.5.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" - [[package]] name = "proc-macro2" version = "1.0.39" @@ -639,96 +283,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "rand" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" -dependencies = [ - "getrandom 0.1.16", - "libc", - "rand_chacha 0.2.2", - "rand_core 0.5.1", - "rand_hc", - "rand_pcg", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.3", -] - -[[package]] -name = "rand_chacha" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" -dependencies = [ - "ppv-lite86", - "rand_core 0.5.1", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.3", -] - -[[package]] -name = "rand_core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" -dependencies = [ - "getrandom 0.1.16", -] - -[[package]] -name = "rand_core" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" -dependencies = [ - "getrandom 0.2.6", -] - -[[package]] -name = "rand_hc" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -dependencies = [ - "rand_core 0.5.1", -] - -[[package]] -name = "rand_pcg" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" -dependencies = [ - "rand_core 0.5.1", -] - -[[package]] -name = "redox_syscall" -version = "0.2.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" -dependencies = [ - "bitflags", -] - [[package]] name = "regex" version = "1.5.6" @@ -746,15 +300,6 @@ version = "0.6.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" -[[package]] -name = "remove_dir_all" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" -dependencies = [ - "winapi", -] - [[package]] name = "ring" version = "0.16.20" @@ -770,15 +315,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "rustc_version" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" -dependencies = [ - "semver", -] - [[package]] name = "rustls" version = "0.20.6" @@ -809,22 +345,6 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" -[[package]] -name = "sanitize_html" -version = "0.7.0" -dependencies = [ - "html5ever", - "kuchiki", - "lazy_static", - "regex", -] - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - [[package]] name = "sct" version = "0.7.0" @@ -835,32 +355,6 @@ dependencies = [ "untrusted", ] -[[package]] -name = "selectors" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" -dependencies = [ - "bitflags", - "cssparser", - "derive_more", - "fxhash", - "log", - "matches", - "phf 0.8.0", - "phf_codegen 0.8.0", - "precomputed-hash", - "servo_arc", - "smallvec", - "thin-slice", -] - -[[package]] -name = "semver" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a41d061efea015927ac527063765e73601444cdc344ba855bc7bd44578b25e1c" - [[package]] name = "serde" version = "1.0.137" @@ -881,16 +375,6 @@ dependencies = [ "syn", ] -[[package]] -name = "servo_arc" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" -dependencies = [ - "nodrop", - "stable_deref_trait", -] - [[package]] name = "sha2" version = "0.10.2" @@ -902,62 +386,18 @@ dependencies = [ "digest", ] -[[package]] -name = "siphasher" -version = "0.3.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" - -[[package]] -name = "smallvec" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" - [[package]] name = "spin" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - [[package]] name = "static_assertions" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "string_cache" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "213494b7a2b503146286049378ce02b482200519accc31872ee8be91fa820a08" -dependencies = [ - "new_debug_unreachable", - "once_cell", - "parking_lot", - "phf_shared 0.10.0", - "precomputed-hash", - "serde", -] - -[[package]] -name = "string_cache_codegen" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" -dependencies = [ - "phf_generator 0.10.0", - "phf_shared 0.10.0", - "proc-macro2", - "quote", -] - [[package]] name = "syn" version = "1.0.96" @@ -969,37 +409,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "tempfile" -version = "3.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" -dependencies = [ - "cfg-if", - "fastrand", - "libc", - "redox_syscall", - "remove_dir_all", - "winapi", -] - -[[package]] -name = "tendril" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" -dependencies = [ - "futf", - "mac", - "utf-8", -] - -[[package]] -name = "thin-slice" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" - [[package]] name = "time" version = "0.1.44" @@ -1007,7 +416,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" dependencies = [ "libc", - "wasi 0.10.0+wasi-snapshot-preview1", + "wasi", "winapi", ] @@ -1029,24 +438,12 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" -[[package]] -name = "utf-8" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" - [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" -[[package]] -name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" - [[package]] name = "wasi" version = "0.10.0+wasi-snapshot-preview1" @@ -1157,46 +554,3 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-sys" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" -dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" - -[[package]] -name = "windows_i686_gnu" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" - -[[package]] -name = "windows_i686_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" diff --git a/Cargo.toml b/Cargo.toml index a157627..24362da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,4 @@ members = [ "bin", - "sanitize-html-rs", - "kuchiki", ] diff --git a/bin/Cargo.toml b/bin/Cargo.toml index 07b2193..974b1df 100644 --- a/bin/Cargo.toml +++ b/bin/Cargo.toml @@ -11,5 +11,4 @@ base16ct = { version = "^0.1.0", features = [ "alloc" ] } imap = { version = "^2.4.1", default-features = false } mail-parser = "^0.5.0" rustls-connector = { version = "^0.16.1", default-features = false, features = [ "webpki-roots-certs", "quic" ] } -sanitize_html = { path = "../sanitize-html-rs" } sha2 = "^0.10.2" diff --git a/bin/src/main.rs b/bin/src/main.rs index 141f200..5a86117 100644 --- a/bin/src/main.rs +++ b/bin/src/main.rs @@ -8,13 +8,11 @@ use std::{ use mail_parser::Message as MpMessage; -use sanitize_html::{rules::Element, sanitize_str}; use sha2::{Digest, Sha256}; extern crate imap; extern crate mail_parser; extern crate rustls_connector; -extern crate sanitize_html; extern crate sha2; use message_reader::{EmailReader, TestMessagesReader}; @@ -50,9 +48,6 @@ fn main() { println!("Processing message {}", msg.get_uid()); let parsed = msg.get_parsed().expect("A parsed messsage."); - let title = parsed.get_subject().expect("Expected a subject"); - - println!("{}", &title); let html_body = parsed.get_html_body(0).expect("Could not read html body"); let processed_html = process_html(&html_body).expect("Could not process the HTML"); @@ -92,27 +87,8 @@ fn get_path(parsed: &MpMessage, msg: &Message) -> String { format!("{:05}_{}_{}.html", uid, date_str, &hash).to_owned() } -fn process_html(input: &str) -> Result { - let mut rules = sanitize_html::rules::predefined::relaxed().delete("style"); - - rules - .allowed_elements - .get_mut("img") - .unwrap() - .attribute_rules - .rename("src", "data-source"); - - let mut span = Element::new("span"); - - span.attribute_rules - .modify("style", Box::new(|_i| "".to_string())); - - let rules = rules.element(span); - - //rules.allowed_elements.remove_entry("img"); - - sanitize_str(&rules, input) - //Ok(input.to_owned()) +fn process_html(input: &str) -> Result { + Ok(input.replace("src", "data-source")) } fn write_to_test_path(msg: &Message) { diff --git a/kuchiki/.gitignore b/kuchiki/.gitignore deleted file mode 100644 index 884cb47..0000000 --- a/kuchiki/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -target -Cargo.lock -.cargo/config diff --git a/kuchiki/.travis.yml b/kuchiki/.travis.yml deleted file mode 100644 index 017d7c3..0000000 --- a/kuchiki/.travis.yml +++ /dev/null @@ -1,6 +0,0 @@ -sudo: false -language: rust -rust: - - nightly - - beta - - stable diff --git a/kuchiki/Cargo.toml b/kuchiki/Cargo.toml deleted file mode 100644 index 2de668e..0000000 --- a/kuchiki/Cargo.toml +++ /dev/null @@ -1,22 +0,0 @@ -[package] -name = "kuchiki" -version = "0.8.1" -authors = ["Simon Sapin "] -license = "MIT" -description = "(朽木) HTML/XML tree manipulation library" -repository = "https://github.com/kuchiki-rs/kuchiki" -edition = "2018" - -[lib] -name = "kuchiki" -doctest = false - -[dependencies] -cssparser = "^0.27" -matches = "^0.1.4" -html5ever = "^0.26" -selectors = "^0.22" -indexmap = "^1.6.0" - -[dev-dependencies] -tempfile = "3" diff --git a/kuchiki/LICENSE b/kuchiki/LICENSE deleted file mode 100644 index 31aa793..0000000 --- a/kuchiki/LICENSE +++ /dev/null @@ -1,23 +0,0 @@ -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/kuchiki/README.md b/kuchiki/README.md deleted file mode 100644 index 30d4f54..0000000 --- a/kuchiki/README.md +++ /dev/null @@ -1,10 +0,0 @@ -Kuchiki (朽木) -============== - -HTML/XML¹ tree manipulation library for Rust. - -[Documentation](https://docs.rs/kuchiki/) - -See [users.rust-lang.org discussion](http://users.rust-lang.org/t/kuchiki-a-vaporware-html-xml-tree-manipulation-library/435). - -¹ There is no support for XML syntax yet. The plan is to integrate with an existing parser. diff --git a/kuchiki/docs/.nojekyll b/kuchiki/docs/.nojekyll deleted file mode 100644 index e69de29..0000000 diff --git a/kuchiki/docs/404.html b/kuchiki/docs/404.html deleted file mode 100644 index 9fef978..0000000 --- a/kuchiki/docs/404.html +++ /dev/null @@ -1,3 +0,0 @@ - - -Moved to docs.rs diff --git a/kuchiki/docs/index.html b/kuchiki/docs/index.html deleted file mode 100644 index 9fef978..0000000 --- a/kuchiki/docs/index.html +++ /dev/null @@ -1,3 +0,0 @@ - - -Moved to docs.rs diff --git a/kuchiki/examples/find_matches.rs b/kuchiki/examples/find_matches.rs deleted file mode 100644 index 848e08e..0000000 --- a/kuchiki/examples/find_matches.rs +++ /dev/null @@ -1,48 +0,0 @@ -extern crate kuchiki; - -use kuchiki::traits::*; - -fn main() { - let html = r" - - - - -

Example

-

Hello, world!

-

I love HTML

- - - "; - let css_selector = ".foo"; - - let document = kuchiki::parse_html().one(html); - - for css_match in document.select(css_selector).unwrap() { - // css_match is a NodeDataRef, but most of the interesting methods are - // on NodeRef. Let's get the underlying NodeRef. - let as_node = css_match.as_node(); - - // In this example, as_node represents an HTML node like - // - //

Hello world!

" - // - // Which is distinct from just 'Hello world!'. To get rid of that

- // tag, we're going to get each element's first child, which will be - // a "text" node. - // - // There are other kinds of nodes, of course. The possibilities are all - // listed in the `NodeData` enum in this crate. - let text_node = as_node.first_child().unwrap(); - - // Let's get the actual text in this text node. A text node wraps around - // a RefCell, so we need to call borrow() to get a &str out. - let text = text_node.as_text().unwrap().borrow(); - - // Prints: - // - // "Hello, world!" - // "I love HTML" - println!("{:?}", text); - } -} diff --git a/kuchiki/examples/stack-overflow.rs b/kuchiki/examples/stack-overflow.rs deleted file mode 100644 index 535b702..0000000 --- a/kuchiki/examples/stack-overflow.rs +++ /dev/null @@ -1,22 +0,0 @@ -extern crate kuchiki; - -fn main() { - let mut depth = 2; - // 20 M nodes is a few GB of memory. - while depth <= 20_000_000 { - let mut node = kuchiki::NodeRef::new_text(""); - for _ in 0..depth { - let parent = kuchiki::NodeRef::new_text(""); - parent.append(node); - node = parent; - } - - println!("Trying to drop {} nodes...", depth); - // Without an explicit `impl Drop for Node`, - // depth = 20_000 causes "thread '

' has overflowed its stack" - // on my machine (Linux x86_64). - ::std::mem::drop(node); - - depth *= 10; - } -} diff --git a/kuchiki/src/attributes.rs b/kuchiki/src/attributes.rs deleted file mode 100644 index 655585e..0000000 --- a/kuchiki/src/attributes.rs +++ /dev/null @@ -1,83 +0,0 @@ -use html5ever::{LocalName, Namespace, Prefix}; -use indexmap::{map::Entry, IndexMap}; - -/// Convenience wrapper around a indexmap that adds method for attributes in the null namespace. -#[derive(Debug, PartialEq, Clone)] -pub struct Attributes { - /// A map of attributes whose name can have namespaces. - pub map: IndexMap, -} - -/// -#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)] -pub struct ExpandedName { - /// Namespace URL - pub ns: Namespace, - /// "Local" part of the name - pub local: LocalName, -} - -impl ExpandedName { - /// Trivial constructor - pub fn new, L: Into>(ns: N, local: L) -> Self { - ExpandedName { - ns: ns.into(), - local: local.into(), - } - } -} - -/// The non-identifying parts of an attribute -#[derive(Debug, PartialEq, Clone)] -pub struct Attribute { - /// The namespace prefix, if any - pub prefix: Option, - /// The attribute value - pub value: String, -} - -impl Attributes { - /// Like IndexMap::contains - pub fn contains>(&self, local_name: A) -> bool { - self.map.contains_key(&ExpandedName::new(ns!(), local_name)) - } - - /// Like IndexMap::get - pub fn get>(&self, local_name: A) -> Option<&str> { - self.map - .get(&ExpandedName::new(ns!(), local_name)) - .map(|attr| &*attr.value) - } - - /// Like IndexMap::get_mut - pub fn get_mut>(&mut self, local_name: A) -> Option<&mut String> { - self.map - .get_mut(&ExpandedName::new(ns!(), local_name)) - .map(|attr| &mut attr.value) - } - - /// Like IndexMap::entry - pub fn entry>(&mut self, local_name: A) -> Entry { - self.map.entry(ExpandedName::new(ns!(), local_name)) - } - - /// Like IndexMap::insert - pub fn insert>( - &mut self, - local_name: A, - value: String, - ) -> Option { - self.map.insert( - ExpandedName::new(ns!(), local_name), - Attribute { - prefix: None, - value, - }, - ) - } - - /// Like IndexMap::remove - pub fn remove>(&mut self, local_name: A) -> Option { - self.map.remove(&ExpandedName::new(ns!(), local_name)) - } -} diff --git a/kuchiki/src/cell_extras.rs b/kuchiki/src/cell_extras.rs deleted file mode 100644 index 4c7538f..0000000 --- a/kuchiki/src/cell_extras.rs +++ /dev/null @@ -1,113 +0,0 @@ -//! Specialized methods for `Cell` of some specific `!Copy` types, -//! allowing limited access to a value without moving it of the cell. -//! -//! -//! # Soundness -//! -//! These methods use and `Cell::as_ptr` and `unsafe`. -//! Their soundness lies in that: -//! -//! * `Cell: !Sync` for any `T`, so no other thread is accessing this cell. -//! * For the duration of the raw pointer access, -//! this thread only runs code that is known to not access the same cell again. -//! In particular, no method of a type paramater is called. -//! For example, `clone_inner` would be unsound to generalize to any `Cell` -//! because it would involve running arbitrary code through `T::clone` -//! and provide that code with a reference to the inside of the cell. -//! -//! ```rust -//! struct Evil(Box, Rc>>); -//! impl Clone for Evil { -//! fn clone(&self) -> Self { -//! mem::drop(self.1.take()); // Mess with the "other" node, which might be `self`. -//! Evil( -//! self.0.clone(), // possible use after free! -//! Rc::new(Cell::new(None)) -//! ) -//! } -//! } -//! let a = Rc::new(Cell::new(None)); -//! a.set(Some(Evil(Box::new(5), a.clone()))); // Make a reference cycle. -//! a.clone_inner(); -//! ``` -//! -//! `Rc::clone` and `Weak::clone` do not have this problem -//! as they only increment reference counts and never call `T::clone`. -//! -//! -//! # Alternative -//! -//! To avoid using `unsafe` entirely, operating on a `T: !Copy` value inside a `Cell` -//! would require temporarily replacing it with a default value: -//! -//! ```rust -//! fn option_dance(cell: &Cell, f: F) -> R -//! where T: Default, F: FnOnce(&mut T) -> R -//! { -//! let mut value = cell.take(); -//! let result = f(&mut value); -//! cell.set(value); -//! result -//! } -//! ``` -//! -//! It would be worth exploring whether LLVM can reliably optimize away these extra moves -//! and compile the `Option` dance to assembly similar to that of the `unsafe` operation. - -use std::cell::Cell; -use std::rc::{Rc, Weak}; - -pub trait CellOption { - fn is_none(&self) -> bool; -} - -impl CellOption for Cell> { - #[inline] - fn is_none(&self) -> bool { - unsafe { (*self.as_ptr()).is_none() } - } -} - -pub trait CellOptionWeak { - fn upgrade(&self) -> Option>; - fn clone_inner(&self) -> Option>; -} - -impl CellOptionWeak for Cell>> { - #[inline] - fn upgrade(&self) -> Option> { - unsafe { (*self.as_ptr()).as_ref().and_then(Weak::upgrade) } - } - - #[inline] - fn clone_inner(&self) -> Option> { - unsafe { (*self.as_ptr()).clone() } - } -} - -pub trait CellOptionRc { - /// Return `Some` if this `Rc` is the only strong reference count, - /// even if there are weak references. - fn take_if_unique_strong(&self) -> Option>; - fn clone_inner(&self) -> Option>; -} - -impl CellOptionRc for Cell>> { - #[inline] - fn take_if_unique_strong(&self) -> Option> { - unsafe { - match *self.as_ptr() { - None => None, - Some(ref rc) if Rc::strong_count(rc) > 1 => None, - // Not borrowing the `Rc` here - // as we would be invalidating that borrow while it is outstanding: - Some(_) => self.take(), - } - } - } - - #[inline] - fn clone_inner(&self) -> Option> { - unsafe { (*self.as_ptr()).clone() } - } -} diff --git a/kuchiki/src/iter.rs b/kuchiki/src/iter.rs deleted file mode 100644 index 75fcfc4..0000000 --- a/kuchiki/src/iter.rs +++ /dev/null @@ -1,452 +0,0 @@ -//! Node iterators - -use std::borrow::Borrow; -use std::cell::RefCell; -use std::iter::Rev; - -use crate::node_data_ref::NodeDataRef; -use crate::select::Selectors; -use crate::tree::{ElementData, NodeRef}; - -impl NodeRef { - /// Return an iterator of references to this node and its ancestors. - #[inline] - pub fn inclusive_ancestors(&self) -> Ancestors { - Ancestors(Some(self.clone())) - } - - /// Return an iterator of references to this node’s ancestors. - #[inline] - pub fn ancestors(&self) -> Ancestors { - Ancestors(self.parent()) - } - - /// Return an iterator of references to this node and the siblings before it. - #[inline] - pub fn inclusive_preceding_siblings(&self) -> Rev { - match self.parent() { - Some(parent) => { - let first_sibling = parent.first_child().unwrap(); - debug_assert!(self.previous_sibling().is_some() || *self == first_sibling); - Siblings(Some(State { - next: first_sibling, - next_back: self.clone(), - })) - } - None => { - debug_assert!(self.previous_sibling().is_none()); - Siblings(Some(State { - next: self.clone(), - next_back: self.clone(), - })) - } - } - .rev() - } - - /// Return an iterator of references to this node’s siblings before it. - #[inline] - pub fn preceding_siblings(&self) -> Rev { - match (self.parent(), self.previous_sibling()) { - (Some(parent), Some(previous_sibling)) => { - let first_sibling = parent.first_child().unwrap(); - Siblings(Some(State { - next: first_sibling, - next_back: previous_sibling, - })) - } - _ => Siblings(None), - } - .rev() - } - - /// Return an iterator of references to this node and the siblings after it. - #[inline] - pub fn inclusive_following_siblings(&self) -> Siblings { - match self.parent() { - Some(parent) => { - let last_sibling = parent.last_child().unwrap(); - debug_assert!(self.next_sibling().is_some() || *self == last_sibling); - Siblings(Some(State { - next: self.clone(), - next_back: last_sibling, - })) - } - None => { - debug_assert!(self.next_sibling().is_none()); - Siblings(Some(State { - next: self.clone(), - next_back: self.clone(), - })) - } - } - } - - /// Return an iterator of references to this node’s siblings after it. - #[inline] - pub fn following_siblings(&self) -> Siblings { - match (self.parent(), self.next_sibling()) { - (Some(parent), Some(next_sibling)) => { - let last_sibling = parent.last_child().unwrap(); - Siblings(Some(State { - next: next_sibling, - next_back: last_sibling, - })) - } - _ => Siblings(None), - } - } - - /// Return an iterator of references to this node’s children. - #[inline] - pub fn children(&self) -> Siblings { - match (self.first_child(), self.last_child()) { - (Some(first_child), Some(last_child)) => Siblings(Some(State { - next: first_child, - next_back: last_child, - })), - (None, None) => Siblings(None), - _ => unreachable!(), - } - } - - /// Return an iterator of references to this node and its descendants, in tree order. - /// - /// Parent nodes appear before the descendants. - /// - /// Note: this is the `NodeEdge::Start` items from `traverse()`. - #[inline] - pub fn inclusive_descendants(&self) -> Descendants { - Descendants(self.traverse_inclusive()) - } - - /// Return an iterator of references to this node’s descendants, in tree order. - /// - /// Parent nodes appear before the descendants. - /// - /// Note: this is the `NodeEdge::Start` items from `traverse()`. - #[inline] - pub fn descendants(&self) -> Descendants { - Descendants(self.traverse()) - } - - /// Return an iterator of the start and end edges of this node and its descendants, - /// in tree order. - #[inline] - pub fn traverse_inclusive(&self) -> Traverse { - Traverse(Some(State { - next: NodeEdge::Start(self.clone()), - next_back: NodeEdge::End(self.clone()), - })) - } - - /// Return an iterator of the start and end edges of this node’s descendants, - /// in tree order. - #[inline] - pub fn traverse(&self) -> Traverse { - match (self.first_child(), self.last_child()) { - (Some(first_child), Some(last_child)) => Traverse(Some(State { - next: NodeEdge::Start(first_child), - next_back: NodeEdge::End(last_child), - })), - (None, None) => Traverse(None), - _ => unreachable!(), - } - } - - /// Return an iterator of the inclusive descendants element that match the given selector list. - #[inline] - pub fn select(&self, selectors: &str) -> Result>, ()> { - self.inclusive_descendants().select(selectors) - } - - /// Return the first inclusive descendants element that match the given selector list. - #[inline] - pub fn select_first(&self, selectors: &str) -> Result, ()> { - let mut elements = self.select(selectors)?; - elements.next().ok_or(()) - } -} - -#[derive(Debug, Clone)] -struct State { - next: T, - next_back: T, -} - -/// A double-ended iterator of sibling nodes. -#[derive(Debug, Clone)] -pub struct Siblings(Option>); - -macro_rules! siblings_next { - ($next: ident, $next_back: ident, $next_sibling: ident) => { - fn $next(&mut self) -> Option { - #![allow(non_shorthand_field_patterns)] - self.0.take().map(|State { $next: next, $next_back: next_back }| { - if let Some(sibling) = next.$next_sibling() { - if next != next_back { - self.0 = Some(State { $next: sibling, $next_back: next_back }) - } - } - next - }) - } - } -} - -impl Iterator for Siblings { - type Item = NodeRef; - siblings_next!(next, next_back, next_sibling); -} - -impl DoubleEndedIterator for Siblings { - siblings_next!(next_back, next, previous_sibling); -} - -/// An iterator on ancestor nodes. -#[derive(Debug, Clone)] -pub struct Ancestors(Option); - -impl Iterator for Ancestors { - type Item = NodeRef; - - #[inline] - fn next(&mut self) -> Option { - self.0.take().map(|node| { - self.0 = node.parent(); - node - }) - } -} - -/// An iterator of references to a given node and its descendants, in tree order. -#[derive(Debug, Clone)] -pub struct Descendants(Traverse); - -macro_rules! descendants_next { - ($next: ident) => { - #[inline] - fn $next(&mut self) -> Option { - loop { - match (self.0).$next() { - Some(NodeEdge::Start(node)) => return Some(node), - Some(NodeEdge::End(_)) => {} - None => return None - } - } - } - } -} - -impl Iterator for Descendants { - type Item = NodeRef; - descendants_next!(next); -} - -impl DoubleEndedIterator for Descendants { - descendants_next!(next_back); -} - -/// Marks either the start or the end of a node. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum NodeEdge { - /// Indicates that start of a node that has children. - /// Yielded by `Traverse::next` before the node’s descendants. - /// In HTML or XML, this corresponds to an opening tag like `
` - Start(T), - - /// Indicates that end of a node that has children. - /// Yielded by `Traverse::next` after the node’s descendants. - /// In HTML or XML, this corresponds to a closing tag like `
` - End(T), -} - -/// An iterator of the start and end edges of the nodes in a given subtree. -#[derive(Debug, Clone)] -pub struct Traverse(Option>>); - -macro_rules! traverse_next { - ($next: ident, $next_back: ident, $first_child: ident, $next_sibling: ident, $Start: ident, $End: ident) => { - fn $next(&mut self) -> Option> { - #![allow(non_shorthand_field_patterns)] - self.0.take().map(|State { $next: next, $next_back: next_back }| { - if next != next_back { - self.0 = match next { - NodeEdge::$Start(ref node) => { - match node.$first_child() { - Some(child) => { - Some(State { $next: NodeEdge::$Start(child), $next_back: next_back }) - } - None => Some(State { $next: NodeEdge::$End(node.clone()), $next_back: next_back }) - } - } - NodeEdge::$End(ref node) => { - match node.$next_sibling() { - Some(sibling) => { - Some(State { $next: NodeEdge::$Start(sibling), $next_back: next_back }) - } - None => node.parent().map(|parent| { - State { $next: NodeEdge::$End(parent), $next_back: next_back } - }) - } - } - }; - } - next - }) - } - } -} - -impl Iterator for Traverse { - type Item = NodeEdge; - traverse_next!(next, next_back, first_child, next_sibling, Start, End); -} - -impl DoubleEndedIterator for Traverse { - traverse_next!(next_back, next, last_child, previous_sibling, End, Start); -} - -macro_rules! filter_map_like_iterator { - (#[$doc: meta] $name: ident: $f: expr, $from: ty => $to: ty) => { - #[$doc] - #[derive(Debug, Clone)] - pub struct $name(pub I); - - impl Iterator for $name - where - I: Iterator, - { - type Item = $to; - - #[inline] - fn next(&mut self) -> Option<$to> { - for x in self.0.by_ref() { - if let Some(y) = ($f)(x) { - return Some(y); - } - } - None - } - } - - impl DoubleEndedIterator for $name - where - I: DoubleEndedIterator, - { - #[inline] - fn next_back(&mut self) -> Option<$to> { - for x in self.0.by_ref().rev() { - if let Some(y) = ($f)(x) { - return Some(y); - } - } - None - } - } - }; -} - -filter_map_like_iterator! { - /// A node iterator adaptor that yields element nodes. - Elements: NodeRef::into_element_ref, NodeRef => NodeDataRef -} - -filter_map_like_iterator! { - /// A node iterator adaptor that yields comment nodes. - Comments: NodeRef::into_comment_ref, NodeRef => NodeDataRef> -} - -filter_map_like_iterator! { - /// A node iterator adaptor that yields text nodes. - TextNodes: NodeRef::into_text_ref, NodeRef => NodeDataRef> -} - -/// An element iterator adaptor that yields elements maching given selectors. -pub struct Select -where - I: Iterator>, - S: Borrow, -{ - /// The underlying iterator. - pub iter: I, - - /// The selectors to be matched. - pub selectors: S, -} - -impl Iterator for Select -where - I: Iterator>, - S: Borrow, -{ - type Item = NodeDataRef; - - #[inline] - fn next(&mut self) -> Option> { - for element in self.iter.by_ref() { - if self.selectors.borrow().matches(&element) { - return Some(element); - } - } - None - } -} - -impl DoubleEndedIterator for Select -where - I: DoubleEndedIterator>, - S: Borrow, -{ - #[inline] - fn next_back(&mut self) -> Option> { - for element in self.iter.by_ref().rev() { - if self.selectors.borrow().matches(&element) { - return Some(element); - } - } - None - } -} - -/// Convenience methods for node iterators. -pub trait NodeIterator: Sized + Iterator { - /// Filter this element iterator to elements. - #[inline] - fn elements(self) -> Elements { - Elements(self) - } - - /// Filter this node iterator to text nodes. - #[inline] - fn text_nodes(self) -> TextNodes { - TextNodes(self) - } - - /// Filter this node iterator to comment nodes. - #[inline] - fn comments(self) -> Comments { - Comments(self) - } - - /// Filter this node iterator to elements maching the given selectors. - #[inline] - fn select(self, selectors: &str) -> Result>, ()> { - self.elements().select(selectors) - } -} - -/// Convenience methods for element iterators. -pub trait ElementIterator: Sized + Iterator> { - /// Filter this element iterator to elements maching the given selectors. - #[inline] - fn select(self, selectors: &str) -> Result, ()> { - Selectors::compile(selectors).map(|s| Select { - iter: self, - selectors: s, - }) - } -} - -impl NodeIterator for I where I: Iterator {} -impl ElementIterator for I where I: Iterator> {} diff --git a/kuchiki/src/lib.rs b/kuchiki/src/lib.rs deleted file mode 100644 index 2c862d9..0000000 --- a/kuchiki/src/lib.rs +++ /dev/null @@ -1,40 +0,0 @@ -/*! - -Kuchiki (朽木), a HTML/XML tree manipulation library for Rust. - -*/ - -#![deny(missing_docs)] - -#[macro_use] -extern crate html5ever; -#[macro_use] -extern crate matches; - -mod attributes; -mod cell_extras; -pub mod iter; -mod node_data_ref; -mod parser; -mod select; -mod serializer; -#[cfg(test)] -mod tests; -mod tree; - -pub use attributes::{Attribute, Attributes, ExpandedName}; -pub use node_data_ref::NodeDataRef; -pub use parser::{parse_html, parse_html_with_options, parse_fragment, ParseOpts, Sink}; -pub use select::{Selector, Selectors, Specificity}; -pub use tree::{Doctype, DocumentData, ElementData, Node, NodeData, NodeRef}; - -/// This module re-exports a number of traits that are useful when using Kuchiki. -/// It can be used with: -/// -/// ```rust -/// use kuchiki::traits::*; -/// ``` -pub mod traits { - pub use html5ever::tendril::TendrilSink; - pub use crate::iter::{ElementIterator, NodeIterator}; -} diff --git a/kuchiki/src/node_data_ref.rs b/kuchiki/src/node_data_ref.rs deleted file mode 100644 index 2cfd8b8..0000000 --- a/kuchiki/src/node_data_ref.rs +++ /dev/null @@ -1,116 +0,0 @@ -use std::cell::RefCell; -use std::fmt; -use std::ops::Deref; -use crate::tree::{Doctype, DocumentData, ElementData, Node, NodeRef}; - -impl NodeRef { - /// If this node is an element, return a strong reference to element-specific data. - #[inline] - pub fn into_element_ref(self) -> Option> { - NodeDataRef::new_opt(self, Node::as_element) - } - - /// If this node is a text node, return a strong reference to its contents. - #[inline] - pub fn into_text_ref(self) -> Option>> { - NodeDataRef::new_opt(self, Node::as_text) - } - - /// If this node is a comment, return a strong reference to its contents. - #[inline] - pub fn into_comment_ref(self) -> Option>> { - NodeDataRef::new_opt(self, Node::as_comment) - } - - /// If this node is a doctype, return a strong reference to doctype-specific data. - #[inline] - pub fn into_doctype_ref(self) -> Option> { - NodeDataRef::new_opt(self, Node::as_doctype) - } - - /// If this node is a document, return a strong reference to document-specific data. - #[inline] - pub fn into_document_ref(self) -> Option> { - NodeDataRef::new_opt(self, Node::as_document) - } -} - -/// Holds a strong reference to a node, but dereferences to some component inside of it. -#[derive(Eq)] -pub struct NodeDataRef { - _keep_alive: NodeRef, - _reference: *const T, -} - -impl NodeDataRef { - /// Create a `NodeDataRef` for a component in a given node. - #[inline] - pub fn new(rc: NodeRef, f: F) -> NodeDataRef - where - F: FnOnce(&Node) -> &T, - { - NodeDataRef { - _reference: f(&*rc), - _keep_alive: rc, - } - } - - /// Create a `NodeDataRef` for and a component that may or may not be in a given node. - #[inline] - pub fn new_opt(rc: NodeRef, f: F) -> Option> - where - F: FnOnce(&Node) -> Option<&T>, - { - f(&*rc).map(|r| r as *const T).map(move |r| NodeDataRef { - _reference: r, - _keep_alive: rc, - }) - } - - /// Access the corresponding node. - #[inline] - pub fn as_node(&self) -> &NodeRef { - &self._keep_alive - } -} - -impl Deref for NodeDataRef { - type Target = T; - #[inline] - fn deref(&self) -> &T { - unsafe { &*self._reference } - } -} - -// #[derive(PartialEq)] would compare both fields -impl PartialEq for NodeDataRef { - #[inline] - fn eq(&self, other: &Self) -> bool { - self._keep_alive == other._keep_alive - } -} - -// #[derive(Clone)] would have an unnecessary `T: Clone` bound -impl Clone for NodeDataRef { - #[inline] - fn clone(&self) -> Self { - NodeDataRef { - _keep_alive: self._keep_alive.clone(), - _reference: self._reference, - } - } -} - -impl fmt::Debug for NodeDataRef { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - fmt::Debug::fmt(&**self, f) - } -} - -impl NodeDataRef { - /// Return the concatenation of all text nodes in this subtree. - pub fn text_contents(&self) -> String { - self.as_node().text_contents() - } -} diff --git a/kuchiki/src/parser.rs b/kuchiki/src/parser.rs deleted file mode 100644 index 745f6ac..0000000 --- a/kuchiki/src/parser.rs +++ /dev/null @@ -1,241 +0,0 @@ -use html5ever::tendril::StrTendril; -use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; -use html5ever::{self, Attribute, ExpandedName, QualName}; -use std::borrow::Cow; - -use crate::attributes; -use crate::tree::NodeRef; - -/// Options for the HTML parser. -#[derive(Default)] -pub struct ParseOpts { - /// Options for the HTML tokenizer. - pub tokenizer: html5ever::tokenizer::TokenizerOpts, - - /// Options for the HTML tree builder. - pub tree_builder: html5ever::tree_builder::TreeBuilderOpts, - - /// A callback for HTML parse errors (which are never fatal). - pub on_parse_error: Option)>>, -} - -/// Parse an HTML document with html5ever and the default configuration. -pub fn parse_html() -> html5ever::Parser { - parse_html_with_options(ParseOpts::default()) -} - -/// Parse an HTML document with html5ever with custom configuration. -pub fn parse_html_with_options(opts: ParseOpts) -> html5ever::Parser { - let sink = Sink { - document_node: NodeRef::new_document(), - on_parse_error: opts.on_parse_error, - }; - let html5opts = html5ever::ParseOpts { - tokenizer: opts.tokenizer, - tree_builder: opts.tree_builder, - }; - html5ever::parse_document(sink, html5opts) -} - -/// Parse an HTML fragment with html5ever and the default configuration. -pub fn parse_fragment(ctx_name: QualName, ctx_attr: Vec) -> html5ever::Parser { - parse_fragment_with_options(ParseOpts::default(), ctx_name, ctx_attr) -} - -/// Parse an HTML fragment with html5ever with custom configuration. -pub fn parse_fragment_with_options(opts: ParseOpts, ctx_name: QualName, ctx_attr: Vec) -> html5ever::Parser { - let sink = Sink { - document_node: NodeRef::new_document(), - on_parse_error: opts.on_parse_error, - }; - let html5opts = html5ever::ParseOpts { - tokenizer: opts.tokenizer, - tree_builder: opts.tree_builder, - }; - html5ever::parse_fragment(sink, html5opts, ctx_name, ctx_attr) -} - -/// Receives new tree nodes during parsing. -pub struct Sink { - document_node: NodeRef, - on_parse_error: Option)>>, -} - -impl TreeSink for Sink { - type Output = NodeRef; - - fn finish(self) -> NodeRef { - self.document_node - } - - type Handle = NodeRef; - - #[inline] - fn parse_error(&mut self, message: Cow<'static, str>) { - if let Some(ref mut handler) = self.on_parse_error { - handler(message) - } - } - - #[inline] - fn get_document(&mut self) -> NodeRef { - self.document_node.clone() - } - - #[inline] - fn set_quirks_mode(&mut self, mode: QuirksMode) { - self.document_node - .as_document() - .unwrap() - ._quirks_mode - .set(mode) - } - - #[inline] - fn same_node(&self, x: &NodeRef, y: &NodeRef) -> bool { - x == y - } - - #[inline] - fn elem_name<'a>(&self, target: &'a NodeRef) -> ExpandedName<'a> { - target.as_element().unwrap().name.expanded() - } - - #[inline] - fn create_element( - &mut self, - name: QualName, - attrs: Vec, - _flags: ElementFlags, - ) -> NodeRef { - NodeRef::new_element( - name, - attrs.into_iter().map(|attr| { - let Attribute { - name: QualName { prefix, ns, local }, - value, - } = attr; - let value = String::from(value); - ( - attributes::ExpandedName { ns, local }, - attributes::Attribute { prefix, value }, - ) - }), - ) - } - - #[inline] - fn create_comment(&mut self, text: StrTendril) -> NodeRef { - NodeRef::new_comment(text) - } - - #[inline] - fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> NodeRef { - NodeRef::new_processing_instruction(target, data) - } - - #[inline] - fn append(&mut self, parent: &NodeRef, child: NodeOrText) { - match child { - NodeOrText::AppendNode(node) => parent.append(node), - NodeOrText::AppendText(text) => { - if let Some(last_child) = parent.last_child() { - if let Some(existing) = last_child.as_text() { - existing.borrow_mut().push_str(&text); - return; - } - } - parent.append(NodeRef::new_text(text)) - } - } - } - - #[inline] - fn append_before_sibling(&mut self, sibling: &NodeRef, child: NodeOrText) { - match child { - NodeOrText::AppendNode(node) => sibling.insert_before(node), - NodeOrText::AppendText(text) => { - if let Some(previous_sibling) = sibling.previous_sibling() { - if let Some(existing) = previous_sibling.as_text() { - existing.borrow_mut().push_str(&text); - return; - } - } - sibling.insert_before(NodeRef::new_text(text)) - } - } - } - - #[inline] - fn append_doctype_to_document( - &mut self, - name: StrTendril, - public_id: StrTendril, - system_id: StrTendril, - ) { - self.document_node - .append(NodeRef::new_doctype(name, public_id, system_id)) - } - - #[inline] - fn add_attrs_if_missing(&mut self, target: &NodeRef, attrs: Vec) { - let element = target.as_element().unwrap(); - let mut attributes = element.attributes.borrow_mut(); - - for Attribute { - name: QualName { prefix, ns, local }, - value, - } in attrs - { - attributes - .map - .entry(attributes::ExpandedName { ns, local }) - .or_insert_with(|| { - let value = String::from(value); - attributes::Attribute { prefix, value } - }); - } - } - - #[inline] - fn remove_from_parent(&mut self, target: &NodeRef) { - target.detach() - } - - #[inline] - fn reparent_children(&mut self, node: &NodeRef, new_parent: &NodeRef) { - // FIXME: Can this be done more effciently in rctree, - // by moving the whole linked list of children at once? - for child in node.children() { - new_parent.append(child) - } - } - - #[inline] - fn mark_script_already_started(&mut self, _node: &NodeRef) { - // FIXME: Is this useful outside of a browser? - } - - #[inline] - fn get_template_contents(&mut self, target: &NodeRef) -> NodeRef { - target - .as_element() - .unwrap() - .template_contents - .clone() - .unwrap() - } - - fn append_based_on_parent_node( - &mut self, - element: &NodeRef, - prev_element: &NodeRef, - child: NodeOrText, - ) { - if element.parent().is_some() { - self.append_before_sibling(element, child) - } else { - self.append(prev_element, child) - } - } -} diff --git a/kuchiki/src/select.rs b/kuchiki/src/select.rs deleted file mode 100644 index 3dea06a..0000000 --- a/kuchiki/src/select.rs +++ /dev/null @@ -1,433 +0,0 @@ -use crate::attributes::ExpandedName; -use cssparser::{self, CowRcStr, ParseError, SourceLocation, ToCss}; -use html5ever::{LocalName, Namespace}; -use crate::iter::{NodeIterator, Select}; -use crate::node_data_ref::NodeDataRef; -use selectors::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint}; -use selectors::context::QuirksMode; -use selectors::parser::SelectorParseErrorKind; -use selectors::parser::{ - NonTSPseudoClass, Parser, Selector as GenericSelector, SelectorImpl, SelectorList, -}; -use selectors::{self, matching, OpaqueElement}; -use std::fmt; -use crate::tree::{ElementData, Node, NodeData, NodeRef}; - -/// The definition of whitespace per CSS Selectors Level 3 § 4. -/// -/// Copied from rust-selectors. -static SELECTOR_WHITESPACE: &[char] = &[' ', '\t', '\n', '\r', '\x0C']; - -#[derive(Debug, Clone)] -pub struct KuchikiSelectors; - -impl SelectorImpl for KuchikiSelectors { - type AttrValue = String; - type Identifier = LocalName; - type ClassName = LocalName; - type LocalName = LocalName; - type PartName = LocalName; - type NamespacePrefix = LocalName; - type NamespaceUrl = Namespace; - type BorrowedNamespaceUrl = Namespace; - type BorrowedLocalName = LocalName; - - type NonTSPseudoClass = PseudoClass; - type PseudoElement = PseudoElement; - - type ExtraMatchingData = (); -} - -struct KuchikiParser; - -impl<'i> Parser<'i> for KuchikiParser { - type Impl = KuchikiSelectors; - type Error = SelectorParseErrorKind<'i>; - - fn parse_non_ts_pseudo_class( - &self, - location: SourceLocation, - name: CowRcStr<'i>, - ) -> Result>> { - use self::PseudoClass::*; - if name.eq_ignore_ascii_case("any-link") { - Ok(AnyLink) - } else if name.eq_ignore_ascii_case("link") { - Ok(Link) - } else if name.eq_ignore_ascii_case("visited") { - Ok(Visited) - } else if name.eq_ignore_ascii_case("active") { - Ok(Active) - } else if name.eq_ignore_ascii_case("focus") { - Ok(Focus) - } else if name.eq_ignore_ascii_case("hover") { - Ok(Hover) - } else if name.eq_ignore_ascii_case("enabled") { - Ok(Enabled) - } else if name.eq_ignore_ascii_case("disabled") { - Ok(Disabled) - } else if name.eq_ignore_ascii_case("checked") { - Ok(Checked) - } else if name.eq_ignore_ascii_case("indeterminate") { - Ok(Indeterminate) - } else { - Err( - location.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement( - name, - )), - ) - } - } -} - -#[derive(PartialEq, Eq, Clone, Debug, Hash)] -pub enum PseudoClass { - AnyLink, - Link, - Visited, - Active, - Focus, - Hover, - Enabled, - Disabled, - Checked, - Indeterminate, -} - -impl NonTSPseudoClass for PseudoClass { - type Impl = KuchikiSelectors; - - fn is_active_or_hover(&self) -> bool { - matches!(*self, PseudoClass::Active | PseudoClass::Hover) - } - - fn is_user_action_state(&self) -> bool { - matches!(*self, PseudoClass::Active | PseudoClass::Hover | PseudoClass::Focus) - } - - fn has_zero_specificity(&self) -> bool { - false - } -} - -impl ToCss for PseudoClass { - fn to_css(&self, dest: &mut W) -> fmt::Result - where - W: fmt::Write, - { - dest.write_str(match *self { - PseudoClass::AnyLink => ":any-link", - PseudoClass::Link => ":link", - PseudoClass::Visited => ":visited", - PseudoClass::Active => ":active", - PseudoClass::Focus => ":focus", - PseudoClass::Hover => ":hover", - PseudoClass::Enabled => ":enabled", - PseudoClass::Disabled => ":disabled", - PseudoClass::Checked => ":checked", - PseudoClass::Indeterminate => ":indeterminate", - }) - } -} - -#[derive(PartialEq, Eq, Clone, Debug, Hash)] -pub enum PseudoElement {} - -impl ToCss for PseudoElement { - fn to_css(&self, _dest: &mut W) -> fmt::Result - where - W: fmt::Write, - { - match *self {} - } -} - -impl selectors::parser::PseudoElement for PseudoElement { - type Impl = KuchikiSelectors; -} - -impl selectors::Element for NodeDataRef { - type Impl = KuchikiSelectors; - - #[inline] - fn opaque(&self) -> OpaqueElement { - let node: &Node = self.as_node(); - OpaqueElement::new(node) - } - - #[inline] - fn is_html_slot_element(&self) -> bool { - false - } - #[inline] - fn parent_node_is_shadow_root(&self) -> bool { - false - } - #[inline] - fn containing_shadow_host(&self) -> Option { - None - } - - #[inline] - fn parent_element(&self) -> Option { - self.as_node().parent().and_then(NodeRef::into_element_ref) - } - #[inline] - fn prev_sibling_element(&self) -> Option { - self.as_node().preceding_siblings().elements().next() - } - #[inline] - fn next_sibling_element(&self) -> Option { - self.as_node().following_siblings().elements().next() - } - #[inline] - fn is_empty(&self) -> bool { - self.as_node().children().all(|child| match *child.data() { - NodeData::Element(_) => false, - NodeData::Text(ref text) => text.borrow().is_empty(), - _ => true, - }) - } - #[inline] - fn is_root(&self) -> bool { - match self.as_node().parent() { - None => false, - Some(parent) => matches!(*parent.data(), NodeData::Document(_)), - } - } - - #[inline] - fn is_html_element_in_html_document(&self) -> bool { - // FIXME: Have a notion of HTML document v.s. XML document? - self.name.ns == ns!(html) - } - - #[inline] - fn has_local_name(&self, name: &LocalName) -> bool { - self.name.local == *name - } - #[inline] - fn has_namespace(&self, namespace: &Namespace) -> bool { - self.name.ns == *namespace - } - - #[inline] - fn is_part(&self, _name: &LocalName) -> bool { - false - } - - #[inline] - fn exported_part(&self, _: &LocalName) -> Option { - None - } - - #[inline] - fn imported_part(&self, _: &LocalName) -> Option { - None - } - - #[inline] - fn is_pseudo_element(&self) -> bool { - false - } - - #[inline] - fn is_same_type(&self, other: &Self) -> bool { - self.name == other.name - } - - #[inline] - fn is_link(&self) -> bool { - self.name.ns == ns!(html) - && matches!( - self.name.local, - local_name!("a") | local_name!("area") | local_name!("link") - ) - && self - .attributes - .borrow() - .map - .contains_key(&ExpandedName::new(ns!(), local_name!("href"))) - } - - #[inline] - fn has_id(&self, id: &LocalName, case_sensitivity: CaseSensitivity) -> bool { - self.attributes - .borrow() - .get(local_name!("id")) - .map_or(false, |id_attr| { - case_sensitivity.eq(id.as_bytes(), id_attr.as_bytes()) - }) - } - - #[inline] - fn has_class(&self, name: &LocalName, case_sensitivity: CaseSensitivity) -> bool { - let name = name.as_bytes(); - !name.is_empty() - && if let Some(class_attr) = self.attributes.borrow().get(local_name!("class")) { - class_attr - .split(SELECTOR_WHITESPACE) - .any(|class| case_sensitivity.eq(class.as_bytes(), name)) - } else { - false - } - } - - #[inline] - fn attr_matches( - &self, - ns: &NamespaceConstraint<&Namespace>, - local_name: &LocalName, - operation: &AttrSelectorOperation<&String>, - ) -> bool { - let attrs = self.attributes.borrow(); - match *ns { - NamespaceConstraint::Any => attrs - .map - .iter() - .any(|(name, attr)| name.local == *local_name && operation.eval_str(&attr.value)), - NamespaceConstraint::Specific(ns_url) => attrs - .map - .get(&ExpandedName::new(ns_url, local_name.clone())) - .map_or(false, |attr| operation.eval_str(&attr.value)), - } - } - - fn match_pseudo_element( - &self, - pseudo: &PseudoElement, - _context: &mut matching::MatchingContext, - ) -> bool { - match *pseudo {} - } - - fn match_non_ts_pseudo_class( - &self, - pseudo: &PseudoClass, - _context: &mut matching::MatchingContext, - _flags_setter: &mut F, - ) -> bool - where - F: FnMut(&Self, matching::ElementSelectorFlags), - { - use self::PseudoClass::*; - match *pseudo { - Active | Focus | Hover | Enabled | Disabled | Checked | Indeterminate | Visited => { - false - } - AnyLink | Link => { - self.name.ns == ns!(html) - && matches!( - self.name.local, - local_name!("a") | local_name!("area") | local_name!("link") - ) - && self.attributes.borrow().contains(local_name!("href")) - } - } - } -} - -/// A pre-compiled list of CSS Selectors. -pub struct Selectors(pub Vec); - -/// A pre-compiled CSS Selector. -pub struct Selector(GenericSelector); - -/// The specificity of a selector. -/// -/// Opaque, but ordered. -/// -/// Determines precedence in the cascading algorithm. -/// When equal, a rule later in source order takes precedence. -#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd)] -pub struct Specificity(u32); - -impl Selectors { - /// Compile a list of selectors. This may fail on syntax errors or unsupported selectors. - #[inline] - pub fn compile(s: &str) -> Result { - let mut input = cssparser::ParserInput::new(s); - match SelectorList::parse(&KuchikiParser, &mut cssparser::Parser::new(&mut input)) { - Ok(list) => Ok(Selectors(list.0.into_iter().map(Selector).collect())), - Err(_) => Err(()), - } - } - - /// Returns whether the given element matches this list of selectors. - #[inline] - pub fn matches(&self, element: &NodeDataRef) -> bool { - self.0.iter().any(|s| s.matches(element)) - } - - /// Filter an element iterator, yielding those matching this list of selectors. - #[inline] - pub fn filter(&self, iter: I) -> Select - where - I: Iterator>, - { - Select { - iter, - selectors: self, - } - } -} - -impl Selector { - /// Returns whether the given element matches this selector. - #[inline] - pub fn matches(&self, element: &NodeDataRef) -> bool { - let mut context = matching::MatchingContext::new( - matching::MatchingMode::Normal, - None, - None, - QuirksMode::NoQuirks, - ); - matching::matches_selector(&self.0, 0, None, element, &mut context, &mut |_, _| {}) - } - - /// Return the specificity of this selector. - pub fn specificity(&self) -> Specificity { - Specificity(self.0.specificity()) - } -} - -impl ::std::str::FromStr for Selectors { - type Err = (); - #[inline] - fn from_str(s: &str) -> Result { - Selectors::compile(s) - } -} - -impl fmt::Display for Selector { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.0.to_css(f) - } -} - -impl fmt::Display for Selectors { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut iter = self.0.iter(); - let first = iter - .next() - .expect("Empty Selectors, should contain at least one selector"); - first.0.to_css(f)?; - for selector in iter { - f.write_str(", ")?; - selector.0.to_css(f)?; - } - Ok(()) - } -} - -impl fmt::Debug for Selector { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(self, f) - } -} - -impl fmt::Debug for Selectors { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(self, f) - } -} diff --git a/kuchiki/src/serializer.rs b/kuchiki/src/serializer.rs deleted file mode 100644 index 4b4936c..0000000 --- a/kuchiki/src/serializer.rs +++ /dev/null @@ -1,105 +0,0 @@ -use html5ever::serialize::TraversalScope::*; -use html5ever::serialize::{serialize, Serialize, SerializeOpts, Serializer, TraversalScope}; -use html5ever::QualName; -use std::fs::File; -use std::io::{Result, Write}; -use std::path::Path; -use std::string::ToString; - -use crate::tree::{NodeData, NodeRef}; - -impl Serialize for NodeRef { - fn serialize( - &self, - serializer: &mut S, - traversal_scope: TraversalScope, - ) -> Result<()> { - match (traversal_scope, self.data()) { - (ref scope, &NodeData::Element(ref element)) => { - if *scope == IncludeNode { - let attrs = element.attributes.borrow(); - - // Unfortunately we need to allocate something to hold these &'a QualName - let attrs = attrs - .map - .iter() - .map(|(name, attr)| { - ( - QualName::new( - attr.prefix.clone(), - name.ns.clone(), - name.local.clone(), - ), - &attr.value, - ) - }) - .collect::>(); - - serializer.start_elem( - element.name.clone(), - attrs.iter().map(|&(ref name, value)| (name, &**value)), - )? - } - - for child in self.children() { - Serialize::serialize(&child, serializer, IncludeNode)? - } - - if *scope == IncludeNode { - serializer.end_elem(element.name.clone())? - } - Ok(()) - } - - (_, &NodeData::DocumentFragment) | (_, &NodeData::Document(_)) => { - for child in self.children() { - Serialize::serialize(&child, serializer, IncludeNode)? - } - Ok(()) - } - - (ChildrenOnly(_), _) => Ok(()), - - (IncludeNode, &NodeData::Doctype(ref doctype)) => { - serializer.write_doctype(&doctype.name) - } - (IncludeNode, &NodeData::Text(ref text)) => serializer.write_text(&text.borrow()), - (IncludeNode, &NodeData::Comment(ref text)) => serializer.write_comment(&text.borrow()), - (IncludeNode, &NodeData::ProcessingInstruction(ref contents)) => { - let contents = contents.borrow(); - serializer.write_processing_instruction(&contents.0, &contents.1) - } - } - } -} - -impl ToString for NodeRef { - #[inline] - fn to_string(&self) -> String { - let mut u8_vec = Vec::new(); - self.serialize(&mut u8_vec).unwrap(); - String::from_utf8(u8_vec).unwrap() - } -} - -impl NodeRef { - /// Serialize this node and its descendants in HTML syntax to the given stream. - #[inline] - pub fn serialize(&self, writer: &mut W) -> Result<()> { - serialize( - writer, - self, - SerializeOpts { - traversal_scope: IncludeNode, - ..Default::default() - }, - ) - } - - /// Serialize this node and its descendants in HTML syntax to a new file at the given path. - #[inline] - pub fn serialize_to_file>(&self, path: P) -> Result<()> { - let mut file = File::create(&path)?; - self.serialize(&mut file) - } -} diff --git a/kuchiki/src/tests.rs b/kuchiki/src/tests.rs deleted file mode 100644 index 1ccc1b2..0000000 --- a/kuchiki/src/tests.rs +++ /dev/null @@ -1,185 +0,0 @@ -use html5ever::tree_builder::QuirksMode; -use html5ever::QualName; -use std::path::Path; - -use tempfile::TempDir; - -use crate::parser::{parse_html, parse_fragment}; -use crate::select::*; -use crate::traits::*; - -#[test] -fn text_nodes() { - let html = r" - -Test case -

Content contains Important data

"; - let document = parse_html().one(html); - let paragraph = document.select("p").unwrap().collect::>(); - assert_eq!(paragraph.len(), 1); - assert_eq!( - paragraph[0].text_contents(), - "Content contains Important data" - ); - let texts = paragraph[0] - .as_node() - .descendants() - .text_nodes() - .collect::>(); - assert_eq!(texts.len(), 3); - assert_eq!(&*texts[0].borrow(), "Content contains "); - assert_eq!(&*texts[1].borrow(), "Important"); - assert_eq!(&*texts[2].borrow(), " data"); - { - let mut x = texts[0].borrow_mut(); - x.truncate(0); - x.push_str("Content doesn't contain "); - } - assert_eq!(&*texts[0].borrow(), "Content doesn't contain "); -} - -#[test] -fn parse_and_serialize() { - let html = r" - -Test case -

Content"; - let document = parse_html().one(html); - assert_eq!( - document.as_document().unwrap().quirks_mode(), - QuirksMode::NoQuirks - ); - assert_eq!( - document.to_string(), - r"Test case -

Content

" - ); -} - -#[test] -fn parse_and_serialize_fragment() { - let html = r"Test case"; - - let ctx_name = QualName::new(None, ns!(html), local_name!("tbody")); - let document = parse_fragment(ctx_name, vec![]).one(html); - assert_eq!(document.as_document().unwrap().quirks_mode(), QuirksMode::NoQuirks); - assert_eq!(document.to_string(), r"Test case"); -} - -#[test] -fn parse_file() { - let mut path = Path::new(env!("CARGO_MANIFEST_DIR")).to_path_buf(); - path.push("test_data".to_string()); - path.push("foo.html"); - - let html = r" - Test case - - -

Foo

- - -"; - let document = parse_html().from_utf8().from_file(&path).unwrap(); - assert_eq!(document.to_string(), html); -} - -#[test] -fn serialize_and_read_file() { - let tempdir = TempDir::new().unwrap(); - let mut path = tempdir.path().to_path_buf(); - path.push("temp.html"); - - let html = r"TitleBody"; - let document = parse_html().one(html); - let _ = document.serialize_to_file(path.clone()); - - let document2 = parse_html().from_utf8().from_file(&path).unwrap(); - assert_eq!(document.to_string(), document2.to_string()); -} - -#[test] -fn select() { - let html = r" -Test case -

Foo -

Bar -

Foo -"; - - let document = parse_html().one(html); - let matching = document.select("p.foo").unwrap().collect::>(); - assert_eq!(matching.len(), 2); - let child = matching[0].as_node().first_child().unwrap(); - assert_eq!(&**child.as_text().unwrap().borrow(), "Foo\n"); - assert_eq!(matching[0].attributes.borrow().get("class"), Some("foo")); - assert_eq!( - matching[0].attributes.borrow().get(local_name!("class")), - Some("foo") - ); - - let selectors = Selectors::compile("p.foo").unwrap(); - let matching2 = selectors - .filter(document.descendants().elements()) - .collect::>(); - assert_eq!(matching, matching2); -} - -#[test] -fn select_first() { - let html = r" -Test case -

Foo -

Bar -

Baz -"; - - let document = parse_html().one(html); - let matching = document.select_first("p.foo").unwrap(); - let child = matching.as_node().first_child().unwrap(); - assert_eq!(&**child.as_text().unwrap().borrow(), "Foo\n"); - assert_eq!(matching.attributes.borrow().get("class"), Some("foo")); - assert_eq!( - matching.attributes.borrow().get(local_name!("class")), - Some("foo") - ); - - assert!(document.select_first("p.bar").is_err()); -} - -#[test] -fn to_string() { - let html = r" - - - Test case - - -

Foo - -"; - - let document = parse_html().one(html); - assert_eq!( - document - .inclusive_descendants() - .nth(11) - .unwrap() - .to_string(), - "

Foo\n \n

" - ); -} - -#[test] -fn specificity() { - let selectors = Selectors::compile(".example, :first-child, div").unwrap(); - let specificities = selectors - .0 - .iter() - .map(|s| s.specificity()) - .collect::>(); - assert_eq!(specificities.len(), 3); - assert!(specificities[0] == specificities[1]); - assert!(specificities[0] > specificities[2]); - assert!(specificities[1] > specificities[2]); -} diff --git a/kuchiki/src/tree.rs b/kuchiki/src/tree.rs deleted file mode 100644 index 92483a2..0000000 --- a/kuchiki/src/tree.rs +++ /dev/null @@ -1,489 +0,0 @@ -use html5ever::tree_builder::QuirksMode; -use html5ever::QualName; -use std::cell::{Cell, RefCell}; -use std::fmt; -use std::ops::Deref; -use std::rc::{Rc, Weak}; - -use crate::attributes::{Attribute, Attributes, ExpandedName}; -use crate::cell_extras::*; -use crate::iter::NodeIterator; - -/// Node data specific to the node type. -#[derive(Debug, PartialEq, Clone)] -pub enum NodeData { - /// Element node - Element(ElementData), - - /// Text node - Text(RefCell), - - /// Comment node - Comment(RefCell), - - /// Processing instruction node - ProcessingInstruction(RefCell<(String, String)>), - - /// Doctype node - Doctype(Doctype), - - /// Document node - Document(DocumentData), - - /// Document fragment node - DocumentFragment, -} - -/// Data specific to doctype nodes. -#[derive(Debug, PartialEq, Clone)] -pub struct Doctype { - /// The name of the doctype - pub name: String, - - /// The public ID of the doctype - pub public_id: String, - - /// The system ID of the doctype - pub system_id: String, -} - -/// Data specific to element nodes. -#[derive(Debug, PartialEq, Clone)] -pub struct ElementData { - /// The namespace and local name of the element, such as `ns!(html)` and `body`. - pub name: QualName, - - /// The attributes of the elements. - pub attributes: RefCell, - - /// If the element is an HTML `