Add private fork of sanitize-html-rs

Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
2022-06-10 13:55:11 +02:00 · 2022-06-10 13:55:11 +02:00 · 4e3f7b46da
parent 36da496aa1
commit 4e3f7b46da
17 changed files with 1674 additions and 1 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -747,6 +747,16 @@ version = "1.0.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"

+[[package]]
+name = "sanitize_html"
+version = "0.7.0"
+dependencies = [
+ "html5ever",
+ "kuchiki",
+ "lazy_static",
+ "regex",
+]
+
 [[package]]
 name = "scopeguard"
 version = "1.1.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -2,4 +2,5 @@

 members = [
    "bin",
-]
+    "sanitize-html-rs",
+]
--- a/sanitize-html-rs/.github/workflows/build.yml
+++ b/sanitize-html-rs/.github/workflows/build.yml
@ -0,0 +1,31 @@
+name: Build
+
+on: [push, pull_request]
+
+jobs:
+  build:
+    runs-on:  ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+          - macOS-latest
+          - windows-latest
+        rust:
+          - stable
+
+    steps:
+    - uses: actions/checkout@v2
+    - uses: actions-rs/toolchain@v1
+      with:
+        toolchain: ${{ matrix.rust }}
+        override: true
+    - name: Build
+      run: |
+        cargo build --all-targets --no-default-features --verbose
+        cargo build --all-targets --verbose
+    - name: Run tests
+      run: cargo test --all-targets --verbose
+      env:
+        RUST_BACKTRACE: 1
--- a/sanitize-html-rs/.github/workflows/coverage.yml
+++ b/sanitize-html-rs/.github/workflows/coverage.yml
@ -0,0 +1,27 @@
+name: Coverage
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+
+jobs:
+  coverage:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - uses: actions-rs/toolchain@v1
+      with:
+        toolchain: stable
+        override: true
+    - uses: actions-rs/install@v0.1
+      with:
+        crate: cargo-tarpaulin
+        use-tool-cache: true
+    - name: Run coverage
+      run: cargo tarpaulin -f -t 5 --out Xml -v -- --test-threads=1
+    - name: Upload coverage to Codecov
+      uses: codecov/codecov-action@v1
+      with:
+        token: ${{secrets.CODECOV_TOKEN}}
--- a/sanitize-html-rs/.github/workflows/style.yml
+++ b/sanitize-html-rs/.github/workflows/style.yml
@ -0,0 +1,24 @@
+name: Style check
+
+on: [push, pull_request]
+
+jobs:
+  clippy:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Install clippy
+        uses: actions-rs/toolchain@v1
+        with:
+          toolchain: stable
+          components: clippy
+      - uses: actions-rs/clippy-check@v1
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          args: --all --all-features
+  fmt:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v1
+      - name: Run fmt check
+        run: cargo fmt --all -- --check
--- a/sanitize-html-rs/.gitignore
+++ b/sanitize-html-rs/.gitignore
@ -0,0 +1,4 @@
+/target/
+**/*.rs.bk
+Cargo.lock
+/.vscode
--- a/sanitize-html-rs/Cargo.toml
+++ b/sanitize-html-rs/Cargo.toml
@ -0,0 +1,16 @@
+[package]
+name = "sanitize_html"
+version = "0.7.0"
+authors = ["Andrey Kutejko <andy128k@gmail.com>"]
+description = "Rule-based HTML Sanitization library"
+keywords = ["html", "sanitize"]
+license = "MIT"
+homepage = "https://github.com/andy128k/sanitize-html-rs"
+repository = "https://github.com/andy128k/sanitize-html-rs.git"
+edition = "2018"
+
+[dependencies]
+regex = "1"
+lazy_static = "1"
+html5ever = "0.25"
+kuchiki = "0.8"
--- a/sanitize-html-rs/LICENSE.txt
+++ b/sanitize-html-rs/LICENSE.txt
@ -0,0 +1,18 @@
+Copyright (c) 2017 Andrey Kutejko
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/sanitize-html-rs/README.md
+++ b/sanitize-html-rs/README.md
@ -0,0 +1,8 @@
+# Sanitize HTML
+
+[![Crates.io Status](https://img.shields.io/crates/v/sanitize_html.svg)](https://crates.io/crates/sanitize_html)
+[![Build](https://github.com/andy128k/sanitize-html-rs/workflows/Build/badge.svg?branch=master&event=push)](https://github.com/andy128k/sanitize-html-rs/actions?query=workflow%3ABuild)
+[![codecov](https://codecov.io/gh/andy128k/sanitize-html-rs/branch/master/graph/badge.svg)](https://codecov.io/gh/andy128k/sanitize-html-rs)
+[![dependency status](https://deps.rs/repo/github/andy128k/sanitize-html-rs/status.svg)](https://deps.rs/repo/github/andy128k/sanitize-html-rs)
+
+This is a library for sanitization of HTML fragments.
--- a/sanitize-html-rs/src/errors.rs
+++ b/sanitize-html-rs/src/errors.rs
@ -0,0 +1,37 @@
+//! Error types which can be emitted by the sanitization procedure.
+
+use std::error::Error;
+use std::fmt;
+
+/// Error returned when sanitization fails
+#[derive(Debug)]
+pub enum SanitizeError {
+    /// UTF-8 decoding error for a borrowed byte slice (wraps `std::str::Utf8Error`)
+    StrUtf8Error(std::str::Utf8Error),
+
+    /// UTF-8 decoding error for an owned byte buffer (wraps `std::string::FromUtf8Error`)
+    Utf8Error(std::string::FromUtf8Error),
+
+    /// Serialization error (wraps `std::io::Error`)
+    SerializeError(std::io::Error),
+}
+
+impl fmt::Display for SanitizeError {
+    /// Writes a short category label followed by the wrapped error's own message.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // Both UTF-8 variants intentionally share the same label.
+        let label = match self {
+            Self::StrUtf8Error(_) | Self::Utf8Error(_) => "UTF-8 decode error",
+            Self::SerializeError(_) => "Serialization error",
+        };
+        let cause: &dyn fmt::Display = match self {
+            Self::StrUtf8Error(inner) => inner,
+            Self::Utf8Error(inner) => inner,
+            Self::SerializeError(inner) => inner,
+        };
+        write!(f, "{} {}", label, cause)
+    }
+}
+
+impl Error for SanitizeError {
+    /// Exposes the wrapped lower-level error as the cause of this error.
+    fn source(&self) -> Option<&(dyn Error + 'static)> {
+        let cause: &(dyn Error + 'static) = match self {
+            Self::StrUtf8Error(inner) => inner,
+            Self::Utf8Error(inner) => inner,
+            Self::SerializeError(inner) => inner,
+        };
+        Some(cause)
+    }
+}
--- a/sanitize-html-rs/src/lib.rs
+++ b/sanitize-html-rs/src/lib.rs
@ -0,0 +1,42 @@
+//! HTML Sanitization library
+//!
+//! # Examples
+//!
+//! ```
+//! use sanitize_html::sanitize_str;
+//! use sanitize_html::rules::predefined::DEFAULT;
+//!
+//! let input = "<b>Lo<!-- comment -->rem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\");</script>";
+//!
+//! let sanitized_default: String = sanitize_str(&DEFAULT, input).unwrap();
+//! assert_eq!(&sanitized_default, "Lorem ipsum dolor sit amet ");
+//! ```
+
+#![deny(missing_docs)]
+
+pub mod errors;
+mod parse;
+pub mod rules;
+mod sanitize;
+mod tests;
+
+use crate::errors::SanitizeError;
+use crate::rules::Rules;
+
+/// Sanitize HTML bytes
+///
+/// The input is decoded as UTF-8, parsed, sanitized according to `rules` and
+/// serialized back into bytes.
+///
+/// # Errors
+///
+/// Returns `SanitizeError::StrUtf8Error` when `input` is not valid UTF-8 and
+/// passes on any error reported while serializing the sanitized document.
+pub fn sanitize_bytes(rules: &Rules, input: &[u8]) -> Result<Vec<u8>, SanitizeError> {
+    let text = match std::str::from_utf8(input) {
+        Ok(text) => text,
+        Err(utf8_error) => return Err(SanitizeError::StrUtf8Error(utf8_error)),
+    };
+    let document = parse::parse_str(text);
+    let sanitized = sanitize::sanitize_dom(&document, rules);
+    parse::unparse_bytes(&sanitized)
+}
+
+/// Sanitize HTML string
+///
+/// The input is parsed, sanitized according to `rules`, serialized and
+/// converted back into a `String`.
+///
+/// # Errors
+///
+/// Passes on any error reported while serializing the sanitized document and
+/// returns `SanitizeError::Utf8Error` when the serialized bytes are not valid UTF-8.
+pub fn sanitize_str(rules: &Rules, input: &str) -> Result<String, SanitizeError> {
+    let document = parse::parse_str(input);
+    let sanitized = sanitize::sanitize_dom(&document, rules);
+    parse::unparse_bytes(&sanitized)
+        .and_then(|bytes| String::from_utf8(bytes).map_err(SanitizeError::Utf8Error))
+}
--- a/sanitize-html-rs/src/parse.rs
+++ b/sanitize-html-rs/src/parse.rs
@ -0,0 +1,38 @@
+use super::errors::SanitizeError;
+use html5ever::{
+    interface::QualName,
+    local_name, namespace_prefix, namespace_url, ns, serialize,
+    serialize::{SerializeOpts, TraversalScope},
+    tendril::TendrilSink,
+};
+use kuchiki::{parse_html_with_options, NodeRef, ParseOpts};
+use std::default::Default;
+
+/// Parses `input` as HTML and returns the root node of the resulting tree.
+///
+/// The tree builder is configured with `drop_doctype` enabled.
+pub(crate) fn parse_str(input: &str) -> NodeRef {
+    let mut options = ParseOpts::default();
+    options.tree_builder.drop_doctype = true;
+
+    // `one` feeds the single input chunk to the parser and finishes it.
+    parse_html_with_options(options).one(input)
+}
+
+/// Serializes the children of `dom` as HTML into a byte buffer.
+///
+/// Only the children are written (`TraversalScope::ChildrenOnly`), with an
+/// HTML `div` supplied as the parent element name.
+///
+/// # Errors
+///
+/// Returns `SanitizeError::SerializeError` when the serializer reports an I/O error.
+pub(crate) fn unparse_bytes(dom: &NodeRef) -> Result<Vec<u8>, SanitizeError> {
+    let context = QualName::new(
+        Some(namespace_prefix!("html")),
+        ns!(html),
+        local_name!("div"),
+    );
+    let options = SerializeOpts {
+        scripting_enabled: false,
+        traversal_scope: TraversalScope::ChildrenOnly(Some(context)),
+        create_missing_parent: false,
+    };
+
+    let mut output: Vec<u8> = Vec::new();
+    match serialize(&mut output, dom, options) {
+        Ok(()) => Ok(output),
+        Err(io_error) => Err(SanitizeError::SerializeError(io_error)),
+    }
+}
--- a/sanitize-html-rs/src/rules/mod.rs
+++ b/sanitize-html-rs/src/rules/mod.rs
@ -0,0 +1,104 @@
+//! Structures to define sanitization rules.
+
+pub mod pattern;
+pub mod predefined;
+
+use self::pattern::Pattern;
+use std::collections::HashMap;
+use std::collections::HashSet;
+
+/// Structure describing an HTML element and the attributes it may carry
+pub struct Element {
+    /// Name of the element
+    pub name: String,
+    /// Whitelist of allowed attributes, each with the pattern its value must match
+    pub attributes: HashMap<String, Pattern>,
+    /// List of mandatory attributes and their values.
+    /// These attributes will be forcibly added to the element.
+    pub mandatory_attributes: HashMap<String, String>,
+}
+
+impl Element {
+    /// Creates a descriptor for the element `name` with no allowed and no
+    /// mandatory attributes.
+    pub fn new(name: &str) -> Self {
+        Element {
+            name: String::from(name),
+            attributes: HashMap::default(),
+            mandatory_attributes: HashMap::default(),
+        }
+    }
+
+    /// Allows `attribute` on this element when its value matches `pattern`.
+    ///
+    /// A pattern registered earlier for the same attribute is replaced.
+    pub fn attribute(mut self, attribute: &str, pattern: Pattern) -> Self {
+        let key = String::from(attribute);
+        self.attributes.insert(key, pattern);
+        self
+    }
+
+    /// Registers a mandatory attribute together with the value it must have.
+    ///
+    /// A value registered earlier for the same attribute is replaced.
+    pub fn mandatory_attribute(mut self, attribute: &str, value: &str) -> Self {
+        let (key, forced_value) = (String::from(attribute), String::from(value));
+        self.mandatory_attributes.insert(key, forced_value);
+        self
+    }
+
+    /// Checks if `value` is acceptable for `attribute`.
+    ///
+    /// Attributes which were never registered are always rejected.
+    pub fn is_valid(&self, attribute: &str, value: &str) -> bool {
+        self.attributes
+            .get(attribute)
+            .map_or(false, |pattern| pattern.matches(value))
+    }
+}
+
+/// Structure describing a set of sanitization rules
+#[derive(Default)]
+pub struct Rules {
+    /// Determines if comments are kept or stripped out of a document.
+    pub allow_comments: bool,
+    /// Allowed elements, keyed by element name.
+    pub allowed_elements: HashMap<String, Element>,
+    /// Elements which will be removed together with their children.
+    pub delete_elements: HashSet<String>,
+    /// Elements which will be replaced by spaces (their children will be processed recursively).
+    pub space_elements: HashSet<String>,
+    /// Elements which will be renamed, mapping the old name to the new one.
+    pub rename_elements: HashMap<String, String>,
+}
+
+impl Rules {
+    /// Creates an empty rule set (identical to `Rules::default()`).
+    pub fn new() -> Self {
+        Rules::default()
+    }
+
+    /// Sets whether comments are allowed.
+    pub fn allow_comments(self, allow_comments: bool) -> Self {
+        Rules {
+            allow_comments,
+            ..self
+        }
+    }
+
+    /// Adds a rule for an allowed element, keyed by the element's name.
+    ///
+    /// A rule registered earlier under the same name is replaced.
+    pub fn element(mut self, element: Element) -> Self {
+        let key = element.name.clone();
+        self.allowed_elements.insert(key, element);
+        self
+    }
+
+    /// Adds a rule to delete an element.
+    pub fn delete(mut self, element_name: &str) -> Self {
+        self.delete_elements.insert(String::from(element_name));
+        self
+    }
+
+    /// Adds a rule to replace an element with space.
+    pub fn space(mut self, element_name: &str) -> Self {
+        self.space_elements.insert(String::from(element_name));
+        self
+    }
+
+    /// Adds a rule to rename the element `element_name` to `to`.
+    pub fn rename(mut self, element_name: &str, to: &str) -> Self {
+        let (old_name, new_name) = (String::from(element_name), String::from(to));
+        self.rename_elements.insert(old_name, new_name);
+        self
+    }
+}
--- a/sanitize-html-rs/src/rules/pattern.rs
+++ b/sanitize-html-rs/src/rules/pattern.rs
@ -0,0 +1,127 @@
+//! This module contains code dedicated to check validity of attribute's value.
+//!
+//! # Examples
+//! ```
+//! use sanitize_html::rules::pattern::Pattern;
+//! use regex::Regex;
+//!
+//! let href = Pattern::regex(Regex::new("^(ftp:|http:|https:|mailto:)").unwrap()) |
+//!     !Pattern::regex(Regex::new("^[^/]+[[:space:]]*:").unwrap());
+//!
+//! assert!(href.matches("filename.xls"));
+//! assert!(href.matches("http://foo.com/"));
+//! assert!(href.matches(" filename with spaces .zip "));
+//! assert!(!href.matches(" javascript  : window.location = '//example.com/'")); // Attempt to make XSS
+//! ```
+
+use regex::Regex;
+
+/// Value pattern: a boxed predicate which decides whether a string value is accepted
+pub struct Pattern(pub Box<dyn Fn(&str) -> bool + Sync + Send>);
+
+impl Pattern {
+    /// Creates pattern which accepts any value.
+    ///
+    /// # Example
+    /// ```
+    /// use sanitize_html::rules::pattern::Pattern;
+    /// use regex::Regex;
+    ///
+    /// let pattern = Pattern::any();
+    /// assert!(pattern.matches(""));
+    /// assert!(pattern.matches("pants"));
+    /// ```
+    pub fn any() -> Self {
+        Pattern(Box::new(|_: &str| true))
+    }
+
+    /// Creates pattern which uses regular expression to check a value.
+    ///
+    /// A value is accepted when `re` matches anywhere in it.
+    ///
+    /// # Example
+    /// ```
+    /// use sanitize_html::rules::pattern::Pattern;
+    /// use regex::Regex;
+    ///
+    /// let pattern = Pattern::regex(Regex::new("ant").unwrap());
+    /// assert!(!pattern.matches(""));
+    /// assert!(pattern.matches("pants"));
+    /// ```
+    pub fn regex(re: Regex) -> Self {
+        let check = move |value: &str| re.is_match(value);
+        Pattern(Box::new(check))
+    }
+
+    /// Checks if a value matches to a pattern.
+    pub fn matches(&self, value: &str) -> bool {
+        let Pattern(predicate) = self;
+        predicate(value)
+    }
+}
+
+impl ::std::ops::Not for Pattern {
+    type Output = Pattern;
+
+    /// Negates pattern: the result accepts exactly the values the original rejects.
+    ///
+    /// # Example
+    /// ```
+    /// use sanitize_html::rules::pattern::Pattern;
+    /// use regex::Regex;
+    ///
+    /// let pattern = !Pattern::any();
+    /// assert!(!pattern.matches(""));
+    /// assert!(!pattern.matches("pants"));
+    /// ```
+    fn not(self) -> Self::Output {
+        let Pattern(inner) = self;
+        Pattern(Box::new(move |candidate| !inner(candidate)))
+    }
+}
+
+impl ::std::ops::BitAnd for Pattern {
+    type Output = Pattern;
+
+    /// Combines two patterns into a pattern which matches a string iff both patterns match that string.
+    ///
+    /// The right-hand pattern is only consulted when the left-hand one matches.
+    ///
+    /// # Example
+    /// ```
+    /// use sanitize_html::rules::pattern::Pattern;
+    /// use regex::Regex;
+    ///
+    /// let pan = Pattern::regex(Regex::new("pan").unwrap());
+    /// let ant = Pattern::regex(Regex::new("ant").unwrap());
+    /// let pattern = pan & ant;
+    ///
+    /// assert!(!pattern.matches("pan"));
+    /// assert!(!pattern.matches("ant"));
+    /// assert!(pattern.matches("pants"));
+    /// ```
+    fn bitand(self, rhs: Pattern) -> Self::Output {
+        let Pattern(left) = self;
+        let Pattern(right) = rhs;
+        Pattern(Box::new(move |candidate| {
+            left(candidate) && right(candidate)
+        }))
+    }
+}
+
+impl ::std::ops::BitOr for Pattern {
+    type Output = Pattern;
+
+    /// Combines two patterns into a pattern which matches a string if one of patterns matches that string.
+    ///
+    /// The right-hand pattern is only consulted when the left-hand one does not match.
+    ///
+    /// # Example
+    /// ```
+    /// use sanitize_html::rules::pattern::Pattern;
+    /// use regex::Regex;
+    ///
+    /// let pan = Pattern::regex(Regex::new("pan").unwrap());
+    /// let pot = Pattern::regex(Regex::new("pot").unwrap());
+    /// let pattern = pan | pot;
+    ///
+    /// assert!(pattern.matches("pants"));
+    /// assert!(pattern.matches("pot"));
+    /// assert!(!pattern.matches("jar"));
+    /// ```
+    fn bitor(self, rhs: Pattern) -> Self::Output {
+        let Pattern(left) = self;
+        let Pattern(right) = rhs;
+        Pattern(Box::new(move |candidate| {
+            left(candidate) || right(candidate)
+        }))
+    }
+}
--- a/sanitize-html-rs/src/rules/predefined.rs
+++ b/sanitize-html-rs/src/rules/predefined.rs
@ -0,0 +1,374 @@
+//! Predefined rules
+//!
+//! These rules are inspired by a great Ruby gem [sanitize](https://github.com/rgrove/sanitize/).
+
+use super::pattern::Pattern;
+use super::{Element, Rules};
+use lazy_static::lazy_static;
+use regex::Regex;
+
+/// Compiles `regex` into a [`Pattern`].
+///
+/// # Panics
+///
+/// Panics when `regex` is not a valid regular expression.
+fn re(regex: &str) -> Pattern {
+    Regex::new(regex).map(Pattern::regex).unwrap()
+}
+
+/// Pattern for link targets: accepts values starting with `ftp:`, `http:`,
+/// `https:` or `mailto:`, as well as values which do not match the
+/// scheme-like prefix `^[^/]+[[:space:]]*:`.
+fn href() -> Pattern {
+    let allowed_scheme = re("^(ftp:|http:|https:|mailto:)");
+    let scheme_like_prefix = re("^[^/]+[[:space:]]*:");
+    allowed_scheme | !scheme_like_prefix
+}
+
+/// Pattern for resource URLs: accepts values starting with `http:` or
+/// `https:`, as well as values which do not match the scheme-like prefix
+/// `^[^/]+[[:space:]]*:`.
+fn src() -> Pattern {
+    let allowed_scheme = re("^(http:|https:)");
+    let scheme_like_prefix = re("^[^/]+[[:space:]]*:");
+    allowed_scheme | !scheme_like_prefix
+}
+
+lazy_static! {
+    /// Basic rules. Allows a variety of markup including formatting elements, links, and lists.
+    pub static ref BASIC: Rules = basic();
+
+    /// Default rules. Removes all tags.
+    pub static ref DEFAULT: Rules = default();
+
+    /// Relaxed rules. Allows an even wider variety of markup, including images and tables.
+    pub static ref RELAXED: Rules = relaxed();
+
+    /// Restricted rules. Allows only very simple inline markup. No links, images, or block elements.
+    pub static ref RESTRICTED: Rules = restricted();
+
+    /// Rules for documents from untrusted sources. Removes all tags except text emphasis and links.
+    pub static ref UNTRUSTED: Rules = untrusted();
+}
+
+/// Builds the rule set behind `BASIC`.
+///
+/// A handful of elements keep selected attributes, a list of formatting
+/// elements is kept without attributes, and block-level containers are
+/// replaced by spaces. Each element is registered exactly once.
+fn basic() -> Rules {
+    // Elements kept without any attributes.
+    let plain: &[&str] = &[
+        "b", "br", "cite", "code", "dd", "dl", "dt", "em", "i", "kbd", "li", "mark", "ol", "p",
+        "pre", "s", "samp", "small", "strike", "strong", "sub", "sup", "u", "ul", "var",
+    ];
+    // Elements replaced by spaces (their children are still processed).
+    let spaced: &[&str] = &[
+        "address", "article", "aside", "div", "footer", "h1", "h2", "h3", "h4", "h5", "h6",
+        "header", "hgroup", "hr", "nav", "section",
+    ];
+
+    // Elements which keep some of their attributes.
+    let rules = Rules::new()
+        .element(Element::new("a").attribute("href", href()))
+        .element(Element::new("abbr").attribute("title", Pattern::any()))
+        .element(Element::new("blockquote").attribute("cite", src()))
+        .element(Element::new("dfn").attribute("title", Pattern::any()))
+        .element(Element::new("q").attribute("cite", src()))
+        .element(
+            Element::new("time")
+                .attribute("datetime", Pattern::any())
+                .attribute("pubdate", Pattern::any()),
+        );
+
+    let rules = plain
+        .iter()
+        .fold(rules, |rules, name| rules.element(Element::new(name)));
+    spaced.iter().fold(rules, |rules, name| rules.space(name))
+}
+
+/// Builds the rule set behind `DEFAULT`.
+///
+/// No element is kept: block-level elements are replaced by spaces and a few
+/// elements are deleted together with their content.
+fn default() -> Rules {
+    // Elements replaced by spaces (their children are still processed).
+    let spaced: &[&str] = &[
+        "address", "article", "aside", "blockquote", "br", "dd", "div", "dl", "dt", "footer",
+        "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "li", "nav", "ol", "p",
+        "pre", "section", "ul",
+    ];
+    // Elements removed together with their children.
+    let deleted: &[&str] = &["iframe", "noembed", "noframes", "noscript", "script", "style"];
+
+    let rules = spaced
+        .iter()
+        .fold(Rules::new(), |rules, name| rules.space(name));
+    deleted.iter().fold(rules, |rules, name| rules.delete(name))
+}
+
+/// Builds the rule set behind `RELAXED`.
+///
+/// Every kept element accepts the common `dir`, `lang`, `title` and `class`
+/// attributes; some elements accept further attributes on top of those.
+/// A few sectioning elements are replaced by spaces.
+fn relaxed() -> Rules {
+    /// Allows every attribute in `attrs` on `element` with any value.
+    fn allow_any(element: Element, attrs: &[&str]) -> Element {
+        attrs.iter().fold(element, |element, attr| {
+            element.attribute(attr, Pattern::any())
+        })
+    }
+
+    /// Element descriptor carrying the attributes shared by all relaxed elements.
+    fn relaxed_element(name: &str) -> Element {
+        allow_any(Element::new(name), &["dir", "lang", "title", "class"])
+    }
+
+    // Elements which accept only the common attributes.
+    let plain: &[&str] = &[
+        "abbr", "b", "bdo", "br", "caption", "cite", "code", "dd", "dfn", "dl", "dt", "em",
+        "figcaption", "figure", "h1", "h2", "h3", "h4", "h5", "h6", "hgroup", "i", "kbd", "li",
+        "mark", "p", "pre", "rp", "rt", "ruby", "s", "samp", "small", "strike", "strong", "sub",
+        "sup", "tbody", "tfoot", "thead", "tr", "u", "var", "wbr",
+    ];
+    // Elements replaced by spaces (their children are still processed).
+    let spaced: &[&str] = &[
+        "address", "article", "aside", "footer", "header", "hr", "nav", "section",
+    ];
+
+    // Elements which accept additional attributes.
+    let rules = Rules::new()
+        .element(relaxed_element("a").attribute("href", href()))
+        .element(relaxed_element("blockquote").attribute("cite", src()))
+        .element(allow_any(relaxed_element("col"), &["span", "width"]))
+        .element(allow_any(relaxed_element("colgroup"), &["span", "width"]))
+        .element(allow_any(
+            relaxed_element("del").attribute("cite", src()),
+            &["datetime"],
+        ))
+        .element(allow_any(
+            relaxed_element("img").attribute("src", src()),
+            &["align", "alt", "width", "height"],
+        ))
+        .element(allow_any(
+            relaxed_element("ins").attribute("cite", src()),
+            &["datetime"],
+        ))
+        .element(allow_any(
+            relaxed_element("ol"),
+            &["start", "reversed", "type"],
+        ))
+        .element(relaxed_element("q").attribute("cite", src()))
+        .element(allow_any(relaxed_element("table"), &["summary", "width"]))
+        .element(allow_any(
+            relaxed_element("td"),
+            &["abbr", "axis", "colspan", "rowspan", "width"],
+        ))
+        .element(allow_any(
+            relaxed_element("th"),
+            &["abbr", "axis", "colspan", "rowspan", "scope", "width"],
+        ))
+        .element(allow_any(relaxed_element("time"), &["datetime", "pubdate"]))
+        .element(allow_any(relaxed_element("ul"), &["type"]));
+
+    let rules = plain
+        .iter()
+        .fold(rules, |rules, name| rules.element(relaxed_element(name)));
+    spaced.iter().fold(rules, |rules, name| rules.space(name))
+}
+
+/// Builds the rule set behind `RESTRICTED`.
+///
+/// Only a few inline emphasis elements are kept (without attributes);
+/// block-level elements are replaced by spaces.
+fn restricted() -> Rules {
+    // Elements kept without any attributes.
+    let kept: &[&str] = &["b", "em", "i", "strong", "u"];
+    // Elements replaced by spaces (their children are still processed).
+    let spaced: &[&str] = &[
+        "address", "article", "aside", "blockquote", "br", "dd", "div", "dl", "dt", "footer",
+        "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "li", "nav", "ol", "p",
+        "pre", "section", "ul",
+    ];
+
+    let rules = kept
+        .iter()
+        .fold(Rules::new(), |rules, name| rules.element(Element::new(name)));
+    spaced.iter().fold(rules, |rules, name| rules.space(name))
+}
+
+/// Builds the rule set behind `UNTRUSTED`.
+///
+/// Links are kept with a checked `href` and forced `target` and `rel`
+/// attributes, a few inline emphasis elements are kept without attributes,
+/// and block-level elements are replaced by spaces.
+fn untrusted() -> Rules {
+    // Elements kept without any attributes.
+    let kept: &[&str] = &["b", "em", "i", "strong", "u"];
+    // Elements replaced by spaces (their children are still processed).
+    let spaced: &[&str] = &[
+        "address", "article", "aside", "blockquote", "br", "dd", "div", "dl", "dt", "footer",
+        "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "li", "nav", "ol", "p",
+        "pre", "section", "ul",
+    ];
+
+    let link = Element::new("a")
+        .attribute("href", href())
+        .mandatory_attribute("target", "_blank")
+        .mandatory_attribute("rel", "noreferrer noopener");
+
+    let rules = Rules::new().element(link);
+    let rules = kept
+        .iter()
+        .fold(rules, |rules, name| rules.element(Element::new(name)));
+    spaced.iter().fold(rules, |rules, name| rules.space(name))
+}
+
+#[cfg(test)]
+mod tests {
+    //! Smoke tests: every predefined rule set can be built (all regular
+    //! expressions compile) and contains the expected number of entries.
+
+    use super::{basic, default, relaxed, restricted, untrusted};
+
+    #[test]
+    fn basic_does_not_fail() {
+        assert_eq!(basic().allowed_elements.len(), 31);
+    }
+
+    #[test]
+    fn default_does_not_fail() {
+        let built = default();
+        assert_eq!(built.allowed_elements.len(), 0);
+        assert_eq!(built.space_elements.len(), 26);
+        assert_eq!(built.delete_elements.len(), 6);
+    }
+
+    #[test]
+    fn relaxed_does_not_fail() {
+        let built = relaxed();
+        assert_eq!(built.allowed_elements.len(), 58);
+        assert_eq!(built.space_elements.len(), 8);
+    }
+
+    #[test]
+    fn restricted_does_not_fail() {
+        let built = restricted();
+        assert_eq!(built.allowed_elements.len(), 5);
+        assert_eq!(built.space_elements.len(), 26);
+    }
+
+    #[test]
+    fn untrusted_does_not_fail() {
+        let built = untrusted();
+        assert_eq!(built.allowed_elements.len(), 6);
+        assert_eq!(built.space_elements.len(), 26);
+    }
+}
--- a/sanitize-html-rs/src/sanitize.rs
+++ b/sanitize-html-rs/src/sanitize.rs
@ -0,0 +1,167 @@
+use crate::rules::{Element, Rules};
+use html5ever::{interface::QualName, namespace_url, ns, LocalName};
+use kuchiki::{Attribute, ElementData, ExpandedName, NodeData, NodeRef};
+
+/// Builds a qualified name with no prefix and an empty namespace.
+fn simple_qual_name(name: &str) -> QualName {
+    let local = LocalName::from(name);
+    QualName::new(None, ns!(), local)
+}
+
+/// Renders a qualified name as a lower-case string.
+///
+/// Names in the HTML namespace or in no namespace are rendered as the bare
+/// local name; any other name is rendered as `namespace:local`.
+fn qual_name_to_string(name: &QualName) -> String {
+    let local = name.local.to_lowercase();
+    if name.ns != ns!(html) && !name.ns.is_empty() {
+        return format!("{}:{}", name.ns.to_lowercase(), local);
+    }
+    local
+}
+
+/// Renders an expanded name as a lower-case string.
+///
+/// Names in the HTML namespace or in no namespace are rendered as the bare
+/// local name; any other name is rendered as `namespace:local`.
+fn expanded_name_to_string(name: &ExpandedName) -> String {
+    let local = name.local.to_lowercase();
+    if name.ns != ns!(html) && !name.ns.is_empty() {
+        return format!("{}:{}", name.ns.to_lowercase(), local);
+    }
+    local
+}
+
+/// Creates a new element with the given name and attributes and moves
+/// `children` into it, in order.
+///
+/// Each child is detached from its current position before being appended.
+fn simple_element(
+    name: QualName,
+    attrs: Vec<(ExpandedName, Attribute)>,
+    children: Vec<NodeRef>,
+) -> NodeRef {
+    let parent = NodeRef::new_element(name, attrs);
+    children.into_iter().for_each(|child| {
+        child.detach();
+        parent.append(child);
+    });
+    parent
+}
+
+/// Creates a text node holding a single space character.
+fn create_space_text() -> NodeRef {
+    let single_space = " ";
+    NodeRef::new_text(single_space)
+}
+
+/// What to do with an element, as decided by `element_action`.
+enum ElementAction<'t> {
+    /// The element is allowed; carries the rule registered for it.
+    Keep(&'t Element),
+    /// The element is listed in `Rules::delete_elements`.
+    Delete,
+    /// The element is listed in `Rules::space_elements`.
+    Space,
+    /// The element is `html` or `body`, or no rule mentions it.
+    Elide,
+    /// The element is listed in `Rules::rename_elements`; carries the new name.
+    Rename(&'t str),
+}
+
+/// Decides how the element called `element_name` is treated under `rules`.
+///
+/// `html` and `body` are always elided. Otherwise the rule collections are
+/// consulted in a fixed order (allowed, delete, space, rename) and the first
+/// one mentioning the element wins; elements mentioned nowhere are elided.
+fn element_action<'t>(element_name: &QualName, rules: &'t Rules) -> ElementAction<'t> {
+    let name = qual_name_to_string(element_name);
+
+    if name == "html" || name == "body" {
+        return ElementAction::Elide;
+    }
+    if let Some(element_sanitizer) = rules.allowed_elements.get(&name) {
+        return ElementAction::Keep(element_sanitizer);
+    }
+    if rules.delete_elements.contains(&name) {
+        return ElementAction::Delete;
+    }
+    if rules.space_elements.contains(&name) {
+        return ElementAction::Space;
+    }
+    match rules.rename_elements.get(&name) {
+        Some(rename_to) => ElementAction::Rename(rename_to),
+        None => ElementAction::Elide,
+    }
+}
+
+/// Sanitizes every node of `nodes` in order and concatenates the results.
+fn clean_nodes(nodes: impl IntoIterator<Item = NodeRef>, rules: &Rules) -> Vec<NodeRef> {
+    nodes
+        .into_iter()
+        .flat_map(|node| clean_node(&node, rules))
+        .collect()
+}
+
+/// Sanitizes a single node and returns the nodes which take its place.
+///
+/// * Document, document fragment, doctype and processing instruction nodes
+///   yield nothing.
+/// * Text nodes are passed through.
+/// * Comment nodes are passed through only when `rules.allow_comments` is set.
+/// * Elements are handled according to `element_action`.
+fn clean_node(node: &NodeRef, rules: &Rules) -> Vec<NodeRef> {
+    let (name, attributes) = match node.data() {
+        NodeData::Document(..)
+        | NodeData::DocumentFragment
+        | NodeData::Doctype(..)
+        | NodeData::ProcessingInstruction(..) => return Vec::new(),
+
+        NodeData::Text(..) => return vec![node.clone()],
+
+        NodeData::Comment(..) if rules.allow_comments => return vec![node.clone()],
+        NodeData::Comment(..) => return Vec::new(),
+
+        NodeData::Element(ElementData {
+            name, attributes, ..
+        }) => (name, attributes),
+    };
+
+    match element_action(name, rules) {
+        ElementAction::Delete => Vec::new(),
+
+        ElementAction::Elide => clean_nodes(node.children(), rules),
+
+        ElementAction::Space => {
+            let mut cleaned = clean_nodes(node.children(), rules);
+            // An element without surviving content collapses to one space;
+            // otherwise the content is padded with a space on both sides.
+            if !cleaned.is_empty() {
+                cleaned.insert(0, create_space_text());
+            }
+            cleaned.push(create_space_text());
+            cleaned
+        }
+
+        ElementAction::Rename(rename_to) => {
+            // Renamed elements keep their sanitized children but no attributes.
+            let children = clean_nodes(node.children(), rules);
+            vec![simple_element(
+                simple_qual_name(rename_to),
+                Vec::new(),
+                children,
+            )]
+        }
+
+        ElementAction::Keep(element_sanitizer) => {
+            /* whitelisted attributes */
+            let mut new_attrs: Vec<(ExpandedName, Attribute)> = attributes
+                .borrow()
+                .map
+                .iter()
+                .filter(|(attr_name, attr)| {
+                    element_sanitizer.is_valid(&expanded_name_to_string(attr_name), &attr.value)
+                })
+                .map(|(attr_name, attr)| (attr_name.clone(), attr.clone()))
+                .collect();
+
+            /* mandatory attributes, appended in sorted order */
+            let mut mandatory: Vec<(&String, &String)> =
+                element_sanitizer.mandatory_attributes.iter().collect();
+            mandatory.sort();
+            new_attrs.extend(mandatory.into_iter().map(|(attr_name, attr_value)| {
+                (
+                    ExpandedName::new(ns!(), LocalName::from(attr_name.as_str())),
+                    Attribute {
+                        prefix: None,
+                        value: attr_value.into(),
+                    },
+                )
+            }));
+
+            let children = clean_nodes(node.children(), rules);
+            vec![simple_element(name.clone(), new_attrs, children)]
+        }
+    }
+}
+
+/// Builds a new document holding the sanitized children of `dom`.
+///
+/// The surviving nodes are detached from their current position and
+/// appended, in order, to a freshly created document node.
+pub(crate) fn sanitize_dom(dom: &NodeRef, mode: &Rules) -> NodeRef {
+    let survivors = clean_nodes(dom.children(), mode);
+    let sanitized_root = NodeRef::new_document();
+    survivors.into_iter().for_each(|child| {
+        child.detach();
+        sanitized_root.append(child);
+    });
+    sanitized_root
+}
--- a/sanitize-html-rs/src/tests.rs
+++ b/sanitize-html-rs/src/tests.rs
@ -0,0 +1,645 @@
+#![cfg(test)]
+
+use super::rules::predefined::*;
+use super::rules::{Element, Rules};
+use super::sanitize_str;
+
+/// Every predefined rule set is expected to map the empty string to itself.
+#[test]
+fn empty() {
+    let basic = sanitize_str(&BASIC, "").unwrap();
+    assert_eq!(&basic, "");
+
+    let default = sanitize_str(&DEFAULT, "").unwrap();
+    assert_eq!(&default, "");
+
+    let relaxed = sanitize_str(&RELAXED, "").unwrap();
+    assert_eq!(&relaxed, "");
+
+    let restricted = sanitize_str(&RESTRICTED, "").unwrap();
+    assert_eq!(&restricted, "");
+
+    let untrusted = sanitize_str(&UNTRUSTED, "").unwrap();
+    assert_eq!(&untrusted, "");
+}
+
+/* basic */
+
+/// Well-formed fragment: bold text containing a comment, one relative and one
+/// absolute link (the first with a `title`), a self-closing `<br/>` and an
+/// inline `<script>` element.
+const BASIC_HTML: &str = "<b>Lo<!-- comment -->rem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\");</script>";
+
+/// `DEFAULT` is expected to reduce `BASIC_HTML` to plain text, with the
+/// script body removed as well.
+#[test]
+fn basic_default() {
+    let cleaned = sanitize_str(&DEFAULT, BASIC_HTML).unwrap();
+    assert_eq!(&cleaned, "Lorem ipsum dolor sit amet ");
+}
+
+/// `RESTRICTED` is expected to keep only `<b>` and `<strong>` and to leave the
+/// script body behind as text.
+#[test]
+fn basic_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, BASIC_HTML).unwrap();
+    assert_eq!(
+        &cleaned,
+        "<b>Lorem</b> ipsum <strong>dolor</strong> sit amet alert(\"hello world\");"
+    );
+}
+
+/// `BASIC` is expected to keep the links (without `title`), emit `<br>` and
+/// leave the script body behind as text.
+#[test]
+fn basic_basic() {
+    let cleaned = sanitize_str(&BASIC, BASIC_HTML).unwrap();
+    assert_eq!(
+        &cleaned,
+        "<b>Lorem</b> <a href=\"pants\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
+    );
+}
+
+/// `RELAXED` is expected to keep the links including the `title` attribute.
+#[test]
+fn basic_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, BASIC_HTML).unwrap();
+    assert_eq!(
+        &cleaned,
+        "<b>Lorem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
+    );
+}
+
+/* malformed */
+
+/// Broken markup: a stray `</b>`, an unquoted `href`, a `title` value whose
+/// opening quote is only closed by the next attribute's quote, mis-nested
+/// `</a></strong>` and a `<script>` element that is never closed.
+const MALFORMED_HTML: &str = "Lo<!-- comment -->rem</b> <a href=pants title=\"foo>ipsum <a href=\"http://foo.com/\"><strong>dolor</a></strong> sit<br/>amet <script>alert(\"hello world\");";
+
+/// `DEFAULT` is expected to yield plain text; the word swallowed by the
+/// runaway `title` attribute does not appear in the output.
+#[test]
+fn malformed_default() {
+    let cleaned = sanitize_str(&DEFAULT, MALFORMED_HTML).unwrap();
+    assert_eq!(&cleaned, "Lorem dolor sit amet ");
+}
+
+/// `RESTRICTED` is expected to keep `<strong>` only and to leave the script
+/// body behind as text.
+#[test]
+fn malformed_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, MALFORMED_HTML).unwrap();
+    assert_eq!(
+        &cleaned,
+        "Lorem <strong>dolor</strong> sit amet alert(\"hello world\");"
+    );
+}
+
+/// `BASIC` is expected to produce a single well-nested link around
+/// `<strong>`, carrying only the `href` attribute.
+#[test]
+fn malformed_basic() {
+    let cleaned = sanitize_str(&BASIC, MALFORMED_HTML).unwrap();
+    assert_eq!(
+        &cleaned,
+        "Lorem <a href=\"pants\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
+    );
+}
+
+/// `RELAXED` is expected to keep the `title` attribute, whose value contains
+/// the markup that the runaway quote swallowed.
+#[test]
+fn malformed_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, MALFORMED_HTML).unwrap();
+    assert_eq!(
+        &cleaned,
+        "Lorem <a href=\"pants\" title=\"foo>ipsum <a href=\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
+    );
+}
+
+/* unclosed */
+
+/// A closed `<p>` followed by a `<blockquote>` that has no closing tag.
+const UNCLOSED_HTML: &str = "<p>a</p><blockquote>b";
+
+/// `DEFAULT` is expected to replace both elements by their text with a space
+/// on each side.
+#[test]
+fn unclosed_default() {
+    let cleaned = sanitize_str(&DEFAULT, UNCLOSED_HTML).unwrap();
+    assert_eq!(&cleaned, " a  b ");
+}
+
+/// `RESTRICTED` is expected to replace both elements by their text with a
+/// space on each side.
+#[test]
+fn unclosed_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, UNCLOSED_HTML).unwrap();
+    assert_eq!(&cleaned, " a  b ");
+}
+
+/// `BASIC` is expected to keep both elements and emit the missing
+/// `</blockquote>`.
+#[test]
+fn unclosed_basic() {
+    let cleaned = sanitize_str(&BASIC, UNCLOSED_HTML).unwrap();
+    assert_eq!(&cleaned, "<p>a</p><blockquote>b</blockquote>");
+}
+
+/// `RELAXED` is expected to keep both elements and emit the missing
+/// `</blockquote>`.
+#[test]
+fn unclosed_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, UNCLOSED_HTML).unwrap();
+    assert_eq!(&cleaned, "<p>a</p><blockquote>b</blockquote>");
+}
+
+/* malicious */
+
+/// Same shape as the basic fragment, but the first link uses a `javascript:`
+/// URL and the script tag is disguised as `<<foo>script>`.
+const MALICIOUS_HTML: &str = "<b>Lo<!-- comment -->rem</b> <a href=\"javascript:pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert(\"hello world\");</script>";
+
+/// `DEFAULT` is expected to yield plain text in which the disguised script
+/// tag survives only as escaped `&lt;script&gt;`.
+#[test]
+fn malicious_default() {
+    let cleaned = sanitize_str(&DEFAULT, MALICIOUS_HTML).unwrap();
+    assert_eq!(
+        &cleaned,
+        "Lorem ipsum dolor sit amet &lt;script&gt;alert(\"hello world\");"
+    );
+}
+
+/// `RESTRICTED` is expected to keep `<b>`/`<strong>` and escape the disguised
+/// script tag.
+#[test]
+fn malicious_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, MALICIOUS_HTML).unwrap();
+    assert_eq!(
+        &cleaned,
+        "<b>Lorem</b> ipsum <strong>dolor</strong> sit amet &lt;script&gt;alert(\"hello world\");"
+    );
+}
+
+/// `BASIC` is expected to drop the `javascript:` `href` while keeping the
+/// `<a>` element, and to keep the `http:` link intact.
+#[test]
+fn malicious_basic() {
+    let cleaned = sanitize_str(&BASIC, MALICIOUS_HTML).unwrap();
+    assert_eq!(
+        &cleaned,
+        "<b>Lorem</b> <a>ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert(\"hello world\");"
+    );
+}
+
+/// `UNTRUSTED` is expected to add `rel` and `target` to every link, drop the
+/// `javascript:` `href`, and turn `<br/>` into a space.
+#[test]
+fn malicious_untrusted() {
+    let cleaned = sanitize_str(&UNTRUSTED, MALICIOUS_HTML).unwrap();
+    assert_eq!(
+        &cleaned,
+        "<b>Lorem</b> <a rel=\"noreferrer noopener\" target=\"_blank\">ipsum</a> <a href=\"http://foo.com/\" rel=\"noreferrer noopener\" target=\"_blank\"><strong>dolor</strong></a> sit amet &lt;script&gt;alert(\"hello world\");"
+    );
+}
+
+/// `RELAXED` is expected to drop the `javascript:` `href` but keep the
+/// `title` attribute on the same link.
+#[test]
+fn malicious_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, MALICIOUS_HTML).unwrap();
+    assert_eq!(
+        &cleaned,
+        "<b>Lorem</b> <a title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert(\"hello world\");"
+    );
+}
+
+/* raw-comment */
+
+/// A top-level HTML comment immediately followed by plain text.
+const RAW_COMMENT_HTML: &str = "<!-- comment -->Hello";
+
+/// `DEFAULT` is expected to drop the leading comment.
+#[test]
+fn raw_comment_default() {
+    let cleaned = sanitize_str(&DEFAULT, RAW_COMMENT_HTML).unwrap();
+    assert_eq!(&cleaned, "Hello");
+}
+
+/// `RESTRICTED` is expected to drop the leading comment.
+#[test]
+fn raw_comment_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, RAW_COMMENT_HTML).unwrap();
+    assert_eq!(&cleaned, "Hello");
+}
+
+/// `BASIC` is expected to drop the leading comment.
+#[test]
+fn raw_comment_basic() {
+    let cleaned = sanitize_str(&BASIC, RAW_COMMENT_HTML).unwrap();
+    assert_eq!(&cleaned, "Hello");
+}
+
+/// `RELAXED` is expected to drop the leading comment.
+#[test]
+fn raw_comment_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, RAW_COMMENT_HTML).unwrap();
+    assert_eq!(&cleaned, "Hello");
+}
+
+/* protocol-based JS injection: simple, no spaces */
+
+/// Link whose `href` is a plain `javascript:` URL.
+const JS_INJECTION_HTML_1: &str = "<a href=\"javascript:alert(\'XSS\');\">foo</a>";
+
+/// `DEFAULT` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_1_default() {
+    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_1).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `RESTRICTED` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_1_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_1).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `BASIC` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_1_basic() {
+    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_1).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/// `RELAXED` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_1_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_1).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/* protocol-based JS injection: simple, spaces before */
+
+/// `javascript` URL with a space between the scheme name and the colon.
+const JS_INJECTION_HTML_2: &str = "<a href=\"javascript :alert(\'XSS\');\">foo</a>";
+
+/// `DEFAULT` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_2_default() {
+    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_2).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `RESTRICTED` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_2_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_2).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `BASIC` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_2_basic() {
+    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_2).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/// `RELAXED` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_2_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_2).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/* protocol-based JS injection: simple, spaces after */
+
+/// `javascript:` URL with a space right after the colon.
+const JS_INJECTION_HTML_3: &str = "<a href=\"javascript: alert(\'XSS\');\">foo</a>";
+
+/// `DEFAULT` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_3_default() {
+    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_3).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `RESTRICTED` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_3_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_3).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `BASIC` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_3_basic() {
+    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_3).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/// `RELAXED` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_3_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_3).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/* protocol-based JS injection: simple, spaces before and after */
+
+/// `javascript` URL with a space on both sides of the colon.
+const JS_INJECTION_HTML_4: &str = "<a href=\"javascript : alert(\'XSS\');\">foo</a>";
+
+/// `DEFAULT` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_4_default() {
+    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_4).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `RESTRICTED` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_4_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_4).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `BASIC` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_4_basic() {
+    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_4).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/// `RELAXED` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_4_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_4).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/* protocol-based JS injection: preceding colon */
+
+/// `javascript:` URL prefixed with an extra leading colon.
+const JS_INJECTION_HTML_5: &str = "<a href=\":javascript:alert(\'XSS\');\">foo</a>";
+
+/// `DEFAULT` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_5_default() {
+    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_5).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `RESTRICTED` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_5_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_5).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `BASIC` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_5_basic() {
+    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_5).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/// `RELAXED` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_5_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_5).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/* protocol-based JS injection: colon written as a decimal character reference */
+
+/// The colon after `javascript` is spelled `&#58;`, the decimal numeric
+/// character reference for `:`.
+const JS_INJECTION_HTML_6: &str = "<a href=\"javascript&#58;\">foo</a>";
+
+/// `DEFAULT` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_6_default() {
+    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_6).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `RESTRICTED` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_6_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_6).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `BASIC` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_6_basic() {
+    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_6).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/// `RELAXED` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_6_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_6).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/* protocol-based JS injection: colon written as a zero-padded decimal character reference */
+
+/// The colon after `javascript` is spelled `&#0058;`, a zero-padded decimal
+/// numeric character reference for `:`.
+const JS_INJECTION_HTML_7: &str = "<a href=\"javascript&#0058;\">foo</a>";
+
+/// `DEFAULT` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_7_default() {
+    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_7).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `RESTRICTED` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_7_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_7).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `BASIC` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_7_basic() {
+    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_7).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/// `RELAXED` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_7_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_7).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/* protocol-based JS injection: zero-padded decimal character references without semicolons */
+
+/// Unquoted `href` made entirely of zero-padded decimal character references
+/// with no terminating semicolons; the code points spell
+/// `javascript:alert('XSS')`.
+const JS_INJECTION_HTML_8: &str = "<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>foo</a>";
+
+/// `DEFAULT` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_8_default() {
+    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_8).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `RESTRICTED` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_8_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_8).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `BASIC` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_8_basic() {
+    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_8).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/// `RELAXED` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_8_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_8).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/* protocol-based JS injection: hex encoding */
+
+/// The colon after `javascript` is spelled `&#x3A;`, the hexadecimal numeric
+/// character reference for `:`.
+const JS_INJECTION_HTML_9: &str = "<a href=\"javascript&#x3A;\">foo</a>";
+
+/// `DEFAULT` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_9_default() {
+    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_9).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `RESTRICTED` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_9_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_9).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `BASIC` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_9_basic() {
+    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_9).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/// `RELAXED` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_9_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_9).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/* protocol-based JS injection: long hex encoding */
+
+/// The colon after `javascript` is spelled `&#x003A;`, a zero-padded
+/// hexadecimal numeric character reference for `:`.
+const JS_INJECTION_HTML_10: &str = "<a href=\"javascript&#x003A;\">foo</a>";
+
+/// `DEFAULT` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_10_default() {
+    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_10).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `RESTRICTED` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_10_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_10).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `BASIC` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_10_basic() {
+    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_10).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/// `RELAXED` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_10_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_10).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/* protocol-based JS injection: hex encoding without semicolons */
+
+/// Unquoted `href` made entirely of hexadecimal character references with no
+/// terminating semicolons; the code points spell `javascript:alert('XSS')`.
+const JS_INJECTION_HTML_11: &str = "<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>foo</a>";
+
+/// `DEFAULT` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_11_default() {
+    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_11).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `RESTRICTED` is expected to strip the link, leaving only its text.
+#[test]
+fn js_injection_11_restricted() {
+    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_11).unwrap();
+    assert_eq!(&cleaned, "foo");
+}
+
+/// `BASIC` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_11_basic() {
+    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_11).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/// `RELAXED` is expected to keep the `<a>` element but drop its `href`.
+#[test]
+fn js_injection_11_relaxed() {
+    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_11).unwrap();
+    assert_eq!(&cleaned, "<a>foo</a>");
+}
+
+/// Valid HTML entities should be translated: `&apos;` and `&eacute;` come out
+/// as literal characters, while `&amp;` stays encoded.
+#[test]
+fn misc_1() {
+    let html = "Don&apos;t tas&eacute; me &amp; bro!";
+    let cleaned = sanitize_str(&DEFAULT, html).unwrap();
+    assert_eq!(&cleaned, "Don't tasé me &amp; bro!");
+}
+
+/// Valid HTML entities should be translated while a bare, unencoded
+/// ampersand is encoded as `&amp;`.
+#[test]
+fn misc_2() {
+    let html = "cookies&sup2; & &frac14; cr&eacute;me";
+    let cleaned = sanitize_str(&DEFAULT, html).unwrap();
+    assert_eq!(&cleaned, "cookies² &amp; ¼ créme");
+}
+
+/// The output should never contain `&apos;`: apostrophes in the text come
+/// out as literal `'` characters.
+#[test]
+fn misc_3() {
+    let html = "<a href='&apos;' class=\"' &#39;\">IE6 isn't a real browser</a>";
+    let cleaned = sanitize_str(&DEFAULT, html).unwrap();
+    assert_eq!(&cleaned, "IE6 isn't a real browser");
+}
+
+/// Several instances of the same element in a row should not trip the
+/// sanitizer; four consecutive `<img>` elements are expected to vanish.
+#[test]
+fn misc_4() {
+    let html = "<img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\">";
+    let cleaned = sanitize_str(&DEFAULT, html).unwrap();
+    assert_eq!(&cleaned, "");
+}
+
+/// When a whitespace element is removed, its contents should be surrounded
+/// with space characters; here a `<div>` leaves a space on each side.
+#[test]
+fn misc_5() {
+    let cleaned = sanitize_str(&DEFAULT, "foo<div>bar</div>baz").unwrap();
+    assert_eq!(&cleaned, "foo bar baz");
+}
+
+/// Each `<br>` is expected to be replaced by a single space under `DEFAULT`.
+#[test]
+fn misc_6() {
+    let cleaned = sanitize_str(&DEFAULT, "foo<br>bar<br>baz").unwrap();
+    assert_eq!(&cleaned, "foo bar baz");
+}
+
+/// Each `<hr>` is expected to be replaced by a single space under `DEFAULT`.
+#[test]
+fn misc_7() {
+    let cleaned = sanitize_str(&DEFAULT, "foo<hr>bar<hr>baz").unwrap();
+    assert_eq!(&cleaned, "foo bar baz");
+}
+
+/// A hand-built rule set: comments allowed, `<b>` and `<span>` kept,
+/// `<script>`/`<style>` deleted, `<br>` turned into a space and `<strong>`
+/// renamed to `<span>`. Elements without a rule (the links) are expected to
+/// be reduced to their text.
+#[test]
+fn custom_rules() {
+    let input = "<b>Lo<!-- comment -->rem</b> <a href=\"javascript:pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\")</script>";
+    let expected = "<b>Lo<!-- comment -->rem</b> ipsum <span>dolor</span> sit amet ";
+
+    let policy = Rules::new()
+        .allow_comments(true)
+        .element(Element::new("b"))
+        .element(Element::new("span"))
+        .delete("script")
+        .delete("style")
+        .space("br")
+        .rename("strong", "span");
+
+    let cleaned = sanitize_str(&policy, input).unwrap();
+    assert_eq!(&cleaned, expected);
+}