Add private fork of sanitize-html-rs

Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
This commit is contained in:
Jacob Kiers 2022-06-10 13:55:11 +02:00
parent 36da496aa1
commit 4e3f7b46da
17 changed files with 1674 additions and 1 deletions

10
Cargo.lock generated
View File

@ -747,6 +747,16 @@ version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
[[package]]
name = "sanitize_html"
version = "0.7.0"
dependencies = [
"html5ever",
"kuchiki",
"lazy_static",
"regex",
]
[[package]]
name = "scopeguard"
version = "1.1.0"

View File

@ -2,4 +2,5 @@
members = [
"bin",
]
"sanitize-html-rs",
]

View File

@ -0,0 +1,31 @@
name: Build
on: [push, pull_request]
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
- macOS-latest
- windows-latest
rust:
- stable
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
toolchain: ${{ matrix.rust }}
override: true
- name: Build
run: |
cargo build --all-targets --no-default-features --verbose
cargo build --all-targets --verbose
- name: Run tests
run: cargo test --all-targets --verbose
env:
RUST_BACKTRACE: 1

View File

@ -0,0 +1,27 @@
name: Coverage
on:
pull_request:
push:
branches:
- master
jobs:
coverage:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- uses: actions-rs/install@v0.1
with:
crate: cargo-tarpaulin
use-tool-cache: true
- name: Run coverage
run: cargo tarpaulin -f -t 5 --out Xml -v -- --test-threads=1
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
with:
token: ${{secrets.CODECOV_TOKEN}}

View File

@ -0,0 +1,24 @@
name: Style check
on: [push, pull_request]
jobs:
clippy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Install clippy
uses: actions-rs/toolchain@v1
with:
toolchain: stable
components: clippy
- uses: actions-rs/clippy-check@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
args: --all --all-features
fmt:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Run fmt check
run: cargo fmt --all -- --check

4
sanitize-html-rs/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
/target/
**/*.rs.bk
Cargo.lock
/.vscode

View File

@ -0,0 +1,16 @@
[package]
name = "sanitize_html"
version = "0.7.0"
authors = ["Andrey Kutejko <andy128k@gmail.com>"]
description = "Rule-based HTML Sanitization library"
keywords = ["html", "sanitize"]
license = "MIT"
homepage = "https://github.com/andy128k/sanitize-html-rs"
repository = "https://github.com/andy128k/sanitize-html-rs.git"
edition = "2018"
[dependencies]
regex = "1"
lazy_static = "1"
html5ever = "0.25"
kuchiki = "0.8"

View File

@ -0,0 +1,18 @@
Copyright (c) 2017 Andrey Kutejko
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -0,0 +1,8 @@
# Sanitize HTML
[![Crates.io Status](https://img.shields.io/crates/v/sanitize_html.svg)](https://crates.io/crates/sanitize_html)
[![Build](https://github.com/andy128k/sanitize-html-rs/workflows/Build/badge.svg?branch=master&event=push)](https://github.com/andy128k/sanitize-html-rs/actions?query=workflow%3ABuild)
[![codecov](https://codecov.io/gh/andy128k/sanitize-html-rs/branch/master/graph/badge.svg)](https://codecov.io/gh/andy128k/sanitize-html-rs)
[![dependency status](https://deps.rs/repo/github/andy128k/sanitize-html-rs/status.svg)](https://deps.rs/repo/github/andy128k/sanitize-html-rs)
This is a library for sanitization of HTML fragments.

View File

@ -0,0 +1,37 @@
//! Error types which can be emitted by the sanitization procedure.
use std::error::Error;
use std::fmt;
/// Error reported by the sanitization entry points.
#[derive(Debug)]
pub enum SanitizeError {
    /// UTF-8 decoding error raised while reading a borrowed byte slice.
    StrUtf8Error(std::str::Utf8Error),
    /// UTF-8 decoding error raised while converting owned bytes into a `String`.
    Utf8Error(std::string::FromUtf8Error),
    /// Serialization error (wraps the underlying I/O error).
    SerializeError(std::io::Error),
}

impl fmt::Display for SanitizeError {
    /// Writes a short label followed by the wrapped error's own message.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::StrUtf8Error(cause) => write!(f, "UTF-8 decode error {}", cause),
            Self::Utf8Error(cause) => write!(f, "UTF-8 decode error {}", cause),
            Self::SerializeError(cause) => write!(f, "Serialization error {}", cause),
        }
    }
}

impl Error for SanitizeError {
    /// Exposes the wrapped error; every variant carries one.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        let cause: &(dyn Error + 'static) = match self {
            Self::StrUtf8Error(cause) => cause,
            Self::Utf8Error(cause) => cause,
            Self::SerializeError(cause) => cause,
        };
        Some(cause)
    }
}

View File

@ -0,0 +1,42 @@
//! HTML Sanitization library
//!
//! # Examples
//!
//! ```
//! use sanitize_html::sanitize_str;
//! use sanitize_html::rules::predefined::DEFAULT;
//!
//! let input = "<b>Lo<!-- comment -->rem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\");</script>";
//!
//! let sanitized_default: String = sanitize_str(&DEFAULT, input).unwrap();
//! assert_eq!(&sanitized_default, "Lorem ipsum dolor sit amet ");
//! ```
#![deny(missing_docs)]
pub mod errors;
mod parse;
pub mod rules;
mod sanitize;
mod tests;
use crate::errors::SanitizeError;
use crate::rules::Rules;
/// Sanitize HTML bytes
///
/// The input must be valid UTF-8; otherwise `SanitizeError::StrUtf8Error` is
/// returned. The sanitized markup is returned as raw bytes.
pub fn sanitize_bytes(rules: &Rules, input: &[u8]) -> Result<Vec<u8>, SanitizeError> {
    let text = match std::str::from_utf8(input) {
        Ok(text) => text,
        Err(cause) => return Err(SanitizeError::StrUtf8Error(cause)),
    };
    let parsed = parse::parse_str(text);
    let cleaned = sanitize::sanitize_dom(&parsed, rules);
    parse::unparse_bytes(&cleaned)
}
/// Sanitize HTML string
///
/// Parses `input`, applies `rules` and serializes the result back into a
/// `String`. A serialized result that is not valid UTF-8 is reported as
/// `SanitizeError::Utf8Error`.
pub fn sanitize_str(rules: &Rules, input: &str) -> Result<String, SanitizeError> {
    let parsed = parse::parse_str(input);
    let cleaned = sanitize::sanitize_dom(&parsed, rules);
    let serialized = parse::unparse_bytes(&cleaned)?;
    match String::from_utf8(serialized) {
        Ok(text) => Ok(text),
        Err(cause) => Err(SanitizeError::Utf8Error(cause)),
    }
}

View File

@ -0,0 +1,38 @@
use super::errors::SanitizeError;
use html5ever::{
interface::QualName,
local_name, namespace_prefix, namespace_url, ns, serialize,
serialize::{SerializeOpts, TraversalScope},
tendril::TendrilSink,
};
use kuchiki::{parse_html_with_options, NodeRef, ParseOpts};
use std::default::Default;
/// Parses `input` into a DOM tree, dropping any doctype.
pub(crate) fn parse_str(input: &str) -> NodeRef {
    let mut options = ParseOpts::default();
    options.tree_builder.drop_doctype = true;
    // `one` feeds the single chunk to the parser and then finishes it.
    parse_html_with_options(options).one(input)
}
/// Serializes the children of `dom` into a byte buffer.
///
/// Only the children are written (`TraversalScope::ChildrenOnly`), with an
/// HTML `div` given as the parent name. Serializer failures are reported as
/// `SanitizeError::SerializeError`.
pub(crate) fn unparse_bytes(dom: &NodeRef) -> Result<Vec<u8>, SanitizeError> {
    let context = QualName::new(
        Some(namespace_prefix!("html")),
        ns!(html),
        local_name!("div"),
    );
    let options = SerializeOpts {
        traversal_scope: TraversalScope::ChildrenOnly(Some(context)),
        scripting_enabled: false,
        create_missing_parent: false,
    };
    let mut out: Vec<u8> = Vec::new();
    match serialize(&mut out, dom, options) {
        Ok(()) => Ok(out),
        Err(cause) => Err(SanitizeError::SerializeError(cause)),
    }
}

View File

@ -0,0 +1,104 @@
//! Structures to define sanitization rules.
pub mod pattern;
pub mod predefined;
use self::pattern::Pattern;
use std::collections::HashMap;
use std::collections::HashSet;
/// Structure describing an allowed HTML element and how its attributes are handled.
pub struct Element {
/// Name of the element.
pub name: String,
/// Whitelist of allowed attributes: attribute name mapped to the pattern
/// its value must match.
pub attributes: HashMap<String, Pattern>,
/// List of mandatory attributes and their values.
/// These attributes will be forcibly added to the element.
pub mandatory_attributes: HashMap<String, String>,
}
impl Element {
/// Creates element descriptor
pub fn new(name: &str) -> Self {
Self {
name: name.to_owned(),
attributes: HashMap::new(),
mandatory_attributes: HashMap::new(),
}
}
/// Adds an attribute
pub fn attribute(mut self, attribute: &str, pattern: Pattern) -> Self {
self.attributes.insert(attribute.to_owned(), pattern);
self
}
/// Adds mandatory attribute
pub fn mandatory_attribute(mut self, attribute: &str, value: &str) -> Self {
self.mandatory_attributes
.insert(attribute.to_owned(), value.to_owned());
self
}
/// Checks if attribute is valid
pub fn is_valid(&self, attribute: &str, value: &str) -> bool {
match self.attributes.get(attribute) {
None => false,
Some(pattern) => pattern.matches(value),
}
}
}
/// Structure describing a set of sanitization rules.
#[derive(Default)]
pub struct Rules {
/// Determines if comments are kept or stripped out of a document.
pub allow_comments: bool,
/// Allowed elements, keyed by element name.
pub allowed_elements: HashMap<String, Element>,
/// Elements which will be removed together with their children.
pub delete_elements: HashSet<String>,
/// Elements which will be replaced by spaces (their children will be processed recursively).
pub space_elements: HashSet<String>,
/// Elements which will be renamed: original name mapped to the new name.
pub rename_elements: HashMap<String, String>,
}
impl Rules {
/// Creates a new rules set.
pub fn new() -> Self {
Self::default()
}
/// Sets if comments are allowed
pub fn allow_comments(mut self, allow_comments: bool) -> Self {
self.allow_comments = allow_comments;
self
}
/// Adds a rule for an allowed element
pub fn element(mut self, element: Element) -> Self {
self.allowed_elements.insert(element.name.clone(), element);
self
}
/// Adds a rule to delete an element
pub fn delete(mut self, element_name: &str) -> Self {
self.delete_elements.insert(element_name.to_owned());
self
}
/// Adds a rule to replace an element with space
pub fn space(mut self, element_name: &str) -> Self {
self.space_elements.insert(element_name.to_owned());
self
}
/// Adds a rule to rename an element
pub fn rename(mut self, element_name: &str, to: &str) -> Self {
self.rename_elements
.insert(element_name.to_owned(), to.to_owned());
self
}
}

View File

@ -0,0 +1,127 @@
//! This module contains code dedicated to check validity of attribute's value.
//!
//! # Examples
//! ```
//! use sanitize_html::rules::pattern::Pattern;
//! use regex::Regex;
//!
//! let href = Pattern::regex(Regex::new("^(ftp:|http:|https:|mailto:)").unwrap()) |
//! !Pattern::regex(Regex::new("^[^/]+[[:space:]]*:").unwrap());
//!
//! assert!(href.matches("filename.xls"));
//! assert!(href.matches("http://foo.com/"));
//! assert!(href.matches(" filename with spaces .zip "));
//! assert!(!href.matches(" javascript : window.location = '//example.com/'")); // Attempt to make XSS
//! ```
use regex::Regex;
/// Value pattern: a boxed predicate over an attribute value.
pub struct Pattern(pub Box<dyn Fn(&str) -> bool + Sync + Send>);

impl Pattern {
    /// Creates a pattern which accepts any value.
    ///
    /// # Example
    /// ```
    /// use sanitize_html::rules::pattern::Pattern;
    /// use regex::Regex;
    ///
    /// let pattern = Pattern::any();
    /// assert!(pattern.matches(""));
    /// assert!(pattern.matches("pants"));
    /// ```
    pub fn any() -> Self {
        Self(Box::new(|_: &str| true))
    }

    /// Creates a pattern which uses a regular expression to check a value.
    ///
    /// # Example
    /// ```
    /// use sanitize_html::rules::pattern::Pattern;
    /// use regex::Regex;
    ///
    /// let pattern = Pattern::regex(Regex::new("ant").unwrap());
    /// assert!(!pattern.matches(""));
    /// assert!(pattern.matches("pants"));
    /// ```
    pub fn regex(re: Regex) -> Self {
        Self(Box::new(move |candidate: &str| re.is_match(candidate)))
    }

    /// Checks if a value matches the pattern.
    pub fn matches(&self, value: &str) -> bool {
        let Pattern(check) = self;
        check(value)
    }
}
impl ::std::ops::Not for Pattern {
type Output = Pattern;
/// Negates pattern
///
/// # Example
/// ```
/// use sanitize_html::rules::pattern::Pattern;
/// use regex::Regex;
///
/// let pattern = !Pattern::any();
/// assert!(!pattern.matches(""));
/// assert!(!pattern.matches("pants"));
/// ```
fn not(self) -> Self::Output {
let cb = self.0;
Pattern(Box::new(move |value| !cb(value)))
}
}
impl ::std::ops::BitAnd for Pattern {
type Output = Pattern;
/// Combines two patterns into a pattern which matches a string iff both patterns match that string.
///
/// # Example
/// ```
/// use sanitize_html::rules::pattern::Pattern;
/// use regex::Regex;
///
/// let pan = Pattern::regex(Regex::new("pan").unwrap());
/// let ant = Pattern::regex(Regex::new("ant").unwrap());
/// let pattern = pan & ant;
///
/// assert!(!pattern.matches("pan"));
/// assert!(!pattern.matches("ant"));
/// assert!(pattern.matches("pants"));
/// ```
fn bitand(self, rhs: Pattern) -> Self::Output {
let cb1 = self.0;
let cb2 = rhs.0;
Pattern(Box::new(move |value| cb1(value) && cb2(value)))
}
}
impl ::std::ops::BitOr for Pattern {
type Output = Pattern;
/// Combines two patterns into a pattern which matches a string if one of patterns matches that string.
///
/// # Example
/// ```
/// use sanitize_html::rules::pattern::Pattern;
/// use regex::Regex;
///
/// let pan = Pattern::regex(Regex::new("pan").unwrap());
/// let pot = Pattern::regex(Regex::new("pot").unwrap());
/// let pattern = pan | pot;
///
/// assert!(pattern.matches("pants"));
/// assert!(pattern.matches("pot"));
/// assert!(!pattern.matches("jar"));
/// ```
fn bitor(self, rhs: Pattern) -> Self::Output {
let cb1 = self.0;
let cb2 = rhs.0;
Pattern(Box::new(move |value| cb1(value) || cb2(value)))
}
}

View File

@ -0,0 +1,374 @@
//! Predefined rules
//!
//! These rules are inspired by a great Ruby gem [sanitize](https://github.com/rgrove/sanitize/).
use super::pattern::Pattern;
use super::{Element, Rules};
use lazy_static::lazy_static;
use regex::Regex;
/// Compiles `regex` into a `Pattern`; panics if the expression is invalid.
fn re(regex: &str) -> Pattern {
    let compiled = Regex::new(regex).unwrap();
    Pattern::regex(compiled)
}
/// Pattern for link targets: accepts values starting with `ftp:`, `http:`,
/// `https:` or `mailto:`, plus any value that does not look like it starts
/// with a scheme.
fn href() -> Pattern {
    let allowed_scheme = re("^(ftp:|http:|https:|mailto:)");
    let any_scheme = re("^[^/]+[[:space:]]*:");
    allowed_scheme | !any_scheme
}
/// Pattern for resource URLs: accepts values starting with `http:` or
/// `https:`, plus any value that does not look like it starts with a scheme.
fn src() -> Pattern {
    let allowed_scheme = re("^(http:|https:)");
    let any_scheme = re("^[^/]+[[:space:]]*:");
    allowed_scheme | !any_scheme
}
lazy_static! {
/// Basic rules. Allows a variety of markup including formatting elements, links, and lists.
/// Built by `basic()`.
pub static ref BASIC: Rules = basic();
/// Default rules. Removes all tags.
/// Built by `default()`.
pub static ref DEFAULT: Rules = default();
/// Relaxed rules. Allows an even wider variety of markup, including images and tables.
/// Built by `relaxed()`.
pub static ref RELAXED: Rules = relaxed();
/// Restricted rules. Allows only very simple inline markup. No links, images, or block elements.
/// Built by `restricted()`.
pub static ref RESTRICTED: Rules = restricted();
/// Rules for document from untrusted sources. Removes all tags but text emphasizing and links.
/// Built by `untrusted()`.
pub static ref UNTRUSTED: Rules = untrusted();
}
fn basic() -> Rules {
Rules::new()
.element(Element::new("a").attribute("href", href()))
.element(Element::new("abbr").attribute("title", Pattern::any()))
.element(Element::new("b"))
.element(Element::new("blockquote").attribute("cite", src()))
.element(Element::new("br"))
.element(Element::new("br"))
.element(Element::new("cite"))
.element(Element::new("code"))
.element(Element::new("dd"))
.element(Element::new("dfn").attribute("title", Pattern::any()))
.element(Element::new("dl"))
.element(Element::new("dt"))
.element(Element::new("em"))
.element(Element::new("i"))
.element(Element::new("kbd"))
.element(Element::new("li"))
.element(Element::new("mark"))
.element(Element::new("ol"))
.element(Element::new("p"))
.element(Element::new("pre"))
.element(Element::new("q").attribute("cite", src()))
.element(Element::new("s"))
.element(Element::new("samp"))
.element(Element::new("small"))
.element(Element::new("strike"))
.element(Element::new("strong"))
.element(Element::new("sub"))
.element(Element::new("sup"))
.element(
Element::new("time")
.attribute("datetime", Pattern::any())
.attribute("pubdate", Pattern::any()),
)
.element(Element::new("u"))
.element(Element::new("ul"))
.element(Element::new("var"))
.space("address")
.space("article")
.space("aside")
.space("div")
.space("footer")
.space("h1")
.space("h2")
.space("h3")
.space("h4")
.space("h5")
.space("h6")
.space("header")
.space("hgroup")
.space("hr")
.space("nav")
.space("section")
}
/// Builds the default rule set: no element is kept, block-level elements are
/// replaced by spaces and a few elements are deleted together with their
/// content.
fn default() -> Rules {
    const SPACE_ELEMENTS: &[&str] = &[
        "address",
        "article",
        "aside",
        "blockquote",
        "br",
        "dd",
        "div",
        "dl",
        "dt",
        "footer",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "header",
        "hgroup",
        "hr",
        "li",
        "nav",
        "ol",
        "p",
        "pre",
        "section",
        "ul",
    ];
    const DELETE_ELEMENTS: &[&str] = &[
        "iframe", "noembed", "noframes", "noscript", "script", "style",
    ];

    let rules = SPACE_ELEMENTS
        .iter()
        .fold(Rules::new(), |rules, &name| rules.space(name));
    DELETE_ELEMENTS
        .iter()
        .fold(rules, |rules, &name| rules.delete(name))
}
fn relaxed() -> Rules {
fn relaxed_element(name: &str) -> Element {
Element::new(name)
.attribute("dir", Pattern::any())
.attribute("lang", Pattern::any())
.attribute("title", Pattern::any())
.attribute("class", Pattern::any())
}
Rules::new()
.element(relaxed_element("a").attribute("href", href()))
.element(relaxed_element("abbr"))
.element(relaxed_element("b"))
.element(relaxed_element("bdo"))
.element(relaxed_element("blockquote").attribute("cite", src()))
.element(relaxed_element("br"))
.element(relaxed_element("caption"))
.element(relaxed_element("cite"))
.element(relaxed_element("code"))
.element(
relaxed_element("col")
.attribute("span", Pattern::any())
.attribute("width", Pattern::any()),
)
.element(
relaxed_element("colgroup")
.attribute("span", Pattern::any())
.attribute("width", Pattern::any()),
)
.element(relaxed_element("dd"))
.element(
relaxed_element("del")
.attribute("cite", src())
.attribute("datetime", Pattern::any()),
)
.element(relaxed_element("dfn"))
.element(relaxed_element("dl"))
.element(relaxed_element("dt"))
.element(relaxed_element("em"))
.element(relaxed_element("figcaption"))
.element(relaxed_element("figure"))
.element(relaxed_element("h1"))
.element(relaxed_element("h2"))
.element(relaxed_element("h3"))
.element(relaxed_element("h4"))
.element(relaxed_element("h5"))
.element(relaxed_element("h6"))
.element(relaxed_element("hgroup"))
.element(relaxed_element("i"))
.element(
relaxed_element("img")
.attribute("src", src())
.attribute("align", Pattern::any())
.attribute("alt", Pattern::any())
.attribute("width", Pattern::any())
.attribute("height", Pattern::any()),
)
.element(
relaxed_element("ins")
.attribute("cite", src())
.attribute("datetime", Pattern::any()),
)
.element(relaxed_element("kbd"))
.element(relaxed_element("li"))
.element(relaxed_element("mark"))
.element(
relaxed_element("ol")
.attribute("start", Pattern::any())
.attribute("reversed", Pattern::any())
.attribute("type", Pattern::any()),
)
.element(relaxed_element("p"))
.element(relaxed_element("pre"))
.element(relaxed_element("q").attribute("cite", src()))
.element(relaxed_element("rp"))
.element(relaxed_element("rt"))
.element(relaxed_element("ruby"))
.element(relaxed_element("s"))
.element(relaxed_element("samp"))
.element(relaxed_element("small"))
.element(relaxed_element("strike"))
.element(relaxed_element("strong"))
.element(relaxed_element("sub"))
.element(relaxed_element("sup"))
.element(
relaxed_element("table")
.attribute("summary", Pattern::any())
.attribute("width", Pattern::any()),
)
.element(relaxed_element("tbody"))
.element(
relaxed_element("td")
.attribute("abbr", Pattern::any())
.attribute("axis", Pattern::any())
.attribute("colspan", Pattern::any())
.attribute("rowspan", Pattern::any())
.attribute("width", Pattern::any()),
)
.element(relaxed_element("tfoot"))
.element(
relaxed_element("th")
.attribute("abbr", Pattern::any())
.attribute("axis", Pattern::any())
.attribute("colspan", Pattern::any())
.attribute("rowspan", Pattern::any())
.attribute("scope", Pattern::any())
.attribute("width", Pattern::any()),
)
.element(relaxed_element("thead"))
.element(
relaxed_element("time")
.attribute("datetime", Pattern::any())
.attribute("pubdate", Pattern::any()),
)
.element(relaxed_element("tr"))
.element(relaxed_element("u"))
.element(relaxed_element("ul").attribute("type", Pattern::any()))
.element(relaxed_element("var"))
.element(relaxed_element("wbr"))
.space("address")
.space("article")
.space("aside")
.space("footer")
.space("header")
.space("hr")
.space("nav")
.space("section")
}
/// Builds the restricted rule set: only a handful of inline emphasis
/// elements are kept, and block-level elements are replaced by spaces.
fn restricted() -> Rules {
    const KEPT_ELEMENTS: &[&str] = &["b", "em", "i", "strong", "u"];
    const SPACE_ELEMENTS: &[&str] = &[
        "address",
        "article",
        "aside",
        "blockquote",
        "br",
        "dd",
        "div",
        "dl",
        "dt",
        "footer",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "header",
        "hgroup",
        "hr",
        "li",
        "nav",
        "ol",
        "p",
        "pre",
        "section",
        "ul",
    ];

    let rules = KEPT_ELEMENTS
        .iter()
        .fold(Rules::new(), |rules, &name| {
            rules.element(Element::new(name))
        });
    SPACE_ELEMENTS
        .iter()
        .fold(rules, |rules, &name| rules.space(name))
}
/// Builds the rule set for untrusted documents: links and inline emphasis
/// are kept, block-level elements are replaced by spaces. Every kept link
/// is forced to carry `target="_blank"` and `rel="noreferrer noopener"`.
fn untrusted() -> Rules {
    const KEPT_ELEMENTS: &[&str] = &["b", "em", "i", "strong", "u"];
    const SPACE_ELEMENTS: &[&str] = &[
        "address",
        "article",
        "aside",
        "blockquote",
        "br",
        "dd",
        "div",
        "dl",
        "dt",
        "footer",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "header",
        "hgroup",
        "hr",
        "li",
        "nav",
        "ol",
        "p",
        "pre",
        "section",
        "ul",
    ];

    let link = Element::new("a")
        .attribute("href", href())
        .mandatory_attribute("target", "_blank")
        .mandatory_attribute("rel", "noreferrer noopener");

    let rules = KEPT_ELEMENTS
        .iter()
        .fold(Rules::new().element(link), |rules, &name| {
            rules.element(Element::new(name))
        });
    SPACE_ELEMENTS
        .iter()
        .fold(rules, |rules, &name| rules.space(name))
}
// Smoke tests: each predefined rule set must build without panicking
// (e.g. on an invalid regex) and contain the expected number of entries.
#[cfg(test)]
mod tests {
use super::{basic, default, relaxed, restricted, untrusted};
#[test]
fn basic_does_not_fail() {
let rules = basic();
// Counts distinct element names; the map is keyed by name.
assert_eq!(rules.allowed_elements.len(), 31);
}
#[test]
fn default_does_not_fail() {
let rules = default();
assert_eq!(rules.allowed_elements.len(), 0);
assert_eq!(rules.space_elements.len(), 26);
assert_eq!(rules.delete_elements.len(), 6);
}
#[test]
fn relaxed_does_not_fail() {
let rules = relaxed();
assert_eq!(rules.allowed_elements.len(), 58);
assert_eq!(rules.space_elements.len(), 8);
}
#[test]
fn restricted_does_not_fail() {
let rules = restricted();
assert_eq!(rules.allowed_elements.len(), 5);
assert_eq!(rules.space_elements.len(), 26);
}
#[test]
fn untrusted_does_not_fail() {
let rules = untrusted();
assert_eq!(rules.allowed_elements.len(), 6);
assert_eq!(rules.space_elements.len(), 26);
}
}

View File

@ -0,0 +1,167 @@
use crate::rules::{Element, Rules};
use html5ever::{interface::QualName, namespace_url, ns, LocalName};
use kuchiki::{Attribute, ElementData, ExpandedName, NodeData, NodeRef};
/// Builds a qualified name with no prefix and an empty namespace.
fn simple_qual_name(name: &str) -> QualName {
    let local = LocalName::from(name);
    QualName::new(None, ns!(), local)
}
/// Renders a qualified name as a lowercase string. Names in the HTML or the
/// empty namespace become just the local name; any other namespace is
/// prepended as `namespace:local`.
fn qual_name_to_string(name: &QualName) -> String {
    let local = name.local.to_lowercase();
    if name.ns.is_empty() || name.ns == ns!(html) {
        return local;
    }
    format!("{}:{}", name.ns.to_lowercase(), local)
}
/// Renders an expanded (attribute) name as a lowercase string. Names in the
/// HTML or the empty namespace become just the local name; any other
/// namespace is prepended as `namespace:local`.
fn expanded_name_to_string(name: &ExpandedName) -> String {
    let local = name.local.to_lowercase();
    let is_plain = name.ns == ns!(html) || name.ns.is_empty();
    if !is_plain {
        format!("{}:{}", name.ns.to_lowercase(), local)
    } else {
        local
    }
}
/// Creates a new element node and moves `children` under it, in order.
/// Each child is detached from its current position before being appended.
fn simple_element(
    name: QualName,
    attrs: Vec<(ExpandedName, Attribute)>,
    children: Vec<NodeRef>,
) -> NodeRef {
    let parent = NodeRef::new_element(name, attrs);
    children.into_iter().for_each(|child| {
        child.detach();
        parent.append(child);
    });
    parent
}
/// Creates a text node holding a single space.
fn create_space_text() -> NodeRef {
    let blank = " ";
    NodeRef::new_text(blank)
}
/// What the sanitizer does with an element, as decided by `element_action`.
enum ElementAction<'t> {
/// Keep the element; its attributes are filtered through the given rule.
Keep(&'t Element),
/// Drop the element together with its children.
Delete,
/// Replace the element by space text around its sanitized children.
Space,
/// Drop the element itself but keep its sanitized children.
Elide,
/// Re-create the element under the given name, without attributes.
Rename(&'t str),
}
/// Decides how an element is handled under `rules`.
///
/// `html` and `body` are always elided. Otherwise the rule sets are
/// consulted in this order: allowed, delete, space, rename. Elements that
/// match none of them are elided.
fn element_action<'t>(element_name: &QualName, rules: &'t Rules) -> ElementAction<'t> {
    let tag = qual_name_to_string(element_name);
    if tag == "html" || tag == "body" {
        return ElementAction::Elide;
    }
    if let Some(rule) = rules.allowed_elements.get(&tag) {
        return ElementAction::Keep(rule);
    }
    if rules.delete_elements.contains(&tag) {
        return ElementAction::Delete;
    }
    if rules.space_elements.contains(&tag) {
        return ElementAction::Space;
    }
    match rules.rename_elements.get(&tag) {
        Some(target) => ElementAction::Rename(target),
        None => ElementAction::Elide,
    }
}
/// Sanitizes every node in `nodes` and concatenates the results in order.
fn clean_nodes(nodes: impl IntoIterator<Item = NodeRef>, rules: &Rules) -> Vec<NodeRef> {
    nodes
        .into_iter()
        .flat_map(|node| clean_node(&node, rules))
        .collect()
}
/// Sanitizes a single node and returns the nodes that replace it.
///
/// Text is kept as is, comments are kept only when `rules.allow_comments`
/// is set, and documents, fragments, doctypes and processing instructions
/// yield nothing. Elements are handled according to `element_action`.
fn clean_node(node: &NodeRef, rules: &Rules) -> Vec<NodeRef> {
    // Everything but elements is settled right here.
    let (name, attributes) = match node.data() {
        NodeData::Element(ElementData {
            name, attributes, ..
        }) => (name, attributes),
        NodeData::Text(..) => return vec![node.clone()],
        NodeData::Comment(..) => {
            return if rules.allow_comments {
                vec![node.clone()]
            } else {
                Vec::new()
            };
        }
        NodeData::Document(..)
        | NodeData::DocumentFragment
        | NodeData::Doctype(..)
        | NodeData::ProcessingInstruction(..) => return Vec::new(),
    };

    match element_action(name, rules) {
        ElementAction::Delete => Vec::new(),
        ElementAction::Elide => clean_nodes(node.children(), rules),
        ElementAction::Space => {
            let inner = clean_nodes(node.children(), rules);
            if inner.is_empty() {
                // An empty element collapses to a single space.
                return vec![create_space_text()];
            }
            let mut padded = Vec::with_capacity(inner.len() + 2);
            padded.push(create_space_text());
            padded.extend(inner);
            padded.push(create_space_text());
            padded
        }
        ElementAction::Rename(rename_to) => {
            let children = clean_nodes(node.children(), rules);
            vec![simple_element(
                simple_qual_name(rename_to),
                Vec::new(),
                children,
            )]
        }
        ElementAction::Keep(element_sanitizer) => {
            // Whitelisted attributes first.
            let mut new_attrs: Vec<(ExpandedName, Attribute)> = attributes
                .borrow()
                .map
                .iter()
                .filter(|(attr_name, attr)| {
                    element_sanitizer.is_valid(&expanded_name_to_string(attr_name), &attr.value)
                })
                .map(|(attr_name, attr)| (attr_name.clone(), attr.clone()))
                .collect();

            // Mandatory attributes are appended afterwards, sorted by name.
            let mut forced: Vec<(&String, &String)> =
                element_sanitizer.mandatory_attributes.iter().collect();
            forced.sort();
            new_attrs.extend(forced.into_iter().map(|(attr_name, attr_value)| {
                (
                    ExpandedName::new(ns!(), LocalName::from(attr_name.as_str())),
                    Attribute {
                        prefix: None,
                        value: attr_value.into(),
                    },
                )
            }));

            let children = clean_nodes(node.children(), rules);
            vec![simple_element(name.clone(), new_attrs, children)]
        }
    }
}
/// Builds a new document from the sanitized children of `dom`.
///
/// Nodes that survive sanitization are detached from wherever they
/// currently sit and appended to the new document, in order.
pub(crate) fn sanitize_dom(dom: &NodeRef, mode: &Rules) -> NodeRef {
    let kept = clean_nodes(dom.children(), mode);
    let document = NodeRef::new_document();
    kept.into_iter().for_each(|child| {
        child.detach();
        document.append(child);
    });
    document
}

View File

@ -0,0 +1,645 @@
#![cfg(test)]
use super::rules::predefined::*;
use super::rules::{Element, Rules};
use super::sanitize_str;
/// Empty input stays empty under every predefined rule set.
#[test]
fn empty() {
    let rule_sets: [&Rules; 5] = [&BASIC, &DEFAULT, &RELAXED, &RESTRICTED, &UNTRUSTED];
    for rules in rule_sets.iter() {
        let cleaned = sanitize_str(rules, "").unwrap();
        assert_eq!(cleaned, "");
    }
}
/* basic */

/// Well-formed markup with a comment, links, a line break and a script.
const BASIC_HTML: &str = "<b>Lo<!-- comment -->rem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\");</script>";

#[test]
fn basic_default() {
    let cleaned = sanitize_str(&DEFAULT, BASIC_HTML).unwrap();
    assert_eq!(cleaned, "Lorem ipsum dolor sit amet ");
}

#[test]
fn basic_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, BASIC_HTML).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lorem</b> ipsum <strong>dolor</strong> sit amet alert(\"hello world\");"
    );
}

#[test]
fn basic_basic() {
    let cleaned = sanitize_str(&BASIC, BASIC_HTML).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lorem</b> <a href=\"pants\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
    );
}

#[test]
fn basic_relaxed() {
    let cleaned = sanitize_str(&RELAXED, BASIC_HTML).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lorem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
    );
}
/* malformed */

/// Markup with stray end tags, an unterminated attribute value and an
/// unclosed script.
const MALFORMED_HTML: &str = "Lo<!-- comment -->rem</b> <a href=pants title=\"foo>ipsum <a href=\"http://foo.com/\"><strong>dolor</a></strong> sit<br/>amet <script>alert(\"hello world\");";

#[test]
fn malformed_default() {
    let cleaned = sanitize_str(&DEFAULT, MALFORMED_HTML).unwrap();
    assert_eq!(cleaned, "Lorem dolor sit amet ");
}

#[test]
fn malformed_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, MALFORMED_HTML).unwrap();
    assert_eq!(
        cleaned,
        "Lorem <strong>dolor</strong> sit amet alert(\"hello world\");"
    );
}

#[test]
fn malformed_basic() {
    let cleaned = sanitize_str(&BASIC, MALFORMED_HTML).unwrap();
    assert_eq!(
        cleaned,
        "Lorem <a href=\"pants\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
    );
}

#[test]
fn malformed_relaxed() {
    let cleaned = sanitize_str(&RELAXED, MALFORMED_HTML).unwrap();
    assert_eq!(
        cleaned,
        "Lorem <a href=\"pants\" title=\"foo>ipsum <a href=\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
    );
}
/* unclosed */

/// A closed paragraph followed by a blockquote that is never closed.
const UNCLOSED_HTML: &str = "<p>a</p><blockquote>b";

// Under DEFAULT and RESTRICTED both `p` and `blockquote` are space elements.
// Each is replaced by its content padded with a space on both sides, so the
// two paddings meet as a double space between "a" and "b".
#[test]
fn unclosed_default() {
    assert_eq!(&sanitize_str(&DEFAULT, UNCLOSED_HTML).unwrap(), " a  b ");
}

#[test]
fn unclosed_restricted() {
    assert_eq!(
        &sanitize_str(&RESTRICTED, UNCLOSED_HTML).unwrap(),
        " a  b "
    );
}

#[test]
fn unclosed_basic() {
    assert_eq!(
        &sanitize_str(&BASIC, UNCLOSED_HTML).unwrap(),
        "<p>a</p><blockquote>b</blockquote>"
    );
}

#[test]
fn unclosed_relaxed() {
    assert_eq!(
        &sanitize_str(&RELAXED, UNCLOSED_HTML).unwrap(),
        "<p>a</p><blockquote>b</blockquote>"
    );
}
/* malicious */

/// Markup with a `javascript:` link and a script tag disguised behind an
/// extra `<`.
const MALICIOUS_HTML: &str = "<b>Lo<!-- comment -->rem</b> <a href=\"javascript:pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert(\"hello world\");</script>";

#[test]
fn malicious_default() {
    let cleaned = sanitize_str(&DEFAULT, MALICIOUS_HTML).unwrap();
    assert_eq!(
        cleaned,
        "Lorem ipsum dolor sit amet &lt;script&gt;alert(\"hello world\");"
    );
}

#[test]
fn malicious_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, MALICIOUS_HTML).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lorem</b> ipsum <strong>dolor</strong> sit amet &lt;script&gt;alert(\"hello world\");"
    );
}

#[test]
fn malicious_basic() {
    let cleaned = sanitize_str(&BASIC, MALICIOUS_HTML).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lorem</b> <a>ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert(\"hello world\");"
    );
}

#[test]
fn malicious_untrusted() {
    let cleaned = sanitize_str(&UNTRUSTED, MALICIOUS_HTML).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lorem</b> <a rel=\"noreferrer noopener\" target=\"_blank\">ipsum</a> <a href=\"http://foo.com/\" rel=\"noreferrer noopener\" target=\"_blank\"><strong>dolor</strong></a> sit amet &lt;script&gt;alert(\"hello world\");"
    );
}

#[test]
fn malicious_relaxed() {
    let cleaned = sanitize_str(&RELAXED, MALICIOUS_HTML).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lorem</b> <a title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert(\"hello world\");"
    );
}
/* raw-comment */

/// A top-level comment followed by plain text.
const RAW_COMMENT_HTML: &str = "<!-- comment -->Hello";

#[test]
fn raw_comment_default() {
    let cleaned = sanitize_str(&DEFAULT, RAW_COMMENT_HTML).unwrap();
    assert_eq!(cleaned, "Hello");
}

#[test]
fn raw_comment_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, RAW_COMMENT_HTML).unwrap();
    assert_eq!(cleaned, "Hello");
}

#[test]
fn raw_comment_basic() {
    let cleaned = sanitize_str(&BASIC, RAW_COMMENT_HTML).unwrap();
    assert_eq!(cleaned, "Hello");
}

#[test]
fn raw_comment_relaxed() {
    let cleaned = sanitize_str(&RELAXED, RAW_COMMENT_HTML).unwrap();
    assert_eq!(cleaned, "Hello");
}
/* protocol-based JS injection: simple, no spaces */

/// Link whose `href` uses the `javascript:` scheme.
const JS_INJECTION_HTML_1: &str = "<a href=\"javascript:alert(\'XSS\');\">foo</a>";

#[test]
fn js_injection_1_default() {
    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_1).unwrap();
    assert_eq!(cleaned, "foo");
}

#[test]
fn js_injection_1_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_1).unwrap();
    assert_eq!(cleaned, "foo");
}

#[test]
fn js_injection_1_basic() {
    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_1).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}

#[test]
fn js_injection_1_relaxed() {
    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_1).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}
/* protocol-based JS injection: simple, a space between the scheme name and the colon */
const JS_INJECTION_HTML_2: &str = "<a href=\"javascript :alert(\'XSS\');\">foo</a>";
// DEFAULT rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_2_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_2).unwrap();
    assert_eq!(&sanitized, "foo");
}
// RESTRICTED rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_2_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_2).unwrap();
    assert_eq!(&sanitized, "foo");
}
// BASIC rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_2_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_2).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
// RELAXED rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_2_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_2).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
/* protocol-based JS injection: simple, a space right after the colon */
const JS_INJECTION_HTML_3: &str = "<a href=\"javascript: alert(\'XSS\');\">foo</a>";
// DEFAULT rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_3_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_3).unwrap();
    assert_eq!(&sanitized, "foo");
}
// RESTRICTED rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_3_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_3).unwrap();
    assert_eq!(&sanitized, "foo");
}
// BASIC rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_3_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_3).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
// RELAXED rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_3_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_3).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
/* protocol-based JS injection: simple, spaces both before and after the colon */
const JS_INJECTION_HTML_4: &str = "<a href=\"javascript : alert(\'XSS\');\">foo</a>";
// DEFAULT rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_4_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_4).unwrap();
    assert_eq!(&sanitized, "foo");
}
// RESTRICTED rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_4_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_4).unwrap();
    assert_eq!(&sanitized, "foo");
}
// BASIC rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_4_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_4).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
// RELAXED rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_4_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_4).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
/* protocol-based JS injection: an extra colon placed in front of the `javascript:` scheme */
const JS_INJECTION_HTML_5: &str = "<a href=\":javascript:alert(\'XSS\');\">foo</a>";
// DEFAULT rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_5_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_5).unwrap();
    assert_eq!(&sanitized, "foo");
}
// RESTRICTED rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_5_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_5).unwrap();
    assert_eq!(&sanitized, "foo");
}
// BASIC rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_5_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_5).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
// RELAXED rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_5_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_5).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
/* protocol-based JS injection: colon written as the decimal character reference `&#58;` */
const JS_INJECTION_HTML_6: &str = "<a href=\"javascript&#58;\">foo</a>";
// DEFAULT rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_6_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_6).unwrap();
    assert_eq!(&sanitized, "foo");
}
// RESTRICTED rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_6_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_6).unwrap();
    assert_eq!(&sanitized, "foo");
}
// BASIC rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_6_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_6).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
// RELAXED rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_6_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_6).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
/* protocol-based JS injection: colon written as the zero-padded decimal character reference `&#0058;` */
const JS_INJECTION_HTML_7: &str = "<a href=\"javascript&#0058;\">foo</a>";
// DEFAULT rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_7_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_7).unwrap();
    assert_eq!(&sanitized, "foo");
}
// RESTRICTED rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_7_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_7).unwrap();
    assert_eq!(&sanitized, "foo");
}
// BASIC rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_7_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_7).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
// RELAXED rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_7_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_7).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
/* protocol-based JS injection: the whole unquoted href spelled as zero-padded decimal character
   references without trailing semicolons; the references decode to javascript:alert('XSS') */
const JS_INJECTION_HTML_8: &str = "<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>foo</a>";
// DEFAULT rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_8_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_8).unwrap();
    assert_eq!(&sanitized, "foo");
}
// RESTRICTED rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_8_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_8).unwrap();
    assert_eq!(&sanitized, "foo");
}
// BASIC rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_8_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_8).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
// RELAXED rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_8_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_8).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
/* protocol-based JS injection: colon written as the hexadecimal character reference `&#x3A;` */
const JS_INJECTION_HTML_9: &str = "<a href=\"javascript&#x3A;\">foo</a>";
// DEFAULT rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_9_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_9).unwrap();
    assert_eq!(&sanitized, "foo");
}
// RESTRICTED rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_9_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_9).unwrap();
    assert_eq!(&sanitized, "foo");
}
// BASIC rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_9_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_9).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
// RELAXED rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_9_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_9).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
/* protocol-based JS injection: colon written as the zero-padded hexadecimal character reference `&#x003A;` */
const JS_INJECTION_HTML_10: &str = "<a href=\"javascript&#x003A;\">foo</a>";
// DEFAULT rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_10_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_10).unwrap();
    assert_eq!(&sanitized, "foo");
}
// RESTRICTED rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_10_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_10).unwrap();
    assert_eq!(&sanitized, "foo");
}
// BASIC rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_10_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_10).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
// RELAXED rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_10_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_10).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
/* protocol-based JS injection: the whole unquoted href spelled as hexadecimal character
   references without trailing semicolons; the references decode to javascript:alert('XSS') */
const JS_INJECTION_HTML_11: &str = "<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>foo</a>";
// DEFAULT rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_11_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_11).unwrap();
    assert_eq!(&sanitized, "foo");
}
// RESTRICTED rules: expects the anchor to be dropped, keeping only its text.
#[test]
fn js_injection_11_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_11).unwrap();
    assert_eq!(&sanitized, "foo");
}
// BASIC rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_11_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_11).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
// RELAXED rules: expects the anchor to survive with its href removed.
#[test]
fn js_injection_11_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_11).unwrap();
    assert_eq!(&sanitized, "<a>foo</a>");
}
/* Valid HTML entities should be translated: `&apos;` and `&eacute;` are expected
   to come out as literal characters while `&amp;` stays encoded. */
#[test]
fn misc_1() {
    let input = "Don&apos;t tas&eacute; me &amp; bro!";
    let sanitized = sanitize_str(&DEFAULT, input).unwrap();
    assert_eq!(&sanitized, "Don't tasé me &amp; bro!");
}
/* Valid HTML entities should be translated while a bare, unencoded ampersand
   is expected to be encoded as `&amp;`. */
#[test]
fn misc_2() {
    let input = "cookies&sup2; & &frac14; cr&eacute;me";
    let sanitized = sanitize_str(&DEFAULT, input).unwrap();
    assert_eq!(&sanitized, "cookies² &amp; ¼ créme");
}
/* `&apos;` should never appear in the output: the apostrophe in the text is
   expected as a literal character, and the anchor itself is expected to be dropped. */
#[test]
fn misc_3() {
    let input = "<a href='&apos;' class=\"' &#39;\">IE6 isn't a real browser</a>";
    let sanitized = sanitize_str(&DEFAULT, input).unwrap();
    assert_eq!(&sanitized, "IE6 isn't a real browser");
}
/* Several instances of the same element in a row should not trip the sanitizer:
   four consecutive `<img>` tags are expected to produce an empty string. */
#[test]
fn misc_4() {
    let input = "<img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\">";
    let sanitized = sanitize_str(&DEFAULT, input).unwrap();
    assert_eq!(&sanitized, "");
}
/* When a whitespace element is removed, its contents should be surrounded with
   space characters: here a stripped `<div>` is expected to leave "foo bar baz". */
#[test]
fn misc_5() {
    let input = "foo<div>bar</div>baz";
    let sanitized = sanitize_str(&DEFAULT, input).unwrap();
    assert_eq!(&sanitized, "foo bar baz");
}
// DEFAULT rules: each `<br>` is expected to be replaced by a single space.
#[test]
fn misc_6() {
    let input = "foo<br>bar<br>baz";
    let sanitized = sanitize_str(&DEFAULT, input).unwrap();
    assert_eq!(&sanitized, "foo bar baz");
}
// DEFAULT rules: each `<hr>` is expected to be replaced by a single space.
#[test]
fn misc_7() {
    let input = "foo<hr>bar<hr>baz";
    let sanitized = sanitize_str(&DEFAULT, input).unwrap();
    assert_eq!(&sanitized, "foo bar baz");
}
// Builds a rule set by hand (comments allowed, `b` and `span` elements allowed,
// `script`/`style` deleted, `br` turned into a space, `strong` renamed to `span`)
// and checks the sanitized output against the expected markup.
#[test]
fn custom_rules() {
    let input = "<b>Lo<!-- comment -->rem</b> <a href=\"javascript:pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\")</script>";
    let expected = "<b>Lo<!-- comment -->rem</b> ipsum <span>dolor</span> sit amet ";

    let rules = Rules::new()
        .allow_comments(true)
        .element(Element::new("b"))
        .element(Element::new("span"))
        .delete("script")
        .delete("style")
        .space("br")
        .rename("strong", "span");

    let sanitized = sanitize_str(&rules, input).unwrap();
    assert_eq!(&sanitized, expected);
}