Add private fork of sanitize-html-rs
Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
This commit is contained in:
parent
36da496aa1
commit
4e3f7b46da
|
@ -747,6 +747,16 @@ version = "1.0.10"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
|
||||
|
||||
[[package]]
|
||||
name = "sanitize_html"
|
||||
version = "0.7.0"
|
||||
dependencies = [
|
||||
"html5ever",
|
||||
"kuchiki",
|
||||
"lazy_static",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.1.0"
|
||||
|
|
|
@ -2,4 +2,5 @@
|
|||
|
||||
members = [
|
||||
"bin",
|
||||
]
|
||||
"sanitize-html-rs",
|
||||
]
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
name: Build
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os:
|
||||
- ubuntu-latest
|
||||
- macOS-latest
|
||||
- windows-latest
|
||||
rust:
|
||||
- stable
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: ${{ matrix.rust }}
|
||||
override: true
|
||||
- name: Build
|
||||
run: |
|
||||
cargo build --all-targets --no-default-features --verbose
|
||||
cargo build --all-targets --verbose
|
||||
- name: Run tests
|
||||
run: cargo test --all-targets --verbose
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
|
@ -0,0 +1,27 @@
|
|||
name: Coverage
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
coverage:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- uses: actions-rs/install@v0.1
|
||||
with:
|
||||
crate: cargo-tarpaulin
|
||||
use-tool-cache: true
|
||||
- name: Run coverage
|
||||
run: cargo tarpaulin -f -t 5 --out Xml -v -- --test-threads=1
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v1
|
||||
with:
|
||||
token: ${{secrets.CODECOV_TOKEN}}
|
|
@ -0,0 +1,24 @@
|
|||
name: Style check
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
clippy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Install clippy
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
components: clippy
|
||||
- uses: actions-rs/clippy-check@v1
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
args: --all --all-features
|
||||
fmt:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: Run fmt check
|
||||
run: cargo fmt --all -- --check
|
|
@ -0,0 +1,4 @@
|
|||
/target/
|
||||
**/*.rs.bk
|
||||
Cargo.lock
|
||||
/.vscode
|
|
@ -0,0 +1,16 @@
|
|||
[package]
|
||||
name = "sanitize_html"
|
||||
version = "0.7.0"
|
||||
authors = ["Andrey Kutejko <andy128k@gmail.com>"]
|
||||
description = "Rule-based HTML Sanitization library"
|
||||
keywords = ["html", "sanitize"]
|
||||
license = "MIT"
|
||||
homepage = "https://github.com/andy128k/sanitize-html-rs"
|
||||
repository = "https://github.com/andy128k/sanitize-html-rs.git"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
regex = "1"
|
||||
lazy_static = "1"
|
||||
html5ever = "0.25"
|
||||
kuchiki = "0.8"
|
|
@ -0,0 +1,18 @@
|
|||
Copyright (c) 2017 Andrey Kutejko
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,8 @@
|
|||
# Sanitize HTML
|
||||
|
||||
[![Crates.io Status](https://img.shields.io/crates/v/sanitize_html.svg)](https://crates.io/crates/sanitize_html)
|
||||
[![Build](https://github.com/andy128k/sanitize-html-rs/workflows/Build/badge.svg?branch=master&event=push)](https://github.com/andy128k/sanitize-html-rs/actions?query=workflow%3ABuild)
|
||||
[![codecov](https://codecov.io/gh/andy128k/sanitize-html-rs/branch/master/graph/badge.svg)](https://codecov.io/gh/andy128k/sanitize-html-rs)
|
||||
[![dependency status](https://deps.rs/repo/github/andy128k/sanitize-html-rs/status.svg)](https://deps.rs/repo/github/andy128k/sanitize-html-rs)
|
||||
|
||||
This is a library for sanitization of HTML fragments.
|
|
@ -0,0 +1,37 @@
|
|||
//! Error types which can be emitted by the sanitization procedure.
|
||||
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
|
||||
/// Error produced while sanitizing HTML.
///
/// Every variant wraps the standard-library error that caused the failure.
#[derive(Debug)]
pub enum SanitizeError {
    /// A borrowed byte slice could not be decoded as UTF-8.
    StrUtf8Error(std::str::Utf8Error),

    /// An owned byte buffer could not be converted into a `String` because it
    /// is not valid UTF-8.
    Utf8Error(std::string::FromUtf8Error),

    /// Serialization failed with an I/O error.
    SerializeError(std::io::Error),
}
|
||||
|
||||
impl fmt::Display for SanitizeError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
SanitizeError::StrUtf8Error(e) => write!(f, "UTF-8 decode error {}", e),
|
||||
SanitizeError::Utf8Error(e) => write!(f, "UTF-8 decode error {}", e),
|
||||
SanitizeError::SerializeError(e) => write!(f, "Serialization error {}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for SanitizeError {
|
||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||
match self {
|
||||
SanitizeError::StrUtf8Error(e) => Some(e),
|
||||
SanitizeError::Utf8Error(e) => Some(e),
|
||||
SanitizeError::SerializeError(e) => Some(e),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
//! HTML Sanitization library
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! ```
|
||||
//! use sanitize_html::sanitize_str;
|
||||
//! use sanitize_html::rules::predefined::DEFAULT;
|
||||
//!
|
||||
//! let input = "<b>Lo<!-- comment -->rem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\");</script>";
|
||||
//!
|
||||
//! let sanitized_default: String = sanitize_str(&DEFAULT, input).unwrap();
|
||||
//! assert_eq!(&sanitized_default, "Lorem ipsum dolor sit amet ");
|
||||
//! ```
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
pub mod errors;
|
||||
mod parse;
|
||||
pub mod rules;
|
||||
mod sanitize;
|
||||
mod tests;
|
||||
|
||||
use crate::errors::SanitizeError;
|
||||
use crate::rules::Rules;
|
||||
|
||||
/// Sanitize HTML bytes
|
||||
pub fn sanitize_bytes(rules: &Rules, input: &[u8]) -> Result<Vec<u8>, SanitizeError> {
|
||||
let input_str = std::str::from_utf8(input).map_err(SanitizeError::StrUtf8Error)?;
|
||||
let dom = parse::parse_str(input_str);
|
||||
let new_dom = sanitize::sanitize_dom(&dom, rules);
|
||||
let result_bytes = parse::unparse_bytes(&new_dom)?;
|
||||
Ok(result_bytes)
|
||||
}
|
||||
|
||||
/// Sanitize HTML string
|
||||
pub fn sanitize_str(rules: &Rules, input: &str) -> Result<String, SanitizeError> {
|
||||
let dom = parse::parse_str(input);
|
||||
let new_dom = sanitize::sanitize_dom(&dom, rules);
|
||||
let result_bytes = parse::unparse_bytes(&new_dom)?;
|
||||
let result_string = String::from_utf8(result_bytes).map_err(SanitizeError::Utf8Error)?;
|
||||
Ok(result_string)
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
use super::errors::SanitizeError;
|
||||
use html5ever::{
|
||||
interface::QualName,
|
||||
local_name, namespace_prefix, namespace_url, ns, serialize,
|
||||
serialize::{SerializeOpts, TraversalScope},
|
||||
tendril::TendrilSink,
|
||||
};
|
||||
use kuchiki::{parse_html_with_options, NodeRef, ParseOpts};
|
||||
use std::default::Default;
|
||||
|
||||
pub(crate) fn parse_str(input: &str) -> NodeRef {
|
||||
let mut opts = ParseOpts::default();
|
||||
opts.tree_builder.drop_doctype = true;
|
||||
|
||||
let mut parser = parse_html_with_options(opts);
|
||||
parser.process(input.into());
|
||||
parser.finish()
|
||||
}
|
||||
|
||||
pub(crate) fn unparse_bytes(dom: &NodeRef) -> Result<Vec<u8>, SanitizeError> {
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
|
||||
let parent = QualName::new(
|
||||
Some(namespace_prefix!("html")),
|
||||
ns!(html),
|
||||
local_name!("div"),
|
||||
);
|
||||
|
||||
let opts = SerializeOpts {
|
||||
scripting_enabled: false,
|
||||
traversal_scope: TraversalScope::ChildrenOnly(Some(parent)),
|
||||
create_missing_parent: false,
|
||||
};
|
||||
|
||||
serialize(&mut buf, dom, opts).map_err(SanitizeError::SerializeError)?;
|
||||
|
||||
Ok(buf)
|
||||
}
|
|
@ -0,0 +1,104 @@
|
|||
//! Structures to define sanitization rules.
|
||||
|
||||
pub mod pattern;
|
||||
pub mod predefined;
|
||||
|
||||
use self::pattern::Pattern;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// structure to describe HTML element
|
||||
pub struct Element {
|
||||
/// name of an element
|
||||
pub name: String,
|
||||
/// Whitelist of allowed attributes
|
||||
pub attributes: HashMap<String, Pattern>,
|
||||
/// List of mandatory atributes and their values.
|
||||
/// These attributes will be forcibly added to element.
|
||||
pub mandatory_attributes: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl Element {
|
||||
/// Creates element descriptor
|
||||
pub fn new(name: &str) -> Self {
|
||||
Self {
|
||||
name: name.to_owned(),
|
||||
attributes: HashMap::new(),
|
||||
mandatory_attributes: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds an attribute
|
||||
pub fn attribute(mut self, attribute: &str, pattern: Pattern) -> Self {
|
||||
self.attributes.insert(attribute.to_owned(), pattern);
|
||||
self
|
||||
}
|
||||
|
||||
/// Adds mandatory attribute
|
||||
pub fn mandatory_attribute(mut self, attribute: &str, value: &str) -> Self {
|
||||
self.mandatory_attributes
|
||||
.insert(attribute.to_owned(), value.to_owned());
|
||||
self
|
||||
}
|
||||
|
||||
/// Checks if attribute is valid
|
||||
pub fn is_valid(&self, attribute: &str, value: &str) -> bool {
|
||||
match self.attributes.get(attribute) {
|
||||
None => false,
|
||||
Some(pattern) => pattern.matches(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// structure to describe sanitization rules
|
||||
#[derive(Default)]
|
||||
pub struct Rules {
|
||||
/// Determines if comments are kept of stripped out of a document.
|
||||
pub allow_comments: bool,
|
||||
/// Allowed elements.
|
||||
pub allowed_elements: HashMap<String, Element>,
|
||||
/// Elements which will be removed together with their children.
|
||||
pub delete_elements: HashSet<String>,
|
||||
/// Elements which will be replaced by spaces (Their children will be processed recursively).
|
||||
pub space_elements: HashSet<String>,
|
||||
/// Elements which will be renamed.
|
||||
pub rename_elements: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl Rules {
|
||||
/// Creates a new rules set.
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Sets if comments are allowed
|
||||
pub fn allow_comments(mut self, allow_comments: bool) -> Self {
|
||||
self.allow_comments = allow_comments;
|
||||
self
|
||||
}
|
||||
|
||||
/// Adds a rule for an allowed element
|
||||
pub fn element(mut self, element: Element) -> Self {
|
||||
self.allowed_elements.insert(element.name.clone(), element);
|
||||
self
|
||||
}
|
||||
|
||||
/// Adds a rule to delete an element
|
||||
pub fn delete(mut self, element_name: &str) -> Self {
|
||||
self.delete_elements.insert(element_name.to_owned());
|
||||
self
|
||||
}
|
||||
|
||||
/// Adds a rule to replace an element with space
|
||||
pub fn space(mut self, element_name: &str) -> Self {
|
||||
self.space_elements.insert(element_name.to_owned());
|
||||
self
|
||||
}
|
||||
|
||||
/// Adds a rule to rename an element
|
||||
pub fn rename(mut self, element_name: &str, to: &str) -> Self {
|
||||
self.rename_elements
|
||||
.insert(element_name.to_owned(), to.to_owned());
|
||||
self
|
||||
}
|
||||
}
|
|
@ -0,0 +1,127 @@
|
|||
//! This module contains code dedicated to check validity of attribute's value.
|
||||
//!
|
||||
//! # Examples
|
||||
//! ```
|
||||
//! use sanitize_html::rules::pattern::Pattern;
|
||||
//! use regex::Regex;
|
||||
//!
|
||||
//! let href = Pattern::regex(Regex::new("^(ftp:|http:|https:|mailto:)").unwrap()) |
|
||||
//! !Pattern::regex(Regex::new("^[^/]+[[:space:]]*:").unwrap());
|
||||
//!
|
||||
//! assert!(href.matches("filename.xls"));
|
||||
//! assert!(href.matches("http://foo.com/"));
|
||||
//! assert!(href.matches(" filename with spaces .zip "));
|
||||
//! assert!(!href.matches(" javascript : window.location = '//example.com/'")); // Attempt to make XSS
|
||||
//! ```
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
/// Value pattern
|
||||
pub struct Pattern(pub Box<dyn Fn(&str) -> bool + Sync + Send>);
|
||||
|
||||
impl Pattern {
|
||||
/// Creates pattern which accepts any value.
|
||||
///
|
||||
/// # Example
|
||||
/// ```
|
||||
/// use sanitize_html::rules::pattern::Pattern;
|
||||
/// use regex::Regex;
|
||||
///
|
||||
/// let pattern = Pattern::any();
|
||||
/// assert!(pattern.matches(""));
|
||||
/// assert!(pattern.matches("pants"));
|
||||
/// ```
|
||||
pub fn any() -> Self {
|
||||
Pattern(Box::new(move |_value| true))
|
||||
}
|
||||
|
||||
/// Creates pattern which uses regular expression to check a value. Panics
|
||||
///
|
||||
/// # Example
|
||||
/// ```
|
||||
/// use sanitize_html::rules::pattern::Pattern;
|
||||
/// use regex::Regex;
|
||||
///
|
||||
/// let pattern = Pattern::regex(Regex::new("ant").unwrap());
|
||||
/// assert!(!pattern.matches(""));
|
||||
/// assert!(pattern.matches("pants"));
|
||||
/// ```
|
||||
pub fn regex(re: Regex) -> Self {
|
||||
Pattern(Box::new(move |value| re.is_match(value)))
|
||||
}
|
||||
|
||||
/// Checks if a value matches to a pattern.
|
||||
pub fn matches(&self, value: &str) -> bool {
|
||||
(self.0)(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl ::std::ops::Not for Pattern {
|
||||
type Output = Pattern;
|
||||
|
||||
/// Negates pattern
|
||||
///
|
||||
/// # Example
|
||||
/// ```
|
||||
/// use sanitize_html::rules::pattern::Pattern;
|
||||
/// use regex::Regex;
|
||||
///
|
||||
/// let pattern = !Pattern::any();
|
||||
/// assert!(!pattern.matches(""));
|
||||
/// assert!(!pattern.matches("pants"));
|
||||
/// ```
|
||||
fn not(self) -> Self::Output {
|
||||
let cb = self.0;
|
||||
Pattern(Box::new(move |value| !cb(value)))
|
||||
}
|
||||
}
|
||||
|
||||
impl ::std::ops::BitAnd for Pattern {
|
||||
type Output = Pattern;
|
||||
|
||||
/// Combines two patterns into a pattern which matches a string iff both patterns match that string.
|
||||
///
|
||||
/// # Example
|
||||
/// ```
|
||||
/// use sanitize_html::rules::pattern::Pattern;
|
||||
/// use regex::Regex;
|
||||
///
|
||||
/// let pan = Pattern::regex(Regex::new("pan").unwrap());
|
||||
/// let ant = Pattern::regex(Regex::new("ant").unwrap());
|
||||
/// let pattern = pan & ant;
|
||||
///
|
||||
/// assert!(!pattern.matches("pan"));
|
||||
/// assert!(!pattern.matches("ant"));
|
||||
/// assert!(pattern.matches("pants"));
|
||||
/// ```
|
||||
fn bitand(self, rhs: Pattern) -> Self::Output {
|
||||
let cb1 = self.0;
|
||||
let cb2 = rhs.0;
|
||||
Pattern(Box::new(move |value| cb1(value) && cb2(value)))
|
||||
}
|
||||
}
|
||||
|
||||
impl ::std::ops::BitOr for Pattern {
|
||||
type Output = Pattern;
|
||||
|
||||
/// Combines two patterns into a pattern which matches a string if one of patterns matches that string.
|
||||
///
|
||||
/// # Example
|
||||
/// ```
|
||||
/// use sanitize_html::rules::pattern::Pattern;
|
||||
/// use regex::Regex;
|
||||
///
|
||||
/// let pan = Pattern::regex(Regex::new("pan").unwrap());
|
||||
/// let pot = Pattern::regex(Regex::new("pot").unwrap());
|
||||
/// let pattern = pan | pot;
|
||||
///
|
||||
/// assert!(pattern.matches("pants"));
|
||||
/// assert!(pattern.matches("pot"));
|
||||
/// assert!(!pattern.matches("jar"));
|
||||
/// ```
|
||||
fn bitor(self, rhs: Pattern) -> Self::Output {
|
||||
let cb1 = self.0;
|
||||
let cb2 = rhs.0;
|
||||
Pattern(Box::new(move |value| cb1(value) || cb2(value)))
|
||||
}
|
||||
}
|
|
@ -0,0 +1,374 @@
|
|||
//! Predefined rules
|
||||
//!
|
||||
//! These rules are inspired by a great Ruby gem [sanitize](https://github.com/rgrove/sanitize/).
|
||||
|
||||
use super::pattern::Pattern;
|
||||
use super::{Element, Rules};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
|
||||
fn re(regex: &str) -> Pattern {
|
||||
Pattern::regex(Regex::new(regex).unwrap())
|
||||
}
|
||||
|
||||
fn href() -> Pattern {
|
||||
re("^(ftp:|http:|https:|mailto:)") | !re("^[^/]+[[:space:]]*:")
|
||||
}
|
||||
|
||||
fn src() -> Pattern {
|
||||
re("^(http:|https:)") | !re("^[^/]+[[:space:]]*:")
|
||||
}
|
||||
|
||||
lazy_static! {
    /// Default rule set: no element is allowed, so all tags are removed.
    pub static ref DEFAULT: Rules = default();

    /// Restricted rule set: only very simple inline markup is allowed.
    /// No links, images, or block elements.
    pub static ref RESTRICTED: Rules = restricted();

    /// Rule set for documents from untrusted sources: everything is removed
    /// except text-emphasizing tags and links.
    pub static ref UNTRUSTED: Rules = untrusted();

    /// Basic rule set: allows a variety of markup including formatting
    /// elements, links, and lists.
    pub static ref BASIC: Rules = basic();

    /// Relaxed rule set: allows an even wider variety of markup, including
    /// images and tables.
    pub static ref RELAXED: Rules = relaxed();
}
|
||||
|
||||
fn basic() -> Rules {
|
||||
Rules::new()
|
||||
.element(Element::new("a").attribute("href", href()))
|
||||
.element(Element::new("abbr").attribute("title", Pattern::any()))
|
||||
.element(Element::new("b"))
|
||||
.element(Element::new("blockquote").attribute("cite", src()))
|
||||
.element(Element::new("br"))
|
||||
.element(Element::new("br"))
|
||||
.element(Element::new("cite"))
|
||||
.element(Element::new("code"))
|
||||
.element(Element::new("dd"))
|
||||
.element(Element::new("dfn").attribute("title", Pattern::any()))
|
||||
.element(Element::new("dl"))
|
||||
.element(Element::new("dt"))
|
||||
.element(Element::new("em"))
|
||||
.element(Element::new("i"))
|
||||
.element(Element::new("kbd"))
|
||||
.element(Element::new("li"))
|
||||
.element(Element::new("mark"))
|
||||
.element(Element::new("ol"))
|
||||
.element(Element::new("p"))
|
||||
.element(Element::new("pre"))
|
||||
.element(Element::new("q").attribute("cite", src()))
|
||||
.element(Element::new("s"))
|
||||
.element(Element::new("samp"))
|
||||
.element(Element::new("small"))
|
||||
.element(Element::new("strike"))
|
||||
.element(Element::new("strong"))
|
||||
.element(Element::new("sub"))
|
||||
.element(Element::new("sup"))
|
||||
.element(
|
||||
Element::new("time")
|
||||
.attribute("datetime", Pattern::any())
|
||||
.attribute("pubdate", Pattern::any()),
|
||||
)
|
||||
.element(Element::new("u"))
|
||||
.element(Element::new("ul"))
|
||||
.element(Element::new("var"))
|
||||
.space("address")
|
||||
.space("article")
|
||||
.space("aside")
|
||||
.space("div")
|
||||
.space("footer")
|
||||
.space("h1")
|
||||
.space("h2")
|
||||
.space("h3")
|
||||
.space("h4")
|
||||
.space("h5")
|
||||
.space("h6")
|
||||
.space("header")
|
||||
.space("hgroup")
|
||||
.space("hr")
|
||||
.space("nav")
|
||||
.space("section")
|
||||
}
|
||||
|
||||
fn default() -> Rules {
|
||||
Rules::new()
|
||||
.space("address")
|
||||
.space("article")
|
||||
.space("aside")
|
||||
.space("blockquote")
|
||||
.space("br")
|
||||
.space("dd")
|
||||
.space("div")
|
||||
.space("dl")
|
||||
.space("dt")
|
||||
.space("footer")
|
||||
.space("h1")
|
||||
.space("h2")
|
||||
.space("h3")
|
||||
.space("h4")
|
||||
.space("h5")
|
||||
.space("h6")
|
||||
.space("header")
|
||||
.space("hgroup")
|
||||
.space("hr")
|
||||
.space("li")
|
||||
.space("nav")
|
||||
.space("ol")
|
||||
.space("p")
|
||||
.space("pre")
|
||||
.space("section")
|
||||
.space("ul")
|
||||
.delete("iframe")
|
||||
.delete("noembed")
|
||||
.delete("noframes")
|
||||
.delete("noscript")
|
||||
.delete("script")
|
||||
.delete("style")
|
||||
}
|
||||
|
||||
fn relaxed() -> Rules {
|
||||
fn relaxed_element(name: &str) -> Element {
|
||||
Element::new(name)
|
||||
.attribute("dir", Pattern::any())
|
||||
.attribute("lang", Pattern::any())
|
||||
.attribute("title", Pattern::any())
|
||||
.attribute("class", Pattern::any())
|
||||
}
|
||||
|
||||
Rules::new()
|
||||
.element(relaxed_element("a").attribute("href", href()))
|
||||
.element(relaxed_element("abbr"))
|
||||
.element(relaxed_element("b"))
|
||||
.element(relaxed_element("bdo"))
|
||||
.element(relaxed_element("blockquote").attribute("cite", src()))
|
||||
.element(relaxed_element("br"))
|
||||
.element(relaxed_element("caption"))
|
||||
.element(relaxed_element("cite"))
|
||||
.element(relaxed_element("code"))
|
||||
.element(
|
||||
relaxed_element("col")
|
||||
.attribute("span", Pattern::any())
|
||||
.attribute("width", Pattern::any()),
|
||||
)
|
||||
.element(
|
||||
relaxed_element("colgroup")
|
||||
.attribute("span", Pattern::any())
|
||||
.attribute("width", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("dd"))
|
||||
.element(
|
||||
relaxed_element("del")
|
||||
.attribute("cite", src())
|
||||
.attribute("datetime", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("dfn"))
|
||||
.element(relaxed_element("dl"))
|
||||
.element(relaxed_element("dt"))
|
||||
.element(relaxed_element("em"))
|
||||
.element(relaxed_element("figcaption"))
|
||||
.element(relaxed_element("figure"))
|
||||
.element(relaxed_element("h1"))
|
||||
.element(relaxed_element("h2"))
|
||||
.element(relaxed_element("h3"))
|
||||
.element(relaxed_element("h4"))
|
||||
.element(relaxed_element("h5"))
|
||||
.element(relaxed_element("h6"))
|
||||
.element(relaxed_element("hgroup"))
|
||||
.element(relaxed_element("i"))
|
||||
.element(
|
||||
relaxed_element("img")
|
||||
.attribute("src", src())
|
||||
.attribute("align", Pattern::any())
|
||||
.attribute("alt", Pattern::any())
|
||||
.attribute("width", Pattern::any())
|
||||
.attribute("height", Pattern::any()),
|
||||
)
|
||||
.element(
|
||||
relaxed_element("ins")
|
||||
.attribute("cite", src())
|
||||
.attribute("datetime", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("kbd"))
|
||||
.element(relaxed_element("li"))
|
||||
.element(relaxed_element("mark"))
|
||||
.element(
|
||||
relaxed_element("ol")
|
||||
.attribute("start", Pattern::any())
|
||||
.attribute("reversed", Pattern::any())
|
||||
.attribute("type", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("p"))
|
||||
.element(relaxed_element("pre"))
|
||||
.element(relaxed_element("q").attribute("cite", src()))
|
||||
.element(relaxed_element("rp"))
|
||||
.element(relaxed_element("rt"))
|
||||
.element(relaxed_element("ruby"))
|
||||
.element(relaxed_element("s"))
|
||||
.element(relaxed_element("samp"))
|
||||
.element(relaxed_element("small"))
|
||||
.element(relaxed_element("strike"))
|
||||
.element(relaxed_element("strong"))
|
||||
.element(relaxed_element("sub"))
|
||||
.element(relaxed_element("sup"))
|
||||
.element(
|
||||
relaxed_element("table")
|
||||
.attribute("summary", Pattern::any())
|
||||
.attribute("width", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("tbody"))
|
||||
.element(
|
||||
relaxed_element("td")
|
||||
.attribute("abbr", Pattern::any())
|
||||
.attribute("axis", Pattern::any())
|
||||
.attribute("colspan", Pattern::any())
|
||||
.attribute("rowspan", Pattern::any())
|
||||
.attribute("width", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("tfoot"))
|
||||
.element(
|
||||
relaxed_element("th")
|
||||
.attribute("abbr", Pattern::any())
|
||||
.attribute("axis", Pattern::any())
|
||||
.attribute("colspan", Pattern::any())
|
||||
.attribute("rowspan", Pattern::any())
|
||||
.attribute("scope", Pattern::any())
|
||||
.attribute("width", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("thead"))
|
||||
.element(
|
||||
relaxed_element("time")
|
||||
.attribute("datetime", Pattern::any())
|
||||
.attribute("pubdate", Pattern::any()),
|
||||
)
|
||||
.element(relaxed_element("tr"))
|
||||
.element(relaxed_element("u"))
|
||||
.element(relaxed_element("ul").attribute("type", Pattern::any()))
|
||||
.element(relaxed_element("var"))
|
||||
.element(relaxed_element("wbr"))
|
||||
.space("address")
|
||||
.space("article")
|
||||
.space("aside")
|
||||
.space("footer")
|
||||
.space("header")
|
||||
.space("hr")
|
||||
.space("nav")
|
||||
.space("section")
|
||||
}
|
||||
|
||||
fn restricted() -> Rules {
|
||||
Rules::new()
|
||||
.element(Element::new("b"))
|
||||
.element(Element::new("em"))
|
||||
.element(Element::new("i"))
|
||||
.element(Element::new("strong"))
|
||||
.element(Element::new("u"))
|
||||
.space("address")
|
||||
.space("article")
|
||||
.space("aside")
|
||||
.space("blockquote")
|
||||
.space("br")
|
||||
.space("dd")
|
||||
.space("div")
|
||||
.space("dl")
|
||||
.space("dt")
|
||||
.space("footer")
|
||||
.space("h1")
|
||||
.space("h2")
|
||||
.space("h3")
|
||||
.space("h4")
|
||||
.space("h5")
|
||||
.space("h6")
|
||||
.space("header")
|
||||
.space("hgroup")
|
||||
.space("hr")
|
||||
.space("li")
|
||||
.space("nav")
|
||||
.space("ol")
|
||||
.space("p")
|
||||
.space("pre")
|
||||
.space("section")
|
||||
.space("ul")
|
||||
}
|
||||
|
||||
fn untrusted() -> Rules {
|
||||
Rules::new()
|
||||
.element(
|
||||
Element::new("a")
|
||||
.attribute("href", href())
|
||||
.mandatory_attribute("target", "_blank")
|
||||
.mandatory_attribute("rel", "noreferrer noopener"),
|
||||
)
|
||||
.element(Element::new("b"))
|
||||
.element(Element::new("em"))
|
||||
.element(Element::new("i"))
|
||||
.element(Element::new("strong"))
|
||||
.element(Element::new("u"))
|
||||
.space("address")
|
||||
.space("article")
|
||||
.space("aside")
|
||||
.space("blockquote")
|
||||
.space("br")
|
||||
.space("dd")
|
||||
.space("div")
|
||||
.space("dl")
|
||||
.space("dt")
|
||||
.space("footer")
|
||||
.space("h1")
|
||||
.space("h2")
|
||||
.space("h3")
|
||||
.space("h4")
|
||||
.space("h5")
|
||||
.space("h6")
|
||||
.space("header")
|
||||
.space("hgroup")
|
||||
.space("hr")
|
||||
.space("li")
|
||||
.space("nav")
|
||||
.space("ol")
|
||||
.space("p")
|
||||
.space("pre")
|
||||
.space("section")
|
||||
.space("ul")
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{basic, default, relaxed, restricted, untrusted};

    /// Building the basic rules succeeds and yields the expected element count.
    #[test]
    fn basic_does_not_fail() {
        assert_eq!(basic().allowed_elements.len(), 31);
    }

    /// Building the default rules succeeds and yields the expected set sizes.
    #[test]
    fn default_does_not_fail() {
        let built = default();
        assert_eq!(built.allowed_elements.len(), 0);
        assert_eq!(built.space_elements.len(), 26);
        assert_eq!(built.delete_elements.len(), 6);
    }

    /// Building the relaxed rules succeeds and yields the expected set sizes.
    #[test]
    fn relaxed_does_not_fail() {
        let built = relaxed();
        assert_eq!(built.allowed_elements.len(), 58);
        assert_eq!(built.space_elements.len(), 8);
    }

    /// Building the restricted rules succeeds and yields the expected set sizes.
    #[test]
    fn restricted_does_not_fail() {
        let built = restricted();
        assert_eq!(built.allowed_elements.len(), 5);
        assert_eq!(built.space_elements.len(), 26);
    }

    /// Building the untrusted rules succeeds and yields the expected set sizes.
    #[test]
    fn untrusted_does_not_fail() {
        let built = untrusted();
        assert_eq!(built.allowed_elements.len(), 6);
        assert_eq!(built.space_elements.len(), 26);
    }
}
|
|
@ -0,0 +1,167 @@
|
|||
use crate::rules::{Element, Rules};
|
||||
use html5ever::{interface::QualName, namespace_url, ns, LocalName};
|
||||
use kuchiki::{Attribute, ElementData, ExpandedName, NodeData, NodeRef};
|
||||
|
||||
fn simple_qual_name(name: &str) -> QualName {
|
||||
QualName::new(None, ns!(), LocalName::from(name))
|
||||
}
|
||||
|
||||
fn qual_name_to_string(name: &QualName) -> String {
|
||||
if name.ns == ns!(html) || name.ns.is_empty() {
|
||||
name.local.to_lowercase()
|
||||
} else {
|
||||
format!("{}:{}", name.ns.to_lowercase(), name.local.to_lowercase())
|
||||
}
|
||||
}
|
||||
|
||||
fn expanded_name_to_string(name: &ExpandedName) -> String {
|
||||
if name.ns == ns!(html) || name.ns.is_empty() {
|
||||
name.local.to_lowercase()
|
||||
} else {
|
||||
format!("{}:{}", name.ns.to_lowercase(), name.local.to_lowercase())
|
||||
}
|
||||
}
|
||||
|
||||
fn simple_element(
|
||||
name: QualName,
|
||||
attrs: Vec<(ExpandedName, Attribute)>,
|
||||
children: Vec<NodeRef>,
|
||||
) -> NodeRef {
|
||||
let element = NodeRef::new_element(name, attrs);
|
||||
for child in children {
|
||||
child.detach();
|
||||
element.append(child);
|
||||
}
|
||||
element
|
||||
}
|
||||
|
||||
fn create_space_text() -> NodeRef {
|
||||
NodeRef::new_text(" ")
|
||||
}
|
||||
|
||||
enum ElementAction<'t> {
|
||||
Keep(&'t Element),
|
||||
Delete,
|
||||
Space,
|
||||
Elide,
|
||||
Rename(&'t str),
|
||||
}
|
||||
|
||||
fn element_action<'t>(element_name: &QualName, rules: &'t Rules) -> ElementAction<'t> {
|
||||
let name = qual_name_to_string(element_name);
|
||||
if name == "html" || name == "body" {
|
||||
ElementAction::Elide
|
||||
} else if let Some(element_sanitizer) = rules.allowed_elements.get(&name) {
|
||||
ElementAction::Keep(element_sanitizer)
|
||||
} else if rules.delete_elements.contains(&name) {
|
||||
ElementAction::Delete
|
||||
} else if rules.space_elements.contains(&name) {
|
||||
ElementAction::Space
|
||||
} else if let Some(rename_to) = rules.rename_elements.get(&name) {
|
||||
ElementAction::Rename(rename_to)
|
||||
} else {
|
||||
ElementAction::Elide
|
||||
}
|
||||
}
|
||||
|
||||
fn clean_nodes(nodes: impl IntoIterator<Item = NodeRef>, rules: &Rules) -> Vec<NodeRef> {
|
||||
let mut result = Vec::new();
|
||||
for node in nodes {
|
||||
let subnodes = clean_node(&node, rules);
|
||||
result.extend(subnodes);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn clean_node(node: &NodeRef, rules: &Rules) -> Vec<NodeRef> {
|
||||
match node.data() {
|
||||
NodeData::Document(..) => vec![],
|
||||
NodeData::DocumentFragment => vec![], // TODO: ??
|
||||
NodeData::Doctype(..) => vec![],
|
||||
NodeData::ProcessingInstruction(..) => vec![],
|
||||
|
||||
NodeData::Text(..) => vec![node.clone()],
|
||||
|
||||
NodeData::Comment(..) => {
|
||||
if rules.allow_comments {
|
||||
vec![node.clone()]
|
||||
} else {
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
|
||||
NodeData::Element(ElementData {
|
||||
ref name,
|
||||
ref attributes,
|
||||
..
|
||||
}) => {
|
||||
match element_action(name, rules) {
|
||||
ElementAction::Keep(element_sanitizer) => {
|
||||
let mut new_attrs: Vec<(ExpandedName, Attribute)> = Vec::new();
|
||||
|
||||
/* whitelisted attributes */
|
||||
for (attr_name, attr_value) in attributes.borrow().map.iter() {
|
||||
if element_sanitizer
|
||||
.is_valid(&expanded_name_to_string(attr_name), &attr_value.value)
|
||||
{
|
||||
new_attrs.push((attr_name.clone(), attr_value.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
/* mandatory attributes */
|
||||
let mut mandatory_attributes: Vec<(&String, &String)> =
|
||||
element_sanitizer.mandatory_attributes.iter().collect();
|
||||
mandatory_attributes.sort();
|
||||
for &(attr_name, attr_value) in mandatory_attributes.iter() {
|
||||
new_attrs.push((
|
||||
ExpandedName::new(ns!(), LocalName::from(attr_name.as_str())),
|
||||
Attribute {
|
||||
prefix: None,
|
||||
value: attr_value.into(),
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
let children = clean_nodes(node.children(), rules);
|
||||
let element = simple_element(name.clone(), new_attrs, children);
|
||||
|
||||
vec![element]
|
||||
}
|
||||
|
||||
ElementAction::Delete => vec![],
|
||||
|
||||
ElementAction::Elide => clean_nodes(node.children(), rules),
|
||||
|
||||
ElementAction::Space => {
|
||||
let mut nodes = clean_nodes(node.children(), rules);
|
||||
if nodes.is_empty() {
|
||||
nodes.push(create_space_text());
|
||||
} else {
|
||||
nodes.insert(0, create_space_text());
|
||||
nodes.push(create_space_text());
|
||||
}
|
||||
nodes
|
||||
}
|
||||
|
||||
ElementAction::Rename(rename_to) => {
|
||||
let children = clean_nodes(node.children(), rules);
|
||||
vec![simple_element(
|
||||
simple_qual_name(rename_to),
|
||||
Vec::new(),
|
||||
children,
|
||||
)]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn sanitize_dom(dom: &NodeRef, mode: &Rules) -> NodeRef {
|
||||
let new_children = clean_nodes(dom.children(), mode);
|
||||
let new_dom = NodeRef::new_document();
|
||||
for child in new_children {
|
||||
child.detach();
|
||||
new_dom.append(child);
|
||||
}
|
||||
new_dom
|
||||
}
|
|
@ -0,0 +1,645 @@
|
|||
#![cfg(test)]
|
||||
|
||||
use super::rules::predefined::*;
|
||||
use super::rules::{Element, Rules};
|
||||
use super::sanitize_str;
|
||||
|
||||
// Empty input must come back empty under each of the predefined rule sets.
#[test]
fn empty() {
    let sanitized = sanitize_str(&BASIC, "").unwrap();
    assert_eq!(sanitized, "");

    let sanitized = sanitize_str(&DEFAULT, "").unwrap();
    assert_eq!(sanitized, "");

    let sanitized = sanitize_str(&RELAXED, "").unwrap();
    assert_eq!(sanitized, "");

    let sanitized = sanitize_str(&RESTRICTED, "").unwrap();
    assert_eq!(sanitized, "");

    let sanitized = sanitize_str(&UNTRUSTED, "").unwrap();
    assert_eq!(sanitized, "");
}
|
||||
|
||||
/* basic */

// Well-formed markup containing a comment, links, a line break and a script.
const BASIC_HTML: &str = "<b>Lo<!-- comment -->rem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\");</script>";

#[test]
fn basic_default() {
    let sanitized = sanitize_str(&DEFAULT, BASIC_HTML).unwrap();
    assert_eq!(sanitized, "Lorem ipsum dolor sit amet ");
}

#[test]
fn basic_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, BASIC_HTML).unwrap();
    assert_eq!(
        sanitized,
        "<b>Lorem</b> ipsum <strong>dolor</strong> sit amet alert(\"hello world\");"
    );
}

#[test]
fn basic_basic() {
    let sanitized = sanitize_str(&BASIC, BASIC_HTML).unwrap();
    assert_eq!(
        sanitized,
        "<b>Lorem</b> <a href=\"pants\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
    );
}

#[test]
fn basic_relaxed() {
    let sanitized = sanitize_str(&RELAXED, BASIC_HTML).unwrap();
    assert_eq!(
        sanitized,
        "<b>Lorem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
    );
}
|
||||
|
||||
/* malformed */

// Markup with stray end tags, an unterminated attribute quote and an
// unclosed script element.
const MALFORMED_HTML: &str = "Lo<!-- comment -->rem</b> <a href=pants title=\"foo>ipsum <a href=\"http://foo.com/\"><strong>dolor</a></strong> sit<br/>amet <script>alert(\"hello world\");";

#[test]
fn malformed_default() {
    let sanitized = sanitize_str(&DEFAULT, MALFORMED_HTML).unwrap();
    assert_eq!(sanitized, "Lorem dolor sit amet ");
}

#[test]
fn malformed_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, MALFORMED_HTML).unwrap();
    assert_eq!(
        sanitized,
        "Lorem <strong>dolor</strong> sit amet alert(\"hello world\");"
    );
}

#[test]
fn malformed_basic() {
    let sanitized = sanitize_str(&BASIC, MALFORMED_HTML).unwrap();
    assert_eq!(
        sanitized,
        "Lorem <a href=\"pants\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
    );
}

#[test]
fn malformed_relaxed() {
    let sanitized = sanitize_str(&RELAXED, MALFORMED_HTML).unwrap();
    assert_eq!(
        sanitized,
        "Lorem <a href=\"pants\" title=\"foo>ipsum <a href=\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
    );
}
|
||||
|
||||
/* unclosed */

// The trailing <blockquote> is never closed.
const UNCLOSED_HTML: &str = "<p>a</p><blockquote>b";

#[test]
fn unclosed_default() {
    let sanitized = sanitize_str(&DEFAULT, UNCLOSED_HTML).unwrap();
    assert_eq!(sanitized, " a b ");
}

#[test]
fn unclosed_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, UNCLOSED_HTML).unwrap();
    assert_eq!(sanitized, " a b ");
}

#[test]
fn unclosed_basic() {
    let sanitized = sanitize_str(&BASIC, UNCLOSED_HTML).unwrap();
    assert_eq!(sanitized, "<p>a</p><blockquote>b</blockquote>");
}

#[test]
fn unclosed_relaxed() {
    let sanitized = sanitize_str(&RELAXED, UNCLOSED_HTML).unwrap();
    assert_eq!(sanitized, "<p>a</p><blockquote>b</blockquote>");
}
|
||||
|
||||
/* malicious */

// A `javascript:` link plus an attempt to smuggle a script element past the
// sanitizer.
// NOTE(review): the expected strings below contain a literal `<script>`;
// confirm against the serializer whether that text should be entity-escaped
// and whether this fixture lost its entities in transit.
const MALICIOUS_HTML: &str = "<b>Lo<!-- comment -->rem</b> <a href=\"javascript:pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert(\"hello world\");</script>";

#[test]
fn malicious_default() {
    let sanitized = sanitize_str(&DEFAULT, MALICIOUS_HTML).unwrap();
    assert_eq!(
        sanitized,
        "Lorem ipsum dolor sit amet <script>alert(\"hello world\");"
    );
}

#[test]
fn malicious_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, MALICIOUS_HTML).unwrap();
    assert_eq!(
        sanitized,
        "<b>Lorem</b> ipsum <strong>dolor</strong> sit amet <script>alert(\"hello world\");"
    );
}

#[test]
fn malicious_basic() {
    let sanitized = sanitize_str(&BASIC, MALICIOUS_HTML).unwrap();
    assert_eq!(
        sanitized,
        "<b>Lorem</b> <a>ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet <script>alert(\"hello world\");"
    );
}

#[test]
fn malicious_untrusted() {
    let sanitized = sanitize_str(&UNTRUSTED, MALICIOUS_HTML).unwrap();
    assert_eq!(
        sanitized,
        "<b>Lorem</b> <a rel=\"noreferrer noopener\" target=\"_blank\">ipsum</a> <a href=\"http://foo.com/\" rel=\"noreferrer noopener\" target=\"_blank\"><strong>dolor</strong></a> sit amet <script>alert(\"hello world\");"
    );
}

#[test]
fn malicious_relaxed() {
    let sanitized = sanitize_str(&RELAXED, MALICIOUS_HTML).unwrap();
    assert_eq!(
        sanitized,
        "<b>Lorem</b> <a title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet <script>alert(\"hello world\");"
    );
}
|
||||
|
||||
/* raw-comment */

// A top-level comment followed by text; every predefined rule set tested
// here is expected to drop the comment.
const RAW_COMMENT_HTML: &str = "<!-- comment -->Hello";

#[test]
fn raw_comment_default() {
    let sanitized = sanitize_str(&DEFAULT, RAW_COMMENT_HTML).unwrap();
    assert_eq!(sanitized, "Hello");
}

#[test]
fn raw_comment_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, RAW_COMMENT_HTML).unwrap();
    assert_eq!(sanitized, "Hello");
}

#[test]
fn raw_comment_basic() {
    let sanitized = sanitize_str(&BASIC, RAW_COMMENT_HTML).unwrap();
    assert_eq!(sanitized, "Hello");
}

#[test]
fn raw_comment_relaxed() {
    let sanitized = sanitize_str(&RELAXED, RAW_COMMENT_HTML).unwrap();
    assert_eq!(sanitized, "Hello");
}
|
||||
|
||||
/* protocol-based JS injection: simple, no spaces */

const JS_INJECTION_HTML_1: &str = "<a href=\"javascript:alert(\'XSS\');\">foo</a>";

#[test]
fn js_injection_1_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_1).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_1_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_1).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_1_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_1).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}

#[test]
fn js_injection_1_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_1).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}
|
||||
|
||||
/* protocol-based JS injection: simple, spaces before */

const JS_INJECTION_HTML_2: &str = "<a href=\"javascript :alert(\'XSS\');\">foo</a>";

#[test]
fn js_injection_2_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_2).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_2_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_2).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_2_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_2).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}

#[test]
fn js_injection_2_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_2).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}
|
||||
|
||||
/* protocol-based JS injection: simple, spaces after */

const JS_INJECTION_HTML_3: &str = "<a href=\"javascript: alert(\'XSS\');\">foo</a>";

#[test]
fn js_injection_3_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_3).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_3_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_3).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_3_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_3).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}

#[test]
fn js_injection_3_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_3).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}
|
||||
|
||||
/* protocol-based JS injection: simple, spaces before and after */

const JS_INJECTION_HTML_4: &str = "<a href=\"javascript : alert(\'XSS\');\">foo</a>";

#[test]
fn js_injection_4_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_4).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_4_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_4).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_4_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_4).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}

#[test]
fn js_injection_4_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_4).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}
|
||||
|
||||
/* protocol-based JS injection: preceding colon */

const JS_INJECTION_HTML_5: &str = "<a href=\":javascript:alert(\'XSS\');\">foo</a>";

#[test]
fn js_injection_5_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_5).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_5_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_5).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_5_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_5).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}

#[test]
fn js_injection_5_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_5).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}
|
||||
|
||||
/* protocol-based JS injection: UTF-8 encoding */

// NOTE(review): the fixture below contains no character references even
// though the section title mentions an encoding — confirm the literal was not
// entity-decoded somewhere along the way.
const JS_INJECTION_HTML_6: &str = "<a href=\"javascript:\">foo</a>";

#[test]
fn js_injection_6_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_6).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_6_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_6).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_6_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_6).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}

#[test]
fn js_injection_6_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_6).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}
|
||||
|
||||
/* protocol-based JS injection: long UTF-8 encoding */

// NOTE(review): the fixture below contains no character references even
// though the section title mentions an encoding — confirm the literal was not
// entity-decoded somewhere along the way.
const JS_INJECTION_HTML_7: &str = "<a href=\"javascript:\">foo</a>";

#[test]
fn js_injection_7_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_7).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_7_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_7).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_7_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_7).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}

#[test]
fn js_injection_7_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_7).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}
|
||||
|
||||
/* protocol-based JS injection: long UTF-8 encoding without semicolons */

// NOTE(review): the fixture below contains no character references even
// though the section title mentions an encoding — confirm the literal was not
// entity-decoded somewhere along the way.
const JS_INJECTION_HTML_8: &str = "<a href=javascript:alert('XSS')>foo</a>";

#[test]
fn js_injection_8_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_8).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_8_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_8).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_8_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_8).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}

#[test]
fn js_injection_8_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_8).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}
|
||||
|
||||
/* protocol-based JS injection: hex encoding */

// NOTE(review): the fixture below contains no character references even
// though the section title mentions an encoding — confirm the literal was not
// entity-decoded somewhere along the way.
const JS_INJECTION_HTML_9: &str = "<a href=\"javascript:\">foo</a>";

#[test]
fn js_injection_9_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_9).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_9_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_9).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_9_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_9).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}

#[test]
fn js_injection_9_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_9).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}
|
||||
|
||||
/* protocol-based JS injection: long hex encoding */

// NOTE(review): the fixture below contains no character references even
// though the section title mentions an encoding — confirm the literal was not
// entity-decoded somewhere along the way.
const JS_INJECTION_HTML_10: &str = "<a href=\"javascript:\">foo</a>";

#[test]
fn js_injection_10_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_10).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_10_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_10).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_10_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_10).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}

#[test]
fn js_injection_10_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_10).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}
|
||||
|
||||
/* protocol-based JS injection: hex encoding without semicolons */

// NOTE(review): the fixture below contains no character references even
// though the section title mentions an encoding — confirm the literal was not
// entity-decoded somewhere along the way.
const JS_INJECTION_HTML_11: &str = "<a href=javascript:alert('XSS')>foo</a>";

#[test]
fn js_injection_11_default() {
    let sanitized = sanitize_str(&DEFAULT, JS_INJECTION_HTML_11).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_11_restricted() {
    let sanitized = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_11).unwrap();
    assert_eq!(sanitized, "foo");
}

#[test]
fn js_injection_11_basic() {
    let sanitized = sanitize_str(&BASIC, JS_INJECTION_HTML_11).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}

#[test]
fn js_injection_11_relaxed() {
    let sanitized = sanitize_str(&RELAXED, JS_INJECTION_HTML_11).unwrap();
    assert_eq!(sanitized, "<a>foo</a>");
}
|
||||
|
||||
/* should translate valid HTML entities */
// NOTE(review): the input below contains no entities and equals the expected
// output; confirm the literals were not entity-decoded in transit.

#[test]
fn misc_1() {
    let sanitized = sanitize_str(&DEFAULT, "Don't tasé me & bro!").unwrap();
    assert_eq!(sanitized, "Don't tasé me & bro!");
}
|
||||
|
||||
/* should translate valid HTML entities while encoding unencoded ampersands */
// NOTE(review): the input below contains no entities and the expected output
// keeps a bare `&`; confirm the literals were not entity-decoded in transit.

#[test]
fn misc_2() {
    let sanitized = sanitize_str(&DEFAULT, "cookies² & ¼ créme").unwrap();
    assert_eq!(sanitized, "cookies² & ¼ créme");
}
|
||||
|
||||
/* should never output ' */
// NOTE(review): presumably the title refers to an apostrophe entity rather
// than a bare quote — the expected output itself contains `'`; verify.

#[test]
fn misc_3() {
    let input = "<a href=''' class=\"' '\">IE6 isn't a real browser</a>";
    let sanitized = sanitize_str(&DEFAULT, input).unwrap();
    assert_eq!(sanitized, "IE6 isn't a real browser");
}
|
||||
|
||||
/* should not choke on several instances of the same element in a row */

#[test]
fn misc_4() {
    let input = "<img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\">";
    let sanitized = sanitize_str(&DEFAULT, input).unwrap();
    assert_eq!(sanitized, "");
}
|
||||
|
||||
/* should surround the contents of :whitespace_elements with space characters when removing the element */

#[test]
fn misc_5() {
    let sanitized = sanitize_str(&DEFAULT, "foo<div>bar</div>baz").unwrap();
    assert_eq!(sanitized, "foo bar baz");
}
|
||||
|
||||
// A removed <br> is expected to leave a single space behind.
#[test]
fn misc_6() {
    let sanitized = sanitize_str(&DEFAULT, "foo<br>bar<br>baz").unwrap();
    assert_eq!(sanitized, "foo bar baz");
}
|
||||
|
||||
// A removed <hr> is expected to leave a single space behind.
#[test]
fn misc_7() {
    let sanitized = sanitize_str(&DEFAULT, "foo<hr>bar<hr>baz").unwrap();
    assert_eq!(sanitized, "foo bar baz");
}
|
||||
|
||||
// Exercises a hand-built rule set: comments allowed, <b>/<span> kept,
// <script>/<style> deleted, <br> turned into a space and <strong> renamed to
// <span>.
#[test]
fn custom_rules() {
    let custom = Rules::new()
        .allow_comments(true)
        .element(Element::new("b"))
        .element(Element::new("span"))
        .delete("script")
        .delete("style")
        .space("br")
        .rename("strong", "span");

    let input = "<b>Lo<!-- comment -->rem</b> <a href=\"javascript:pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\")</script>";

    let sanitized = sanitize_str(&custom, input).unwrap();
    assert_eq!(
        sanitized,
        "<b>Lo<!-- comment -->rem</b> ipsum <span>dolor</span> sit amet "
    );
}
|
Loading…
Reference in New Issue