Add private fork of sanitize-html-rs
Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
This commit is contained in:
parent
36da496aa1
commit
4e3f7b46da
|
@ -747,6 +747,16 @@ version = "1.0.10"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
|
checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sanitize_html"
|
||||||
|
version = "0.7.0"
|
||||||
|
dependencies = [
|
||||||
|
"html5ever",
|
||||||
|
"kuchiki",
|
||||||
|
"lazy_static",
|
||||||
|
"regex",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "scopeguard"
|
name = "scopeguard"
|
||||||
version = "1.1.0"
|
version = "1.1.0"
|
||||||
|
|
|
@ -2,4 +2,5 @@
|
||||||
|
|
||||||
members = [
|
members = [
|
||||||
"bin",
|
"bin",
|
||||||
]
|
"sanitize-html-rs",
|
||||||
|
]
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
name: Build
|
||||||
|
|
||||||
|
on: [push, pull_request]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os:
|
||||||
|
- ubuntu-latest
|
||||||
|
- macOS-latest
|
||||||
|
- windows-latest
|
||||||
|
rust:
|
||||||
|
- stable
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- uses: actions-rs/toolchain@v1
|
||||||
|
with:
|
||||||
|
toolchain: ${{ matrix.rust }}
|
||||||
|
override: true
|
||||||
|
- name: Build
|
||||||
|
run: |
|
||||||
|
cargo build --all-targets --no-default-features --verbose
|
||||||
|
cargo build --all-targets --verbose
|
||||||
|
- name: Run tests
|
||||||
|
run: cargo test --all-targets --verbose
|
||||||
|
env:
|
||||||
|
RUST_BACKTRACE: 1
|
|
@ -0,0 +1,27 @@
|
||||||
|
name: Coverage
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
coverage:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- uses: actions-rs/toolchain@v1
|
||||||
|
with:
|
||||||
|
toolchain: stable
|
||||||
|
override: true
|
||||||
|
- uses: actions-rs/install@v0.1
|
||||||
|
with:
|
||||||
|
crate: cargo-tarpaulin
|
||||||
|
use-tool-cache: true
|
||||||
|
- name: Run coverage
|
||||||
|
run: cargo tarpaulin -f -t 5 --out Xml -v -- --test-threads=1
|
||||||
|
- name: Upload coverage to Codecov
|
||||||
|
uses: codecov/codecov-action@v1
|
||||||
|
with:
|
||||||
|
token: ${{secrets.CODECOV_TOKEN}}
|
|
@ -0,0 +1,24 @@
|
||||||
|
name: Style check
|
||||||
|
|
||||||
|
on: [push, pull_request]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
clippy:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: Install clippy
|
||||||
|
uses: actions-rs/toolchain@v1
|
||||||
|
with:
|
||||||
|
toolchain: stable
|
||||||
|
components: clippy
|
||||||
|
- uses: actions-rs/clippy-check@v1
|
||||||
|
with:
|
||||||
|
token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
args: --all --all-features
|
||||||
|
fmt:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v1
|
||||||
|
- name: Run fmt check
|
||||||
|
run: cargo fmt --all -- --check
|
|
@ -0,0 +1,4 @@
|
||||||
|
/target/
|
||||||
|
**/*.rs.bk
|
||||||
|
Cargo.lock
|
||||||
|
/.vscode
|
|
@ -0,0 +1,16 @@
|
||||||
|
[package]
|
||||||
|
name = "sanitize_html"
|
||||||
|
version = "0.7.0"
|
||||||
|
authors = ["Andrey Kutejko <andy128k@gmail.com>"]
|
||||||
|
description = "Rule-based HTML Sanitization library"
|
||||||
|
keywords = ["html", "sanitize"]
|
||||||
|
license = "MIT"
|
||||||
|
homepage = "https://github.com/andy128k/sanitize-html-rs"
|
||||||
|
repository = "https://github.com/andy128k/sanitize-html-rs.git"
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
regex = "1"
|
||||||
|
lazy_static = "1"
|
||||||
|
html5ever = "0.25"
|
||||||
|
kuchiki = "0.8"
|
|
@ -0,0 +1,18 @@
|
||||||
|
Copyright (c) 2017 Andrey Kutejko
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
|
the Software without restriction, including without limitation the rights to
|
||||||
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,8 @@
|
||||||
|
# Sanitize HTML
|
||||||
|
|
||||||
|
[![Crates.io Status](https://img.shields.io/crates/v/sanitize_html.svg)](https://crates.io/crates/sanitize_html)
|
||||||
|
[![Build](https://github.com/andy128k/sanitize-html-rs/workflows/Build/badge.svg?branch=master&event=push)](https://github.com/andy128k/sanitize-html-rs/actions?query=workflow%3ABuild)
|
||||||
|
[![codecov](https://codecov.io/gh/andy128k/sanitize-html-rs/branch/master/graph/badge.svg)](https://codecov.io/gh/andy128k/sanitize-html-rs)
|
||||||
|
[![dependency status](https://deps.rs/repo/github/andy128k/sanitize-html-rs/status.svg)](https://deps.rs/repo/github/andy128k/sanitize-html-rs)
|
||||||
|
|
||||||
|
This is a library for sanitization of HTML fragments.
|
|
@ -0,0 +1,37 @@
|
||||||
|
//! Error types which can be emitted by the sanitization procedure.
|
||||||
|
|
||||||
|
use std::error::Error;
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
|
/// Errors that sanitization can report.
#[derive(Debug)]
pub enum SanitizeError {
    /// A byte slice could not be decoded as UTF-8 (wraps `std::str::Utf8Error`).
    StrUtf8Error(std::str::Utf8Error),

    /// An owned byte buffer could not be converted into a `String`
    /// (wraps `std::string::FromUtf8Error`).
    Utf8Error(std::string::FromUtf8Error),

    /// Serialization failed with an I/O error.
    SerializeError(std::io::Error),
}

impl fmt::Display for SanitizeError {
    /// Writes a short category label followed by the wrapped error's own message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::StrUtf8Error(cause) => write!(f, "UTF-8 decode error {}", cause),
            Self::Utf8Error(cause) => write!(f, "UTF-8 decode error {}", cause),
            Self::SerializeError(cause) => write!(f, "Serialization error {}", cause),
        }
    }
}

impl Error for SanitizeError {
    /// Exposes the wrapped error as the underlying cause; every variant has one.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        let cause: &(dyn Error + 'static) = match self {
            Self::StrUtf8Error(inner) => inner,
            Self::Utf8Error(inner) => inner,
            Self::SerializeError(inner) => inner,
        };
        Some(cause)
    }
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
//! HTML Sanitization library
|
||||||
|
//!
|
||||||
|
//! # Examples
|
||||||
|
//!
|
||||||
|
//! ```
|
||||||
|
//! use sanitize_html::sanitize_str;
|
||||||
|
//! use sanitize_html::rules::predefined::DEFAULT;
|
||||||
|
//!
|
||||||
|
//! let input = "<b>Lo<!-- comment -->rem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\");</script>";
|
||||||
|
//!
|
||||||
|
//! let sanitized_default: String = sanitize_str(&DEFAULT, input).unwrap();
|
||||||
|
//! assert_eq!(&sanitized_default, "Lorem ipsum dolor sit amet ");
|
||||||
|
//! ```
|
||||||
|
|
||||||
|
#![deny(missing_docs)]
|
||||||
|
|
||||||
|
pub mod errors;
|
||||||
|
mod parse;
|
||||||
|
pub mod rules;
|
||||||
|
mod sanitize;
|
||||||
|
mod tests;
|
||||||
|
|
||||||
|
use crate::errors::SanitizeError;
|
||||||
|
use crate::rules::Rules;
|
||||||
|
|
||||||
|
/// Sanitize HTML bytes
|
||||||
|
pub fn sanitize_bytes(rules: &Rules, input: &[u8]) -> Result<Vec<u8>, SanitizeError> {
|
||||||
|
let input_str = std::str::from_utf8(input).map_err(SanitizeError::StrUtf8Error)?;
|
||||||
|
let dom = parse::parse_str(input_str);
|
||||||
|
let new_dom = sanitize::sanitize_dom(&dom, rules);
|
||||||
|
let result_bytes = parse::unparse_bytes(&new_dom)?;
|
||||||
|
Ok(result_bytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sanitize HTML string
|
||||||
|
pub fn sanitize_str(rules: &Rules, input: &str) -> Result<String, SanitizeError> {
|
||||||
|
let dom = parse::parse_str(input);
|
||||||
|
let new_dom = sanitize::sanitize_dom(&dom, rules);
|
||||||
|
let result_bytes = parse::unparse_bytes(&new_dom)?;
|
||||||
|
let result_string = String::from_utf8(result_bytes).map_err(SanitizeError::Utf8Error)?;
|
||||||
|
Ok(result_string)
|
||||||
|
}
|
|
@ -0,0 +1,38 @@
|
||||||
|
use super::errors::SanitizeError;
|
||||||
|
use html5ever::{
|
||||||
|
interface::QualName,
|
||||||
|
local_name, namespace_prefix, namespace_url, ns, serialize,
|
||||||
|
serialize::{SerializeOpts, TraversalScope},
|
||||||
|
tendril::TendrilSink,
|
||||||
|
};
|
||||||
|
use kuchiki::{parse_html_with_options, NodeRef, ParseOpts};
|
||||||
|
use std::default::Default;
|
||||||
|
|
||||||
|
pub(crate) fn parse_str(input: &str) -> NodeRef {
|
||||||
|
let mut opts = ParseOpts::default();
|
||||||
|
opts.tree_builder.drop_doctype = true;
|
||||||
|
|
||||||
|
let mut parser = parse_html_with_options(opts);
|
||||||
|
parser.process(input.into());
|
||||||
|
parser.finish()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn unparse_bytes(dom: &NodeRef) -> Result<Vec<u8>, SanitizeError> {
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
|
||||||
|
let parent = QualName::new(
|
||||||
|
Some(namespace_prefix!("html")),
|
||||||
|
ns!(html),
|
||||||
|
local_name!("div"),
|
||||||
|
);
|
||||||
|
|
||||||
|
let opts = SerializeOpts {
|
||||||
|
scripting_enabled: false,
|
||||||
|
traversal_scope: TraversalScope::ChildrenOnly(Some(parent)),
|
||||||
|
create_missing_parent: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
serialize(&mut buf, dom, opts).map_err(SanitizeError::SerializeError)?;
|
||||||
|
|
||||||
|
Ok(buf)
|
||||||
|
}
|
|
@ -0,0 +1,104 @@
|
||||||
|
//! Structures to define sanitization rules.
|
||||||
|
|
||||||
|
pub mod pattern;
|
||||||
|
pub mod predefined;
|
||||||
|
|
||||||
|
use self::pattern::Pattern;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
|
/// structure to describe HTML element
|
||||||
|
pub struct Element {
|
||||||
|
/// name of an element
|
||||||
|
pub name: String,
|
||||||
|
/// Whitelist of allowed attributes
|
||||||
|
pub attributes: HashMap<String, Pattern>,
|
||||||
|
/// List of mandatory atributes and their values.
|
||||||
|
/// These attributes will be forcibly added to element.
|
||||||
|
pub mandatory_attributes: HashMap<String, String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Element {
|
||||||
|
/// Creates element descriptor
|
||||||
|
pub fn new(name: &str) -> Self {
|
||||||
|
Self {
|
||||||
|
name: name.to_owned(),
|
||||||
|
attributes: HashMap::new(),
|
||||||
|
mandatory_attributes: HashMap::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds an attribute
|
||||||
|
pub fn attribute(mut self, attribute: &str, pattern: Pattern) -> Self {
|
||||||
|
self.attributes.insert(attribute.to_owned(), pattern);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds mandatory attribute
|
||||||
|
pub fn mandatory_attribute(mut self, attribute: &str, value: &str) -> Self {
|
||||||
|
self.mandatory_attributes
|
||||||
|
.insert(attribute.to_owned(), value.to_owned());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks if attribute is valid
|
||||||
|
pub fn is_valid(&self, attribute: &str, value: &str) -> bool {
|
||||||
|
match self.attributes.get(attribute) {
|
||||||
|
None => false,
|
||||||
|
Some(pattern) => pattern.matches(value),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// structure to describe sanitization rules
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct Rules {
|
||||||
|
/// Determines if comments are kept of stripped out of a document.
|
||||||
|
pub allow_comments: bool,
|
||||||
|
/// Allowed elements.
|
||||||
|
pub allowed_elements: HashMap<String, Element>,
|
||||||
|
/// Elements which will be removed together with their children.
|
||||||
|
pub delete_elements: HashSet<String>,
|
||||||
|
/// Elements which will be replaced by spaces (Their children will be processed recursively).
|
||||||
|
pub space_elements: HashSet<String>,
|
||||||
|
/// Elements which will be renamed.
|
||||||
|
pub rename_elements: HashMap<String, String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Rules {
|
||||||
|
/// Creates a new rules set.
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sets if comments are allowed
|
||||||
|
pub fn allow_comments(mut self, allow_comments: bool) -> Self {
|
||||||
|
self.allow_comments = allow_comments;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds a rule for an allowed element
|
||||||
|
pub fn element(mut self, element: Element) -> Self {
|
||||||
|
self.allowed_elements.insert(element.name.clone(), element);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds a rule to delete an element
|
||||||
|
pub fn delete(mut self, element_name: &str) -> Self {
|
||||||
|
self.delete_elements.insert(element_name.to_owned());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds a rule to replace an element with space
|
||||||
|
pub fn space(mut self, element_name: &str) -> Self {
|
||||||
|
self.space_elements.insert(element_name.to_owned());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds a rule to rename an element
|
||||||
|
pub fn rename(mut self, element_name: &str, to: &str) -> Self {
|
||||||
|
self.rename_elements
|
||||||
|
.insert(element_name.to_owned(), to.to_owned());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,127 @@
|
||||||
|
//! This module contains code dedicated to checking the validity of an attribute's value.
|
||||||
|
//!
|
||||||
|
//! # Examples
|
||||||
|
//! ```
|
||||||
|
//! use sanitize_html::rules::pattern::Pattern;
|
||||||
|
//! use regex::Regex;
|
||||||
|
//!
|
||||||
|
//! let href = Pattern::regex(Regex::new("^(ftp:|http:|https:|mailto:)").unwrap()) |
|
||||||
|
//! !Pattern::regex(Regex::new("^[^/]+[[:space:]]*:").unwrap());
|
||||||
|
//!
|
||||||
|
//! assert!(href.matches("filename.xls"));
|
||||||
|
//! assert!(href.matches("http://foo.com/"));
|
||||||
|
//! assert!(href.matches(" filename with spaces .zip "));
|
||||||
|
//! assert!(!href.matches(" javascript : window.location = '//example.com/'")); // Attempt to make XSS
|
||||||
|
//! ```
|
||||||
|
|
||||||
|
use regex::Regex;
|
||||||
|
|
||||||
|
/// Value pattern
|
||||||
|
pub struct Pattern(pub Box<dyn Fn(&str) -> bool + Sync + Send>);
|
||||||
|
|
||||||
|
impl Pattern {
|
||||||
|
/// Creates pattern which accepts any value.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
/// ```
|
||||||
|
/// use sanitize_html::rules::pattern::Pattern;
|
||||||
|
/// use regex::Regex;
|
||||||
|
///
|
||||||
|
/// let pattern = Pattern::any();
|
||||||
|
/// assert!(pattern.matches(""));
|
||||||
|
/// assert!(pattern.matches("pants"));
|
||||||
|
/// ```
|
||||||
|
pub fn any() -> Self {
|
||||||
|
Pattern(Box::new(move |_value| true))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates pattern which uses regular expression to check a value. Panics
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
/// ```
|
||||||
|
/// use sanitize_html::rules::pattern::Pattern;
|
||||||
|
/// use regex::Regex;
|
||||||
|
///
|
||||||
|
/// let pattern = Pattern::regex(Regex::new("ant").unwrap());
|
||||||
|
/// assert!(!pattern.matches(""));
|
||||||
|
/// assert!(pattern.matches("pants"));
|
||||||
|
/// ```
|
||||||
|
pub fn regex(re: Regex) -> Self {
|
||||||
|
Pattern(Box::new(move |value| re.is_match(value)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks if a value matches to a pattern.
|
||||||
|
pub fn matches(&self, value: &str) -> bool {
|
||||||
|
(self.0)(value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ::std::ops::Not for Pattern {
|
||||||
|
type Output = Pattern;
|
||||||
|
|
||||||
|
/// Negates pattern
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
/// ```
|
||||||
|
/// use sanitize_html::rules::pattern::Pattern;
|
||||||
|
/// use regex::Regex;
|
||||||
|
///
|
||||||
|
/// let pattern = !Pattern::any();
|
||||||
|
/// assert!(!pattern.matches(""));
|
||||||
|
/// assert!(!pattern.matches("pants"));
|
||||||
|
/// ```
|
||||||
|
fn not(self) -> Self::Output {
|
||||||
|
let cb = self.0;
|
||||||
|
Pattern(Box::new(move |value| !cb(value)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ::std::ops::BitAnd for Pattern {
|
||||||
|
type Output = Pattern;
|
||||||
|
|
||||||
|
/// Combines two patterns into a pattern which matches a string iff both patterns match that string.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
/// ```
|
||||||
|
/// use sanitize_html::rules::pattern::Pattern;
|
||||||
|
/// use regex::Regex;
|
||||||
|
///
|
||||||
|
/// let pan = Pattern::regex(Regex::new("pan").unwrap());
|
||||||
|
/// let ant = Pattern::regex(Regex::new("ant").unwrap());
|
||||||
|
/// let pattern = pan & ant;
|
||||||
|
///
|
||||||
|
/// assert!(!pattern.matches("pan"));
|
||||||
|
/// assert!(!pattern.matches("ant"));
|
||||||
|
/// assert!(pattern.matches("pants"));
|
||||||
|
/// ```
|
||||||
|
fn bitand(self, rhs: Pattern) -> Self::Output {
|
||||||
|
let cb1 = self.0;
|
||||||
|
let cb2 = rhs.0;
|
||||||
|
Pattern(Box::new(move |value| cb1(value) && cb2(value)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ::std::ops::BitOr for Pattern {
|
||||||
|
type Output = Pattern;
|
||||||
|
|
||||||
|
/// Combines two patterns into a pattern which matches a string if one of patterns matches that string.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
/// ```
|
||||||
|
/// use sanitize_html::rules::pattern::Pattern;
|
||||||
|
/// use regex::Regex;
|
||||||
|
///
|
||||||
|
/// let pan = Pattern::regex(Regex::new("pan").unwrap());
|
||||||
|
/// let pot = Pattern::regex(Regex::new("pot").unwrap());
|
||||||
|
/// let pattern = pan | pot;
|
||||||
|
///
|
||||||
|
/// assert!(pattern.matches("pants"));
|
||||||
|
/// assert!(pattern.matches("pot"));
|
||||||
|
/// assert!(!pattern.matches("jar"));
|
||||||
|
/// ```
|
||||||
|
fn bitor(self, rhs: Pattern) -> Self::Output {
|
||||||
|
let cb1 = self.0;
|
||||||
|
let cb2 = rhs.0;
|
||||||
|
Pattern(Box::new(move |value| cb1(value) || cb2(value)))
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,374 @@
|
||||||
|
//! Predefined rules
|
||||||
|
//!
|
||||||
|
//! These rules are inspired by a great Ruby gem [sanitize](https://github.com/rgrove/sanitize/).
|
||||||
|
|
||||||
|
use super::pattern::Pattern;
|
||||||
|
use super::{Element, Rules};
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use regex::Regex;
|
||||||
|
|
||||||
|
fn re(regex: &str) -> Pattern {
|
||||||
|
Pattern::regex(Regex::new(regex).unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn href() -> Pattern {
|
||||||
|
re("^(ftp:|http:|https:|mailto:)") | !re("^[^/]+[[:space:]]*:")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn src() -> Pattern {
|
||||||
|
re("^(http:|https:)") | !re("^[^/]+[[:space:]]*:")
|
||||||
|
}
|
||||||
|
|
||||||
|
lazy_static! {
    /// Basic rule set: permits a variety of markup including formatting elements, links, and lists.
    pub static ref BASIC: Rules = basic();

    /// Default rule set: removes all tags.
    pub static ref DEFAULT: Rules = default();

    /// Relaxed rule set: permits an even wider variety of markup, including images and tables.
    pub static ref RELAXED: Rules = relaxed();

    /// Restricted rule set: permits only very simple inline markup; no links, images, or block elements.
    pub static ref RESTRICTED: Rules = restricted();

    /// Rule set for documents from untrusted sources: removes all tags except text emphasis and links.
    pub static ref UNTRUSTED: Rules = untrusted();
}
|
||||||
|
|
||||||
|
fn basic() -> Rules {
|
||||||
|
Rules::new()
|
||||||
|
.element(Element::new("a").attribute("href", href()))
|
||||||
|
.element(Element::new("abbr").attribute("title", Pattern::any()))
|
||||||
|
.element(Element::new("b"))
|
||||||
|
.element(Element::new("blockquote").attribute("cite", src()))
|
||||||
|
.element(Element::new("br"))
|
||||||
|
.element(Element::new("br"))
|
||||||
|
.element(Element::new("cite"))
|
||||||
|
.element(Element::new("code"))
|
||||||
|
.element(Element::new("dd"))
|
||||||
|
.element(Element::new("dfn").attribute("title", Pattern::any()))
|
||||||
|
.element(Element::new("dl"))
|
||||||
|
.element(Element::new("dt"))
|
||||||
|
.element(Element::new("em"))
|
||||||
|
.element(Element::new("i"))
|
||||||
|
.element(Element::new("kbd"))
|
||||||
|
.element(Element::new("li"))
|
||||||
|
.element(Element::new("mark"))
|
||||||
|
.element(Element::new("ol"))
|
||||||
|
.element(Element::new("p"))
|
||||||
|
.element(Element::new("pre"))
|
||||||
|
.element(Element::new("q").attribute("cite", src()))
|
||||||
|
.element(Element::new("s"))
|
||||||
|
.element(Element::new("samp"))
|
||||||
|
.element(Element::new("small"))
|
||||||
|
.element(Element::new("strike"))
|
||||||
|
.element(Element::new("strong"))
|
||||||
|
.element(Element::new("sub"))
|
||||||
|
.element(Element::new("sup"))
|
||||||
|
.element(
|
||||||
|
Element::new("time")
|
||||||
|
.attribute("datetime", Pattern::any())
|
||||||
|
.attribute("pubdate", Pattern::any()),
|
||||||
|
)
|
||||||
|
.element(Element::new("u"))
|
||||||
|
.element(Element::new("ul"))
|
||||||
|
.element(Element::new("var"))
|
||||||
|
.space("address")
|
||||||
|
.space("article")
|
||||||
|
.space("aside")
|
||||||
|
.space("div")
|
||||||
|
.space("footer")
|
||||||
|
.space("h1")
|
||||||
|
.space("h2")
|
||||||
|
.space("h3")
|
||||||
|
.space("h4")
|
||||||
|
.space("h5")
|
||||||
|
.space("h6")
|
||||||
|
.space("header")
|
||||||
|
.space("hgroup")
|
||||||
|
.space("hr")
|
||||||
|
.space("nav")
|
||||||
|
.space("section")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default() -> Rules {
|
||||||
|
Rules::new()
|
||||||
|
.space("address")
|
||||||
|
.space("article")
|
||||||
|
.space("aside")
|
||||||
|
.space("blockquote")
|
||||||
|
.space("br")
|
||||||
|
.space("dd")
|
||||||
|
.space("div")
|
||||||
|
.space("dl")
|
||||||
|
.space("dt")
|
||||||
|
.space("footer")
|
||||||
|
.space("h1")
|
||||||
|
.space("h2")
|
||||||
|
.space("h3")
|
||||||
|
.space("h4")
|
||||||
|
.space("h5")
|
||||||
|
.space("h6")
|
||||||
|
.space("header")
|
||||||
|
.space("hgroup")
|
||||||
|
.space("hr")
|
||||||
|
.space("li")
|
||||||
|
.space("nav")
|
||||||
|
.space("ol")
|
||||||
|
.space("p")
|
||||||
|
.space("pre")
|
||||||
|
.space("section")
|
||||||
|
.space("ul")
|
||||||
|
.delete("iframe")
|
||||||
|
.delete("noembed")
|
||||||
|
.delete("noframes")
|
||||||
|
.delete("noscript")
|
||||||
|
.delete("script")
|
||||||
|
.delete("style")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn relaxed() -> Rules {
|
||||||
|
fn relaxed_element(name: &str) -> Element {
|
||||||
|
Element::new(name)
|
||||||
|
.attribute("dir", Pattern::any())
|
||||||
|
.attribute("lang", Pattern::any())
|
||||||
|
.attribute("title", Pattern::any())
|
||||||
|
.attribute("class", Pattern::any())
|
||||||
|
}
|
||||||
|
|
||||||
|
Rules::new()
|
||||||
|
.element(relaxed_element("a").attribute("href", href()))
|
||||||
|
.element(relaxed_element("abbr"))
|
||||||
|
.element(relaxed_element("b"))
|
||||||
|
.element(relaxed_element("bdo"))
|
||||||
|
.element(relaxed_element("blockquote").attribute("cite", src()))
|
||||||
|
.element(relaxed_element("br"))
|
||||||
|
.element(relaxed_element("caption"))
|
||||||
|
.element(relaxed_element("cite"))
|
||||||
|
.element(relaxed_element("code"))
|
||||||
|
.element(
|
||||||
|
relaxed_element("col")
|
||||||
|
.attribute("span", Pattern::any())
|
||||||
|
.attribute("width", Pattern::any()),
|
||||||
|
)
|
||||||
|
.element(
|
||||||
|
relaxed_element("colgroup")
|
||||||
|
.attribute("span", Pattern::any())
|
||||||
|
.attribute("width", Pattern::any()),
|
||||||
|
)
|
||||||
|
.element(relaxed_element("dd"))
|
||||||
|
.element(
|
||||||
|
relaxed_element("del")
|
||||||
|
.attribute("cite", src())
|
||||||
|
.attribute("datetime", Pattern::any()),
|
||||||
|
)
|
||||||
|
.element(relaxed_element("dfn"))
|
||||||
|
.element(relaxed_element("dl"))
|
||||||
|
.element(relaxed_element("dt"))
|
||||||
|
.element(relaxed_element("em"))
|
||||||
|
.element(relaxed_element("figcaption"))
|
||||||
|
.element(relaxed_element("figure"))
|
||||||
|
.element(relaxed_element("h1"))
|
||||||
|
.element(relaxed_element("h2"))
|
||||||
|
.element(relaxed_element("h3"))
|
||||||
|
.element(relaxed_element("h4"))
|
||||||
|
.element(relaxed_element("h5"))
|
||||||
|
.element(relaxed_element("h6"))
|
||||||
|
.element(relaxed_element("hgroup"))
|
||||||
|
.element(relaxed_element("i"))
|
||||||
|
.element(
|
||||||
|
relaxed_element("img")
|
||||||
|
.attribute("src", src())
|
||||||
|
.attribute("align", Pattern::any())
|
||||||
|
.attribute("alt", Pattern::any())
|
||||||
|
.attribute("width", Pattern::any())
|
||||||
|
.attribute("height", Pattern::any()),
|
||||||
|
)
|
||||||
|
.element(
|
||||||
|
relaxed_element("ins")
|
||||||
|
.attribute("cite", src())
|
||||||
|
.attribute("datetime", Pattern::any()),
|
||||||
|
)
|
||||||
|
.element(relaxed_element("kbd"))
|
||||||
|
.element(relaxed_element("li"))
|
||||||
|
.element(relaxed_element("mark"))
|
||||||
|
.element(
|
||||||
|
relaxed_element("ol")
|
||||||
|
.attribute("start", Pattern::any())
|
||||||
|
.attribute("reversed", Pattern::any())
|
||||||
|
.attribute("type", Pattern::any()),
|
||||||
|
)
|
||||||
|
.element(relaxed_element("p"))
|
||||||
|
.element(relaxed_element("pre"))
|
||||||
|
.element(relaxed_element("q").attribute("cite", src()))
|
||||||
|
.element(relaxed_element("rp"))
|
||||||
|
.element(relaxed_element("rt"))
|
||||||
|
.element(relaxed_element("ruby"))
|
||||||
|
.element(relaxed_element("s"))
|
||||||
|
.element(relaxed_element("samp"))
|
||||||
|
.element(relaxed_element("small"))
|
||||||
|
.element(relaxed_element("strike"))
|
||||||
|
.element(relaxed_element("strong"))
|
||||||
|
.element(relaxed_element("sub"))
|
||||||
|
.element(relaxed_element("sup"))
|
||||||
|
.element(
|
||||||
|
relaxed_element("table")
|
||||||
|
.attribute("summary", Pattern::any())
|
||||||
|
.attribute("width", Pattern::any()),
|
||||||
|
)
|
||||||
|
.element(relaxed_element("tbody"))
|
||||||
|
.element(
|
||||||
|
relaxed_element("td")
|
||||||
|
.attribute("abbr", Pattern::any())
|
||||||
|
.attribute("axis", Pattern::any())
|
||||||
|
.attribute("colspan", Pattern::any())
|
||||||
|
.attribute("rowspan", Pattern::any())
|
||||||
|
.attribute("width", Pattern::any()),
|
||||||
|
)
|
||||||
|
.element(relaxed_element("tfoot"))
|
||||||
|
.element(
|
||||||
|
relaxed_element("th")
|
||||||
|
.attribute("abbr", Pattern::any())
|
||||||
|
.attribute("axis", Pattern::any())
|
||||||
|
.attribute("colspan", Pattern::any())
|
||||||
|
.attribute("rowspan", Pattern::any())
|
||||||
|
.attribute("scope", Pattern::any())
|
||||||
|
.attribute("width", Pattern::any()),
|
||||||
|
)
|
||||||
|
.element(relaxed_element("thead"))
|
||||||
|
.element(
|
||||||
|
relaxed_element("time")
|
||||||
|
.attribute("datetime", Pattern::any())
|
||||||
|
.attribute("pubdate", Pattern::any()),
|
||||||
|
)
|
||||||
|
.element(relaxed_element("tr"))
|
||||||
|
.element(relaxed_element("u"))
|
||||||
|
.element(relaxed_element("ul").attribute("type", Pattern::any()))
|
||||||
|
.element(relaxed_element("var"))
|
||||||
|
.element(relaxed_element("wbr"))
|
||||||
|
.space("address")
|
||||||
|
.space("article")
|
||||||
|
.space("aside")
|
||||||
|
.space("footer")
|
||||||
|
.space("header")
|
||||||
|
.space("hr")
|
||||||
|
.space("nav")
|
||||||
|
.space("section")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn restricted() -> Rules {
|
||||||
|
Rules::new()
|
||||||
|
.element(Element::new("b"))
|
||||||
|
.element(Element::new("em"))
|
||||||
|
.element(Element::new("i"))
|
||||||
|
.element(Element::new("strong"))
|
||||||
|
.element(Element::new("u"))
|
||||||
|
.space("address")
|
||||||
|
.space("article")
|
||||||
|
.space("aside")
|
||||||
|
.space("blockquote")
|
||||||
|
.space("br")
|
||||||
|
.space("dd")
|
||||||
|
.space("div")
|
||||||
|
.space("dl")
|
||||||
|
.space("dt")
|
||||||
|
.space("footer")
|
||||||
|
.space("h1")
|
||||||
|
.space("h2")
|
||||||
|
.space("h3")
|
||||||
|
.space("h4")
|
||||||
|
.space("h5")
|
||||||
|
.space("h6")
|
||||||
|
.space("header")
|
||||||
|
.space("hgroup")
|
||||||
|
.space("hr")
|
||||||
|
.space("li")
|
||||||
|
.space("nav")
|
||||||
|
.space("ol")
|
||||||
|
.space("p")
|
||||||
|
.space("pre")
|
||||||
|
.space("section")
|
||||||
|
.space("ul")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn untrusted() -> Rules {
|
||||||
|
Rules::new()
|
||||||
|
.element(
|
||||||
|
Element::new("a")
|
||||||
|
.attribute("href", href())
|
||||||
|
.mandatory_attribute("target", "_blank")
|
||||||
|
.mandatory_attribute("rel", "noreferrer noopener"),
|
||||||
|
)
|
||||||
|
.element(Element::new("b"))
|
||||||
|
.element(Element::new("em"))
|
||||||
|
.element(Element::new("i"))
|
||||||
|
.element(Element::new("strong"))
|
||||||
|
.element(Element::new("u"))
|
||||||
|
.space("address")
|
||||||
|
.space("article")
|
||||||
|
.space("aside")
|
||||||
|
.space("blockquote")
|
||||||
|
.space("br")
|
||||||
|
.space("dd")
|
||||||
|
.space("div")
|
||||||
|
.space("dl")
|
||||||
|
.space("dt")
|
||||||
|
.space("footer")
|
||||||
|
.space("h1")
|
||||||
|
.space("h2")
|
||||||
|
.space("h3")
|
||||||
|
.space("h4")
|
||||||
|
.space("h5")
|
||||||
|
.space("h6")
|
||||||
|
.space("header")
|
||||||
|
.space("hgroup")
|
||||||
|
.space("hr")
|
||||||
|
.space("li")
|
||||||
|
.space("nav")
|
||||||
|
.space("ol")
|
||||||
|
.space("p")
|
||||||
|
.space("pre")
|
||||||
|
.space("section")
|
||||||
|
.space("ul")
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::{basic, default, relaxed, restricted, untrusted};

    // Each test builds one predefined rule set (which would panic on an invalid
    // regular expression) and pins the number of rules it contains.

    #[test]
    fn basic_does_not_fail() {
        assert_eq!(basic().allowed_elements.len(), 31);
    }

    #[test]
    fn default_does_not_fail() {
        let built = default();
        assert_eq!(built.allowed_elements.len(), 0);
        assert_eq!(built.space_elements.len(), 26);
        assert_eq!(built.delete_elements.len(), 6);
    }

    #[test]
    fn relaxed_does_not_fail() {
        let built = relaxed();
        assert_eq!(built.allowed_elements.len(), 58);
        assert_eq!(built.space_elements.len(), 8);
    }

    #[test]
    fn restricted_does_not_fail() {
        let built = restricted();
        assert_eq!(built.allowed_elements.len(), 5);
        assert_eq!(built.space_elements.len(), 26);
    }

    #[test]
    fn untrusted_does_not_fail() {
        let built = untrusted();
        assert_eq!(built.allowed_elements.len(), 6);
        assert_eq!(built.space_elements.len(), 26);
    }
}
|
|
@ -0,0 +1,167 @@
|
||||||
|
use crate::rules::{Element, Rules};
|
||||||
|
use html5ever::{interface::QualName, namespace_url, ns, LocalName};
|
||||||
|
use kuchiki::{Attribute, ElementData, ExpandedName, NodeData, NodeRef};
|
||||||
|
|
||||||
|
fn simple_qual_name(name: &str) -> QualName {
|
||||||
|
QualName::new(None, ns!(), LocalName::from(name))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn qual_name_to_string(name: &QualName) -> String {
|
||||||
|
if name.ns == ns!(html) || name.ns.is_empty() {
|
||||||
|
name.local.to_lowercase()
|
||||||
|
} else {
|
||||||
|
format!("{}:{}", name.ns.to_lowercase(), name.local.to_lowercase())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn expanded_name_to_string(name: &ExpandedName) -> String {
|
||||||
|
if name.ns == ns!(html) || name.ns.is_empty() {
|
||||||
|
name.local.to_lowercase()
|
||||||
|
} else {
|
||||||
|
format!("{}:{}", name.ns.to_lowercase(), name.local.to_lowercase())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn simple_element(
|
||||||
|
name: QualName,
|
||||||
|
attrs: Vec<(ExpandedName, Attribute)>,
|
||||||
|
children: Vec<NodeRef>,
|
||||||
|
) -> NodeRef {
|
||||||
|
let element = NodeRef::new_element(name, attrs);
|
||||||
|
for child in children {
|
||||||
|
child.detach();
|
||||||
|
element.append(child);
|
||||||
|
}
|
||||||
|
element
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_space_text() -> NodeRef {
|
||||||
|
NodeRef::new_text(" ")
|
||||||
|
}
|
||||||
|
|
||||||
|
enum ElementAction<'t> {
|
||||||
|
Keep(&'t Element),
|
||||||
|
Delete,
|
||||||
|
Space,
|
||||||
|
Elide,
|
||||||
|
Rename(&'t str),
|
||||||
|
}
|
||||||
|
|
||||||
|
fn element_action<'t>(element_name: &QualName, rules: &'t Rules) -> ElementAction<'t> {
|
||||||
|
let name = qual_name_to_string(element_name);
|
||||||
|
if name == "html" || name == "body" {
|
||||||
|
ElementAction::Elide
|
||||||
|
} else if let Some(element_sanitizer) = rules.allowed_elements.get(&name) {
|
||||||
|
ElementAction::Keep(element_sanitizer)
|
||||||
|
} else if rules.delete_elements.contains(&name) {
|
||||||
|
ElementAction::Delete
|
||||||
|
} else if rules.space_elements.contains(&name) {
|
||||||
|
ElementAction::Space
|
||||||
|
} else if let Some(rename_to) = rules.rename_elements.get(&name) {
|
||||||
|
ElementAction::Rename(rename_to)
|
||||||
|
} else {
|
||||||
|
ElementAction::Elide
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn clean_nodes(nodes: impl IntoIterator<Item = NodeRef>, rules: &Rules) -> Vec<NodeRef> {
|
||||||
|
let mut result = Vec::new();
|
||||||
|
for node in nodes {
|
||||||
|
let subnodes = clean_node(&node, rules);
|
||||||
|
result.extend(subnodes);
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
fn clean_node(node: &NodeRef, rules: &Rules) -> Vec<NodeRef> {
|
||||||
|
match node.data() {
|
||||||
|
NodeData::Document(..) => vec![],
|
||||||
|
NodeData::DocumentFragment => vec![], // TODO: ??
|
||||||
|
NodeData::Doctype(..) => vec![],
|
||||||
|
NodeData::ProcessingInstruction(..) => vec![],
|
||||||
|
|
||||||
|
NodeData::Text(..) => vec![node.clone()],
|
||||||
|
|
||||||
|
NodeData::Comment(..) => {
|
||||||
|
if rules.allow_comments {
|
||||||
|
vec![node.clone()]
|
||||||
|
} else {
|
||||||
|
vec![]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
NodeData::Element(ElementData {
|
||||||
|
ref name,
|
||||||
|
ref attributes,
|
||||||
|
..
|
||||||
|
}) => {
|
||||||
|
match element_action(name, rules) {
|
||||||
|
ElementAction::Keep(element_sanitizer) => {
|
||||||
|
let mut new_attrs: Vec<(ExpandedName, Attribute)> = Vec::new();
|
||||||
|
|
||||||
|
/* whitelisted attributes */
|
||||||
|
for (attr_name, attr_value) in attributes.borrow().map.iter() {
|
||||||
|
if element_sanitizer
|
||||||
|
.is_valid(&expanded_name_to_string(attr_name), &attr_value.value)
|
||||||
|
{
|
||||||
|
new_attrs.push((attr_name.clone(), attr_value.clone()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* mandatory attributes */
|
||||||
|
let mut mandatory_attributes: Vec<(&String, &String)> =
|
||||||
|
element_sanitizer.mandatory_attributes.iter().collect();
|
||||||
|
mandatory_attributes.sort();
|
||||||
|
for &(attr_name, attr_value) in mandatory_attributes.iter() {
|
||||||
|
new_attrs.push((
|
||||||
|
ExpandedName::new(ns!(), LocalName::from(attr_name.as_str())),
|
||||||
|
Attribute {
|
||||||
|
prefix: None,
|
||||||
|
value: attr_value.into(),
|
||||||
|
},
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let children = clean_nodes(node.children(), rules);
|
||||||
|
let element = simple_element(name.clone(), new_attrs, children);
|
||||||
|
|
||||||
|
vec![element]
|
||||||
|
}
|
||||||
|
|
||||||
|
ElementAction::Delete => vec![],
|
||||||
|
|
||||||
|
ElementAction::Elide => clean_nodes(node.children(), rules),
|
||||||
|
|
||||||
|
ElementAction::Space => {
|
||||||
|
let mut nodes = clean_nodes(node.children(), rules);
|
||||||
|
if nodes.is_empty() {
|
||||||
|
nodes.push(create_space_text());
|
||||||
|
} else {
|
||||||
|
nodes.insert(0, create_space_text());
|
||||||
|
nodes.push(create_space_text());
|
||||||
|
}
|
||||||
|
nodes
|
||||||
|
}
|
||||||
|
|
||||||
|
ElementAction::Rename(rename_to) => {
|
||||||
|
let children = clean_nodes(node.children(), rules);
|
||||||
|
vec![simple_element(
|
||||||
|
simple_qual_name(rename_to),
|
||||||
|
Vec::new(),
|
||||||
|
children,
|
||||||
|
)]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn sanitize_dom(dom: &NodeRef, mode: &Rules) -> NodeRef {
|
||||||
|
let new_children = clean_nodes(dom.children(), mode);
|
||||||
|
let new_dom = NodeRef::new_document();
|
||||||
|
for child in new_children {
|
||||||
|
child.detach();
|
||||||
|
new_dom.append(child);
|
||||||
|
}
|
||||||
|
new_dom
|
||||||
|
}
|
|
@ -0,0 +1,645 @@
|
||||||
|
#![cfg(test)]
|
||||||
|
|
||||||
|
use super::rules::predefined::*;
|
||||||
|
use super::rules::{Element, Rules};
|
||||||
|
use super::sanitize_str;
|
||||||
|
|
||||||
|
/// Empty input sanitizes to an empty string under every predefined rule set.
#[test]
fn empty() {
    let sanitize_empty = |rules: &Rules| sanitize_str(rules, "").unwrap();

    assert_eq!(sanitize_empty(&BASIC), "");
    assert_eq!(sanitize_empty(&DEFAULT), "");
    assert_eq!(sanitize_empty(&RELAXED), "");
    assert_eq!(sanitize_empty(&RESTRICTED), "");
    assert_eq!(sanitize_empty(&UNTRUSTED), "");
}
|
||||||
|
|
||||||
|
/* basic */
|
||||||
|
|
||||||
|
/// Well-formed fixture: inline markup, a comment, two links, a `<br/>` and a script.
const BASIC_HTML: &str = "<b>Lo<!-- comment -->rem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\");</script>";

/// DEFAULT leaves only text and drops the script body.
#[test]
fn basic_default() {
    let cleaned = sanitize_str(&DEFAULT, BASIC_HTML).unwrap();
    assert_eq!(cleaned, "Lorem ipsum dolor sit amet ");
}

/// RESTRICTED keeps `<b>` and `<strong>` and leaves the script body as text.
#[test]
fn basic_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, BASIC_HTML).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lorem</b> ipsum <strong>dolor</strong> sit amet alert(\"hello world\");"
    );
}

/// BASIC keeps the links (without `title`) and the `<br>`.
#[test]
fn basic_basic() {
    let cleaned = sanitize_str(&BASIC, BASIC_HTML).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lorem</b> <a href=\"pants\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
    );
}

/// RELAXED additionally keeps the `title` attribute.
#[test]
fn basic_relaxed() {
    let cleaned = sanitize_str(&RELAXED, BASIC_HTML).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lorem</b> <a href=\"pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
    );
}
|
||||||
|
|
||||||
|
/* malformed */
|
||||||
|
|
||||||
|
/// Malformed fixture: stray end tag, unterminated attribute quote, mis-nested tags, unclosed script.
const MALFORMED_HTML: &str = "Lo<!-- comment -->rem</b> <a href=pants title=\"foo>ipsum <a href=\"http://foo.com/\"><strong>dolor</a></strong> sit<br/>amet <script>alert(\"hello world\");";

/// DEFAULT output for the malformed fixture.
#[test]
fn malformed_default() {
    let cleaned = sanitize_str(&DEFAULT, MALFORMED_HTML).unwrap();
    assert_eq!(cleaned, "Lorem dolor sit amet ");
}

/// RESTRICTED output for the malformed fixture.
#[test]
fn malformed_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, MALFORMED_HTML).unwrap();
    assert_eq!(
        cleaned,
        "Lorem <strong>dolor</strong> sit amet alert(\"hello world\");"
    );
}

/// BASIC output for the malformed fixture.
#[test]
fn malformed_basic() {
    let cleaned = sanitize_str(&BASIC, MALFORMED_HTML).unwrap();
    assert_eq!(
        cleaned,
        "Lorem <a href=\"pants\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
    );
}

/// RELAXED output for the malformed fixture; the runaway `title` value is kept.
#[test]
fn malformed_relaxed() {
    let cleaned = sanitize_str(&RELAXED, MALFORMED_HTML).unwrap();
    assert_eq!(
        cleaned,
        "Lorem <a href=\"pants\" title=\"foo>ipsum <a href=\"><strong>dolor</strong></a> sit<br>amet alert(\"hello world\");"
    );
}
|
||||||
|
|
||||||
|
/* unclosed */
|
||||||
|
|
||||||
|
/// Fixture with a `<blockquote>` that is never closed.
const UNCLOSED_HTML: &str = "<p>a</p><blockquote>b";

/// DEFAULT replaces both block elements with space-padded text.
#[test]
fn unclosed_default() {
    let cleaned = sanitize_str(&DEFAULT, UNCLOSED_HTML).unwrap();
    assert_eq!(cleaned, " a  b ");
}

/// RESTRICTED gives the same space-padded text as DEFAULT.
#[test]
fn unclosed_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, UNCLOSED_HTML).unwrap();
    assert_eq!(cleaned, " a  b ");
}

/// BASIC keeps both elements and the output closes the blockquote.
#[test]
fn unclosed_basic() {
    let cleaned = sanitize_str(&BASIC, UNCLOSED_HTML).unwrap();
    assert_eq!(cleaned, "<p>a</p><blockquote>b</blockquote>");
}

/// RELAXED keeps both elements and the output closes the blockquote.
#[test]
fn unclosed_relaxed() {
    let cleaned = sanitize_str(&RELAXED, UNCLOSED_HTML).unwrap();
    assert_eq!(cleaned, "<p>a</p><blockquote>b</blockquote>");
}
|
||||||
|
|
||||||
|
/* malicious */
|
||||||
|
|
||||||
|
/// Malicious fixture: a `javascript:` link plus a `<<foo>script>` construct
/// that tries to smuggle a script tag past the sanitizer.
const MALICIOUS_HTML: &str = "<b>Lo<!-- comment -->rem</b> <a href=\"javascript:pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert(\"hello world\");</script>";

// In every expectation below the stray `<` and `script>` end up as plain text,
// so the serialized output must carry them escaped as `&lt;script&gt;`.

/// DEFAULT strips all markup; the smuggled tag survives only as escaped text.
#[test]
fn malicious_default() {
    let cleaned = sanitize_str(&DEFAULT, MALICIOUS_HTML).unwrap();
    assert_eq!(
        cleaned,
        "Lorem ipsum dolor sit amet &lt;script&gt;alert(\"hello world\");"
    );
}

/// RESTRICTED keeps `<b>`/`<strong>` and escapes the smuggled tag.
#[test]
fn malicious_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, MALICIOUS_HTML).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lorem</b> ipsum <strong>dolor</strong> sit amet &lt;script&gt;alert(\"hello world\");"
    );
}

/// BASIC drops the `javascript:` href and escapes the smuggled tag.
#[test]
fn malicious_basic() {
    let cleaned = sanitize_str(&BASIC, MALICIOUS_HTML).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lorem</b> <a>ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert(\"hello world\");"
    );
}

/// UNTRUSTED adds its mandatory `rel`/`target` attributes to every link.
#[test]
fn malicious_untrusted() {
    let cleaned = sanitize_str(&UNTRUSTED, MALICIOUS_HTML).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lorem</b> <a rel=\"noreferrer noopener\" target=\"_blank\">ipsum</a> <a href=\"http://foo.com/\" rel=\"noreferrer noopener\" target=\"_blank\"><strong>dolor</strong></a> sit amet &lt;script&gt;alert(\"hello world\");"
    );
}

/// RELAXED keeps `title` but still drops the `javascript:` href.
#[test]
fn malicious_relaxed() {
    let cleaned = sanitize_str(&RELAXED, MALICIOUS_HTML).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lorem</b> <a title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert(\"hello world\");"
    );
}
|
||||||
|
|
||||||
|
/* raw-comment */
|
||||||
|
|
||||||
|
/// Fixture consisting of a top-level comment followed by text.
const RAW_COMMENT_HTML: &str = "<!-- comment -->Hello";

/// DEFAULT drops the comment.
#[test]
fn raw_comment_default() {
    let cleaned = sanitize_str(&DEFAULT, RAW_COMMENT_HTML).unwrap();
    assert_eq!(cleaned, "Hello");
}

/// RESTRICTED drops the comment.
#[test]
fn raw_comment_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, RAW_COMMENT_HTML).unwrap();
    assert_eq!(cleaned, "Hello");
}

/// BASIC drops the comment.
#[test]
fn raw_comment_basic() {
    let cleaned = sanitize_str(&BASIC, RAW_COMMENT_HTML).unwrap();
    assert_eq!(cleaned, "Hello");
}

/// RELAXED drops the comment.
#[test]
fn raw_comment_relaxed() {
    let cleaned = sanitize_str(&RELAXED, RAW_COMMENT_HTML).unwrap();
    assert_eq!(cleaned, "Hello");
}
|
||||||
|
|
||||||
|
/* protocol-based JS injection: simple, no spaces */
|
||||||
|
|
||||||
|
/// Link whose href is a plain `javascript:` URL.
const JS_INJECTION_HTML_1: &str = "<a href=\"javascript:alert(\'XSS\');\">foo</a>";

/// DEFAULT keeps only the link text.
#[test]
fn js_injection_1_default() {
    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_1).unwrap();
    assert_eq!(cleaned, "foo");
}

/// RESTRICTED keeps only the link text.
#[test]
fn js_injection_1_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_1).unwrap();
    assert_eq!(cleaned, "foo");
}

/// BASIC keeps the anchor but not its href.
#[test]
fn js_injection_1_basic() {
    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_1).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}

/// RELAXED keeps the anchor but not its href.
#[test]
fn js_injection_1_relaxed() {
    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_1).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}
|
||||||
|
|
||||||
|
/* protocol-based JS injection: simple, spaces before */
|
||||||
|
|
||||||
|
/// Link whose `javascript` scheme is followed by a space before the colon.
const JS_INJECTION_HTML_2: &str = "<a href=\"javascript :alert(\'XSS\');\">foo</a>";

/// DEFAULT keeps only the link text.
#[test]
fn js_injection_2_default() {
    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_2).unwrap();
    assert_eq!(cleaned, "foo");
}

/// RESTRICTED keeps only the link text.
#[test]
fn js_injection_2_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_2).unwrap();
    assert_eq!(cleaned, "foo");
}

/// BASIC keeps the anchor but not its href.
#[test]
fn js_injection_2_basic() {
    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_2).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}

/// RELAXED keeps the anchor but not its href.
#[test]
fn js_injection_2_relaxed() {
    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_2).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}
|
||||||
|
|
||||||
|
/* protocol-based JS injection: simple, spaces after */
|
||||||
|
|
||||||
|
/// Link whose `javascript:` scheme is followed by a space after the colon.
const JS_INJECTION_HTML_3: &str = "<a href=\"javascript: alert(\'XSS\');\">foo</a>";

/// DEFAULT keeps only the link text.
#[test]
fn js_injection_3_default() {
    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_3).unwrap();
    assert_eq!(cleaned, "foo");
}

/// RESTRICTED keeps only the link text.
#[test]
fn js_injection_3_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_3).unwrap();
    assert_eq!(cleaned, "foo");
}

/// BASIC keeps the anchor but not its href.
#[test]
fn js_injection_3_basic() {
    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_3).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}

/// RELAXED keeps the anchor but not its href.
#[test]
fn js_injection_3_relaxed() {
    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_3).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}
|
||||||
|
|
||||||
|
/* protocol-based JS injection: simple, spaces before and after */
|
||||||
|
|
||||||
|
/// Link whose `javascript` scheme has spaces on both sides of the colon.
const JS_INJECTION_HTML_4: &str = "<a href=\"javascript : alert(\'XSS\');\">foo</a>";

/// DEFAULT keeps only the link text.
#[test]
fn js_injection_4_default() {
    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_4).unwrap();
    assert_eq!(cleaned, "foo");
}

/// RESTRICTED keeps only the link text.
#[test]
fn js_injection_4_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_4).unwrap();
    assert_eq!(cleaned, "foo");
}

/// BASIC keeps the anchor but not its href.
#[test]
fn js_injection_4_basic() {
    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_4).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}

/// RELAXED keeps the anchor but not its href.
#[test]
fn js_injection_4_relaxed() {
    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_4).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}
|
||||||
|
|
||||||
|
/* protocol-based JS injection: preceding colon */
|
||||||
|
|
||||||
|
/// Link whose href starts with a colon before the `javascript:` scheme.
const JS_INJECTION_HTML_5: &str = "<a href=\":javascript:alert(\'XSS\');\">foo</a>";

/// DEFAULT keeps only the link text.
#[test]
fn js_injection_5_default() {
    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_5).unwrap();
    assert_eq!(cleaned, "foo");
}

/// RESTRICTED keeps only the link text.
#[test]
fn js_injection_5_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_5).unwrap();
    assert_eq!(cleaned, "foo");
}

/// BASIC keeps the anchor but not its href.
#[test]
fn js_injection_5_basic() {
    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_5).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}

/// RELAXED keeps the anchor but not its href.
#[test]
fn js_injection_5_relaxed() {
    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_5).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}
|
||||||
|
|
||||||
|
/* protocol-based JS injection: UTF-8 encoding */
|
||||||
|
|
||||||
|
/// Link whose scheme colon is written as the decimal character reference
/// `&#58;`, so the `javascript:` scheme only appears after entity decoding.
const JS_INJECTION_HTML_6: &str = "<a href=\"javascript&#58;\">foo</a>";

/// DEFAULT keeps only the link text.
#[test]
fn js_injection_6_default() {
    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_6).unwrap();
    assert_eq!(cleaned, "foo");
}

/// RESTRICTED keeps only the link text.
#[test]
fn js_injection_6_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_6).unwrap();
    assert_eq!(cleaned, "foo");
}

/// BASIC keeps the anchor but not its href.
#[test]
fn js_injection_6_basic() {
    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_6).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}

/// RELAXED keeps the anchor but not its href.
#[test]
fn js_injection_6_relaxed() {
    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_6).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}
|
||||||
|
|
||||||
|
/* protocol-based JS injection: long UTF-8 encoding */
|
||||||
|
|
||||||
|
/// Link whose scheme colon is written as the zero-padded decimal character
/// reference `&#0058;`.
const JS_INJECTION_HTML_7: &str = "<a href=\"javascript&#0058;\">foo</a>";

/// DEFAULT keeps only the link text.
#[test]
fn js_injection_7_default() {
    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_7).unwrap();
    assert_eq!(cleaned, "foo");
}

/// RESTRICTED keeps only the link text.
#[test]
fn js_injection_7_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_7).unwrap();
    assert_eq!(cleaned, "foo");
}

/// BASIC keeps the anchor but not its href.
#[test]
fn js_injection_7_basic() {
    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_7).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}

/// RELAXED keeps the anchor but not its href.
#[test]
fn js_injection_7_relaxed() {
    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_7).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}
|
||||||
|
|
||||||
|
/* protocol-based JS injection: long UTF-8 encoding without semicolons */
|
||||||
|
|
||||||
|
/// Unquoted href in which every character of `javascript:alert('XSS')` is a
/// zero-padded decimal character reference with no terminating semicolon.
const JS_INJECTION_HTML_8: &str = "<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>foo</a>";

/// DEFAULT keeps only the link text.
#[test]
fn js_injection_8_default() {
    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_8).unwrap();
    assert_eq!(cleaned, "foo");
}

/// RESTRICTED keeps only the link text.
#[test]
fn js_injection_8_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_8).unwrap();
    assert_eq!(cleaned, "foo");
}

/// BASIC keeps the anchor but not its href.
#[test]
fn js_injection_8_basic() {
    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_8).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}

/// RELAXED keeps the anchor but not its href.
#[test]
fn js_injection_8_relaxed() {
    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_8).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}
|
||||||
|
|
||||||
|
/* protocol-based JS injection: hex encoding */
|
||||||
|
|
||||||
|
/// Link whose scheme colon is written as the hexadecimal character reference
/// `&#x3A;`.
const JS_INJECTION_HTML_9: &str = "<a href=\"javascript&#x3A;\">foo</a>";

/// DEFAULT keeps only the link text.
#[test]
fn js_injection_9_default() {
    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_9).unwrap();
    assert_eq!(cleaned, "foo");
}

/// RESTRICTED keeps only the link text.
#[test]
fn js_injection_9_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_9).unwrap();
    assert_eq!(cleaned, "foo");
}

/// BASIC keeps the anchor but not its href.
#[test]
fn js_injection_9_basic() {
    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_9).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}

/// RELAXED keeps the anchor but not its href.
#[test]
fn js_injection_9_relaxed() {
    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_9).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}
|
||||||
|
|
||||||
|
/* protocol-based JS injection: long hex encoding */
|
||||||
|
|
||||||
|
/// Link whose scheme colon is written as the zero-padded hexadecimal
/// character reference `&#x003A;`.
const JS_INJECTION_HTML_10: &str = "<a href=\"javascript&#x003A;\">foo</a>";

/// DEFAULT keeps only the link text.
#[test]
fn js_injection_10_default() {
    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_10).unwrap();
    assert_eq!(cleaned, "foo");
}

/// RESTRICTED keeps only the link text.
#[test]
fn js_injection_10_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_10).unwrap();
    assert_eq!(cleaned, "foo");
}

/// BASIC keeps the anchor but not its href.
#[test]
fn js_injection_10_basic() {
    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_10).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}

/// RELAXED keeps the anchor but not its href.
#[test]
fn js_injection_10_relaxed() {
    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_10).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}
|
||||||
|
|
||||||
|
/* protocol-based JS injection: hex encoding without semicolons */
|
||||||
|
|
||||||
|
/// Unquoted href in which every character of `javascript:alert('XSS')` is a
/// hexadecimal character reference with no terminating semicolon.
const JS_INJECTION_HTML_11: &str = "<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>foo</a>";

/// DEFAULT keeps only the link text.
#[test]
fn js_injection_11_default() {
    let cleaned = sanitize_str(&DEFAULT, JS_INJECTION_HTML_11).unwrap();
    assert_eq!(cleaned, "foo");
}

/// RESTRICTED keeps only the link text.
#[test]
fn js_injection_11_restricted() {
    let cleaned = sanitize_str(&RESTRICTED, JS_INJECTION_HTML_11).unwrap();
    assert_eq!(cleaned, "foo");
}

/// BASIC keeps the anchor but not its href.
#[test]
fn js_injection_11_basic() {
    let cleaned = sanitize_str(&BASIC, JS_INJECTION_HTML_11).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}

/// RELAXED keeps the anchor but not its href.
#[test]
fn js_injection_11_relaxed() {
    let cleaned = sanitize_str(&RELAXED, JS_INJECTION_HTML_11).unwrap();
    assert_eq!(cleaned, "<a>foo</a>");
}
|
||||||
|
|
||||||
|
/* should translate valid HTML entities */
|
||||||
|
|
||||||
|
/// Named entities in the input are decoded; the ampersand, being text, is
/// written back out as `&amp;`.
#[test]
fn misc_1() {
    let cleaned = sanitize_str(&DEFAULT, "Don&apos;t tas&eacute; me &amp; bro!").unwrap();
    assert_eq!(cleaned, "Don't tasé me &amp; bro!");
}
|
||||||
|
|
||||||
|
/* should translate valid HTML entities while encoding unencoded ampersands */
|
||||||
|
|
||||||
|
/// Valid entities are decoded while the bare `&` in the input is encoded as
/// `&amp;` in the output.
#[test]
fn misc_2() {
    let cleaned = sanitize_str(&DEFAULT, "cookies&sup2; & &frac14; cr&eacute;me").unwrap();
    assert_eq!(cleaned, "cookies² &amp; ¼ créme");
}
|
||||||
|
|
||||||
|
/* should never output ' */
|
||||||
|
|
||||||
|
/// Apostrophe entities inside attribute values must not leak into the output;
/// under DEFAULT only the link text remains.
#[test]
fn misc_3() {
    let input = "<a href='&apos;' class=\"' &#39;\">IE6 isn't a real browser</a>";
    let cleaned = sanitize_str(&DEFAULT, input).unwrap();
    assert_eq!(cleaned, "IE6 isn't a real browser");
}
|
||||||
|
|
||||||
|
/* should not choke on several instances of the same element in a row */
|
||||||
|
|
||||||
|
/// Four identical `<img>` elements in a row are all removed under DEFAULT.
#[test]
fn misc_4() {
    let input = "<img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\"><img src=\"http://www.google.com/intl/en_ALL/images/logo.gif\">";
    let cleaned = sanitize_str(&DEFAULT, input).unwrap();
    assert_eq!(cleaned, "");
}
|
||||||
|
|
||||||
|
/* should surround the contents of :whitespace_elements with space characters when removing the element */
|
||||||
|
|
||||||
|
/// A removed `<div>` leaves its text surrounded by spaces.
#[test]
fn misc_5() {
    let cleaned = sanitize_str(&DEFAULT, "foo<div>bar</div>baz").unwrap();
    assert_eq!(cleaned, "foo bar baz");
}
|
||||||
|
|
||||||
|
/// Each removed `<br>` is replaced by a single space.
#[test]
fn misc_6() {
    let cleaned = sanitize_str(&DEFAULT, "foo<br>bar<br>baz").unwrap();
    assert_eq!(cleaned, "foo bar baz");
}
|
||||||
|
|
||||||
|
/// Each removed `<hr>` is replaced by a single space.
#[test]
fn misc_7() {
    let cleaned = sanitize_str(&DEFAULT, "foo<hr>bar<hr>baz").unwrap();
    assert_eq!(cleaned, "foo bar baz");
}
|
||||||
|
|
||||||
|
/// A hand-built rule set: comments kept, `<b>`/`<span>` allowed,
/// `<script>`/`<style>` deleted, `<br>` turned into a space and `<strong>`
/// renamed to `<span>`.
#[test]
fn custom_rules() {
    let input = "<b>Lo<!-- comment -->rem</b> <a href=\"javascript:pants\" title=\"foo\">ipsum</a> <a href=\"http://foo.com/\"><strong>dolor</strong></a> sit<br/>amet <script>alert(\"hello world\")</script>";

    let custom = Rules::new()
        .allow_comments(true)
        .element(Element::new("b"))
        .element(Element::new("span"))
        .delete("script")
        .delete("style")
        .space("br")
        .rename("strong", "span");

    let cleaned = sanitize_str(&custom, input).unwrap();
    assert_eq!(
        cleaned,
        "<b>Lo<!-- comment -->rem</b> ipsum <span>dolor</span> sit amet "
    );
}
|
Loading…
Reference in New Issue