203 lines
6.9 KiB
Rust
203 lines
6.9 KiB
Rust
use crate::rules::{Element, Rules};
|
|
use html5ever::{interface::QualName, namespace_url, ns, LocalName};
|
|
use kuchiki::{Attribute, ElementData, ExpandedName, NodeData, NodeRef};
|
|
|
|
fn simple_qual_name(name: &str) -> QualName {
|
|
QualName::new(None, ns!(), LocalName::from(name))
|
|
}
|
|
|
|
fn qual_name_to_string(name: &QualName) -> String {
|
|
if name.ns == ns!(html) || name.ns.is_empty() {
|
|
name.local.to_lowercase()
|
|
} else {
|
|
format!("{}:{}", name.ns.to_lowercase(), name.local.to_lowercase())
|
|
}
|
|
}
|
|
|
|
fn expanded_name_to_string(name: &ExpandedName) -> String {
|
|
if name.ns == ns!(html) || name.ns.is_empty() {
|
|
name.local.to_lowercase()
|
|
} else {
|
|
format!("{}:{}", name.ns.to_lowercase(), name.local.to_lowercase())
|
|
}
|
|
}
|
|
|
|
fn simple_element(
|
|
name: QualName,
|
|
attrs: Vec<(ExpandedName, Attribute)>,
|
|
children: Vec<NodeRef>,
|
|
) -> NodeRef {
|
|
let element = NodeRef::new_element(name, attrs);
|
|
for child in children {
|
|
child.detach();
|
|
element.append(child);
|
|
}
|
|
element
|
|
}
|
|
|
|
fn create_space_text() -> NodeRef {
|
|
NodeRef::new_text(" ")
|
|
}
|
|
|
|
enum ElementAction<'t> {
|
|
Keep(&'t Element),
|
|
Delete,
|
|
Space,
|
|
Elide,
|
|
Rename(&'t str),
|
|
}
|
|
|
|
fn element_action<'t>(element_name: &QualName, rules: &'t Rules) -> ElementAction<'t> {
|
|
let name = qual_name_to_string(element_name);
|
|
if name == "html" || name == "body" {
|
|
ElementAction::Elide
|
|
} else if let Some(element_sanitizer) = rules.allowed_elements.get(&name) {
|
|
ElementAction::Keep(element_sanitizer)
|
|
} else if rules.delete_elements.contains(&name) {
|
|
ElementAction::Delete
|
|
} else if rules.space_elements.contains(&name) {
|
|
ElementAction::Space
|
|
} else if let Some(rename_to) = rules.rename_elements.get(&name) {
|
|
ElementAction::Rename(rename_to)
|
|
} else {
|
|
ElementAction::Elide
|
|
}
|
|
}
|
|
|
|
fn clean_nodes(nodes: impl IntoIterator<Item = NodeRef>, rules: &Rules) -> Vec<NodeRef> {
|
|
let mut result = Vec::new();
|
|
for node in nodes {
|
|
let subnodes = clean_node(&node, rules);
|
|
result.extend(subnodes);
|
|
}
|
|
result
|
|
}
|
|
|
|
fn clean_node(node: &NodeRef, rules: &Rules) -> Vec<NodeRef> {
|
|
match node.data() {
|
|
NodeData::Document(..) => vec![],
|
|
NodeData::DocumentFragment => vec![], // TODO: ??
|
|
NodeData::Doctype(..) => vec![],
|
|
NodeData::ProcessingInstruction(..) => vec![],
|
|
|
|
NodeData::Text(..) => vec![node.clone()],
|
|
|
|
NodeData::Comment(..) => {
|
|
if rules.allow_comments {
|
|
vec![node.clone()]
|
|
} else {
|
|
vec![]
|
|
}
|
|
}
|
|
|
|
NodeData::Element(ElementData {
|
|
ref name,
|
|
ref attributes,
|
|
..
|
|
}) => {
|
|
match element_action(name, rules) {
|
|
ElementAction::Keep(element_sanitizer) => {
|
|
let mut new_attrs: Vec<(ExpandedName, Attribute)> = Vec::new();
|
|
|
|
/* whitelisted attributes */
|
|
for (attr_name, attr_value) in attributes.borrow().map.iter() {
|
|
let expanded_name = expanded_name_to_string(attr_name);
|
|
|
|
let new_value = if !element_sanitizer.attribute_rules.modify_attributes.contains_key(&expanded_name) {
|
|
attr_value.clone()
|
|
} else {
|
|
let func = element_sanitizer.attribute_rules.modify_attributes.get(&expanded_name).unwrap();
|
|
let new_value = func(attr_value.value.clone());
|
|
Attribute {
|
|
prefix: attr_value.prefix.clone(),
|
|
value: new_value
|
|
}
|
|
};
|
|
|
|
if !element_sanitizer
|
|
.is_valid(&expanded_name_to_string(attr_name), &new_value.value)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
let name = &attr_name.local.to_string();
|
|
let new_name = if element_sanitizer
|
|
.attribute_rules
|
|
.rename_attributes
|
|
.contains_key(name)
|
|
{
|
|
ExpandedName::new(
|
|
attr_name.ns.clone(),
|
|
String::from(
|
|
element_sanitizer
|
|
.attribute_rules
|
|
.rename_attributes
|
|
.get(name)
|
|
.unwrap(),
|
|
),
|
|
)
|
|
} else {
|
|
attr_name.clone()
|
|
};
|
|
|
|
new_attrs.push((new_name, attr_value.clone()));
|
|
}
|
|
|
|
/* mandatory attributes */
|
|
let mut mandatory_attributes: Vec<(&String, &String)> =
|
|
element_sanitizer.mandatory_attributes.iter().collect();
|
|
mandatory_attributes.sort();
|
|
for &(attr_name, attr_value) in mandatory_attributes.iter() {
|
|
new_attrs.push((
|
|
ExpandedName::new(ns!(), LocalName::from(attr_name.as_str())),
|
|
Attribute {
|
|
prefix: None,
|
|
value: attr_value.into(),
|
|
},
|
|
));
|
|
}
|
|
|
|
let children = clean_nodes(node.children(), rules);
|
|
let element = simple_element(name.clone(), new_attrs, children);
|
|
|
|
vec![element]
|
|
}
|
|
|
|
ElementAction::Delete => vec![],
|
|
|
|
ElementAction::Elide => clean_nodes(node.children(), rules),
|
|
|
|
ElementAction::Space => {
|
|
let mut nodes = clean_nodes(node.children(), rules);
|
|
if nodes.is_empty() {
|
|
nodes.push(create_space_text());
|
|
} else {
|
|
nodes.insert(0, create_space_text());
|
|
nodes.push(create_space_text());
|
|
}
|
|
nodes
|
|
}
|
|
|
|
ElementAction::Rename(rename_to) => {
|
|
let children = clean_nodes(node.children(), rules);
|
|
vec![simple_element(
|
|
simple_qual_name(rename_to),
|
|
Vec::new(),
|
|
children,
|
|
)]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
pub(crate) fn sanitize_dom(dom: &NodeRef, mode: &Rules) -> NodeRef {
|
|
let new_children = clean_nodes(dom.children(), mode);
|
|
let new_dom = NodeRef::new_document();
|
|
for child in new_children {
|
|
child.detach();
|
|
new_dom.append(child);
|
|
}
|
|
new_dom
|
|
}
|