use crate::rules::{Element, Rules}; use html5ever::{interface::QualName, namespace_url, ns, LocalName}; use kuchiki::{Attribute, ElementData, ExpandedName, NodeData, NodeRef}; fn simple_qual_name(name: &str) -> QualName { QualName::new(None, ns!(), LocalName::from(name)) } fn qual_name_to_string(name: &QualName) -> String { if name.ns == ns!(html) || name.ns.is_empty() { name.local.to_lowercase() } else { format!("{}:{}", name.ns.to_lowercase(), name.local.to_lowercase()) } } fn expanded_name_to_string(name: &ExpandedName) -> String { if name.ns == ns!(html) || name.ns.is_empty() { name.local.to_lowercase() } else { format!("{}:{}", name.ns.to_lowercase(), name.local.to_lowercase()) } } fn simple_element( name: QualName, attrs: Vec<(ExpandedName, Attribute)>, children: Vec, ) -> NodeRef { let element = NodeRef::new_element(name, attrs); for child in children { child.detach(); element.append(child); } element } fn create_space_text() -> NodeRef { NodeRef::new_text(" ") } enum ElementAction<'t> { Keep(&'t Element), Delete, Space, Elide, Rename(&'t str), } fn element_action<'t>(element_name: &QualName, rules: &'t Rules) -> ElementAction<'t> { let name = qual_name_to_string(element_name); if name == "html" || name == "body" { ElementAction::Elide } else if let Some(element_sanitizer) = rules.allowed_elements.get(&name) { ElementAction::Keep(element_sanitizer) } else if rules.delete_elements.contains(&name) { ElementAction::Delete } else if rules.space_elements.contains(&name) { ElementAction::Space } else if let Some(rename_to) = rules.rename_elements.get(&name) { ElementAction::Rename(rename_to) } else { ElementAction::Elide } } fn clean_nodes(nodes: impl IntoIterator, rules: &Rules) -> Vec { let mut result = Vec::new(); for node in nodes { let subnodes = clean_node(&node, rules); result.extend(subnodes); } result } fn clean_node(node: &NodeRef, rules: &Rules) -> Vec { match node.data() { NodeData::Document(..) => vec![], NodeData::DocumentFragment => vec![], // TODO: ?? NodeData::Doctype(..) => vec![], NodeData::ProcessingInstruction(..) => vec![], NodeData::Text(..) => vec![node.clone()], NodeData::Comment(..) => { if rules.allow_comments { vec![node.clone()] } else { vec![] } } NodeData::Element(ElementData { ref name, ref attributes, .. }) => { match element_action(name, rules) { ElementAction::Keep(element_sanitizer) => { let mut new_attrs: Vec<(ExpandedName, Attribute)> = Vec::new(); /* whitelisted attributes */ for (attr_name, attr_value) in attributes.borrow().map.iter() { let expanded_name = expanded_name_to_string(attr_name); let new_value = if !element_sanitizer.attribute_rules.modify_attributes.contains_key(&expanded_name) { attr_value.clone() } else { let func = element_sanitizer.attribute_rules.modify_attributes.get(&expanded_name).unwrap(); let new_value = func(attr_value.value.clone()); Attribute { prefix: attr_value.prefix.clone(), value: new_value } }; if !element_sanitizer .is_valid(&expanded_name_to_string(attr_name), &new_value.value) { continue; } let name = &attr_name.local.to_string(); let new_name = if element_sanitizer .attribute_rules .rename_attributes .contains_key(name) { ExpandedName::new( attr_name.ns.clone(), String::from( element_sanitizer .attribute_rules .rename_attributes .get(name) .unwrap(), ), ) } else { attr_name.clone() }; new_attrs.push((new_name, attr_value.clone())); } /* mandatory attributes */ let mut mandatory_attributes: Vec<(&String, &String)> = element_sanitizer.mandatory_attributes.iter().collect(); mandatory_attributes.sort(); for &(attr_name, attr_value) in mandatory_attributes.iter() { new_attrs.push(( ExpandedName::new(ns!(), LocalName::from(attr_name.as_str())), Attribute { prefix: None, value: attr_value.into(), }, )); } let children = clean_nodes(node.children(), rules); let element = simple_element(name.clone(), new_attrs, children); vec![element] } ElementAction::Delete => vec![], ElementAction::Elide => clean_nodes(node.children(), rules), ElementAction::Space => { let mut nodes = clean_nodes(node.children(), rules); if nodes.is_empty() { nodes.push(create_space_text()); } else { nodes.insert(0, create_space_text()); nodes.push(create_space_text()); } nodes } ElementAction::Rename(rename_to) => { let children = clean_nodes(node.children(), rules); vec![simple_element( simple_qual_name(rename_to), Vec::new(), children, )] } } } } } pub(crate) fn sanitize_dom(dom: &NodeRef, mode: &Rules) -> NodeRef { let new_children = clean_nodes(dom.children(), mode); let new_dom = NodeRef::new_document(); for child in new_children { child.detach(); new_dom.append(child); } new_dom }