242 lines
7.0 KiB
Rust
242 lines
7.0 KiB
Rust
use html5ever::tendril::StrTendril;
|
|
use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
|
|
use html5ever::{self, Attribute, ExpandedName, QualName};
|
|
use std::borrow::Cow;
|
|
|
|
use crate::attributes;
|
|
use crate::tree::NodeRef;
|
|
|
|
/// Options for the HTML parser.
|
|
#[derive(Default)]
|
|
pub struct ParseOpts {
|
|
/// Options for the HTML tokenizer.
|
|
pub tokenizer: html5ever::tokenizer::TokenizerOpts,
|
|
|
|
/// Options for the HTML tree builder.
|
|
pub tree_builder: html5ever::tree_builder::TreeBuilderOpts,
|
|
|
|
/// A callback for HTML parse errors (which are never fatal).
|
|
pub on_parse_error: Option<Box<dyn FnMut(Cow<'static, str>)>>,
|
|
}
|
|
|
|
/// Parse an HTML document with html5ever and the default configuration.
|
|
pub fn parse_html() -> html5ever::Parser<Sink> {
|
|
parse_html_with_options(ParseOpts::default())
|
|
}
|
|
|
|
/// Parse an HTML document with html5ever with custom configuration.
|
|
pub fn parse_html_with_options(opts: ParseOpts) -> html5ever::Parser<Sink> {
|
|
let sink = Sink {
|
|
document_node: NodeRef::new_document(),
|
|
on_parse_error: opts.on_parse_error,
|
|
};
|
|
let html5opts = html5ever::ParseOpts {
|
|
tokenizer: opts.tokenizer,
|
|
tree_builder: opts.tree_builder,
|
|
};
|
|
html5ever::parse_document(sink, html5opts)
|
|
}
|
|
|
|
/// Parse an HTML fragment with html5ever and the default configuration.
|
|
pub fn parse_fragment(ctx_name: QualName, ctx_attr: Vec<Attribute>) -> html5ever::Parser<Sink> {
|
|
parse_fragment_with_options(ParseOpts::default(), ctx_name, ctx_attr)
|
|
}
|
|
|
|
/// Parse an HTML fragment with html5ever with custom configuration.
|
|
pub fn parse_fragment_with_options(opts: ParseOpts, ctx_name: QualName, ctx_attr: Vec<Attribute>) -> html5ever::Parser<Sink> {
|
|
let sink = Sink {
|
|
document_node: NodeRef::new_document(),
|
|
on_parse_error: opts.on_parse_error,
|
|
};
|
|
let html5opts = html5ever::ParseOpts {
|
|
tokenizer: opts.tokenizer,
|
|
tree_builder: opts.tree_builder,
|
|
};
|
|
html5ever::parse_fragment(sink, html5opts, ctx_name, ctx_attr)
|
|
}
|
|
|
|
/// Receives new tree nodes during parsing.
|
|
pub struct Sink {
|
|
document_node: NodeRef,
|
|
on_parse_error: Option<Box<dyn FnMut(Cow<'static, str>)>>,
|
|
}
|
|
|
|
impl TreeSink for Sink {
|
|
type Output = NodeRef;
|
|
|
|
fn finish(self) -> NodeRef {
|
|
self.document_node
|
|
}
|
|
|
|
type Handle = NodeRef;
|
|
|
|
#[inline]
|
|
fn parse_error(&mut self, message: Cow<'static, str>) {
|
|
if let Some(ref mut handler) = self.on_parse_error {
|
|
handler(message)
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn get_document(&mut self) -> NodeRef {
|
|
self.document_node.clone()
|
|
}
|
|
|
|
#[inline]
|
|
fn set_quirks_mode(&mut self, mode: QuirksMode) {
|
|
self.document_node
|
|
.as_document()
|
|
.unwrap()
|
|
._quirks_mode
|
|
.set(mode)
|
|
}
|
|
|
|
#[inline]
|
|
fn same_node(&self, x: &NodeRef, y: &NodeRef) -> bool {
|
|
x == y
|
|
}
|
|
|
|
#[inline]
|
|
fn elem_name<'a>(&self, target: &'a NodeRef) -> ExpandedName<'a> {
|
|
target.as_element().unwrap().name.expanded()
|
|
}
|
|
|
|
#[inline]
|
|
fn create_element(
|
|
&mut self,
|
|
name: QualName,
|
|
attrs: Vec<Attribute>,
|
|
_flags: ElementFlags,
|
|
) -> NodeRef {
|
|
NodeRef::new_element(
|
|
name,
|
|
attrs.into_iter().map(|attr| {
|
|
let Attribute {
|
|
name: QualName { prefix, ns, local },
|
|
value,
|
|
} = attr;
|
|
let value = String::from(value);
|
|
(
|
|
attributes::ExpandedName { ns, local },
|
|
attributes::Attribute { prefix, value },
|
|
)
|
|
}),
|
|
)
|
|
}
|
|
|
|
#[inline]
|
|
fn create_comment(&mut self, text: StrTendril) -> NodeRef {
|
|
NodeRef::new_comment(text)
|
|
}
|
|
|
|
#[inline]
|
|
fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> NodeRef {
|
|
NodeRef::new_processing_instruction(target, data)
|
|
}
|
|
|
|
#[inline]
|
|
fn append(&mut self, parent: &NodeRef, child: NodeOrText<NodeRef>) {
|
|
match child {
|
|
NodeOrText::AppendNode(node) => parent.append(node),
|
|
NodeOrText::AppendText(text) => {
|
|
if let Some(last_child) = parent.last_child() {
|
|
if let Some(existing) = last_child.as_text() {
|
|
existing.borrow_mut().push_str(&text);
|
|
return;
|
|
}
|
|
}
|
|
parent.append(NodeRef::new_text(text))
|
|
}
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn append_before_sibling(&mut self, sibling: &NodeRef, child: NodeOrText<NodeRef>) {
|
|
match child {
|
|
NodeOrText::AppendNode(node) => sibling.insert_before(node),
|
|
NodeOrText::AppendText(text) => {
|
|
if let Some(previous_sibling) = sibling.previous_sibling() {
|
|
if let Some(existing) = previous_sibling.as_text() {
|
|
existing.borrow_mut().push_str(&text);
|
|
return;
|
|
}
|
|
}
|
|
sibling.insert_before(NodeRef::new_text(text))
|
|
}
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn append_doctype_to_document(
|
|
&mut self,
|
|
name: StrTendril,
|
|
public_id: StrTendril,
|
|
system_id: StrTendril,
|
|
) {
|
|
self.document_node
|
|
.append(NodeRef::new_doctype(name, public_id, system_id))
|
|
}
|
|
|
|
#[inline]
|
|
fn add_attrs_if_missing(&mut self, target: &NodeRef, attrs: Vec<Attribute>) {
|
|
let element = target.as_element().unwrap();
|
|
let mut attributes = element.attributes.borrow_mut();
|
|
|
|
for Attribute {
|
|
name: QualName { prefix, ns, local },
|
|
value,
|
|
} in attrs
|
|
{
|
|
attributes
|
|
.map
|
|
.entry(attributes::ExpandedName { ns, local })
|
|
.or_insert_with(|| {
|
|
let value = String::from(value);
|
|
attributes::Attribute { prefix, value }
|
|
});
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn remove_from_parent(&mut self, target: &NodeRef) {
|
|
target.detach()
|
|
}
|
|
|
|
#[inline]
|
|
fn reparent_children(&mut self, node: &NodeRef, new_parent: &NodeRef) {
|
|
// FIXME: Can this be done more effciently in rctree,
|
|
// by moving the whole linked list of children at once?
|
|
for child in node.children() {
|
|
new_parent.append(child)
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn mark_script_already_started(&mut self, _node: &NodeRef) {
|
|
// FIXME: Is this useful outside of a browser?
|
|
}
|
|
|
|
#[inline]
|
|
fn get_template_contents(&mut self, target: &NodeRef) -> NodeRef {
|
|
target
|
|
.as_element()
|
|
.unwrap()
|
|
.template_contents
|
|
.clone()
|
|
.unwrap()
|
|
}
|
|
|
|
fn append_based_on_parent_node(
|
|
&mut self,
|
|
element: &NodeRef,
|
|
prev_element: &NodeRef,
|
|
child: NodeOrText<NodeRef>,
|
|
) {
|
|
if element.parent().is_some() {
|
|
self.append_before_sibling(element, child)
|
|
} else {
|
|
self.append(prev_element, child)
|
|
}
|
|
}
|
|
}
|