490 lines
16 KiB
Rust
490 lines
16 KiB
Rust
|
use html5ever::tree_builder::QuirksMode;
|
|||
|
use html5ever::QualName;
|
|||
|
use std::cell::{Cell, RefCell};
|
|||
|
use std::fmt;
|
|||
|
use std::ops::Deref;
|
|||
|
use std::rc::{Rc, Weak};
|
|||
|
|
|||
|
use crate::attributes::{Attribute, Attributes, ExpandedName};
|
|||
|
use crate::cell_extras::*;
|
|||
|
use crate::iter::NodeIterator;
|
|||
|
|
|||
|
/// Node data specific to the node type.
|
|||
|
#[derive(Debug, PartialEq, Clone)]
|
|||
|
pub enum NodeData {
|
|||
|
/// Element node
|
|||
|
Element(ElementData),
|
|||
|
|
|||
|
/// Text node
|
|||
|
Text(RefCell<String>),
|
|||
|
|
|||
|
/// Comment node
|
|||
|
Comment(RefCell<String>),
|
|||
|
|
|||
|
/// Processing instruction node
|
|||
|
ProcessingInstruction(RefCell<(String, String)>),
|
|||
|
|
|||
|
/// Doctype node
|
|||
|
Doctype(Doctype),
|
|||
|
|
|||
|
/// Document node
|
|||
|
Document(DocumentData),
|
|||
|
|
|||
|
/// Document fragment node
|
|||
|
DocumentFragment,
|
|||
|
}
|
|||
|
|
|||
|
/// The payload of a doctype node: its name and its two identifiers.
#[derive(Clone, Debug, PartialEq)]
pub struct Doctype {
    /// Name of the doctype.
    pub name: String,

    /// Public identifier of the doctype.
    pub public_id: String,

    /// System identifier of the doctype.
    pub system_id: String,
}
|
|||
|
|
|||
|
/// Data specific to element nodes.
|
|||
|
#[derive(Debug, PartialEq, Clone)]
|
|||
|
pub struct ElementData {
|
|||
|
/// The namespace and local name of the element, such as `ns!(html)` and `body`.
|
|||
|
pub name: QualName,
|
|||
|
|
|||
|
/// The attributes of the elements.
|
|||
|
pub attributes: RefCell<Attributes>,
|
|||
|
|
|||
|
/// If the element is an HTML `<template>` element,
|
|||
|
/// the document fragment node that is the root of template contents.
|
|||
|
pub template_contents: Option<NodeRef>,
|
|||
|
}
|
|||
|
|
|||
|
/// Data specific to document nodes.
|
|||
|
#[derive(Debug, PartialEq, Clone)]
|
|||
|
pub struct DocumentData {
|
|||
|
#[doc(hidden)]
|
|||
|
pub _quirks_mode: Cell<QuirksMode>,
|
|||
|
}
|
|||
|
|
|||
|
impl DocumentData {
|
|||
|
/// The quirks mode of the document, as determined by the HTML parser.
|
|||
|
#[inline]
|
|||
|
pub fn quirks_mode(&self) -> QuirksMode {
|
|||
|
self._quirks_mode.get()
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
/// A strong reference to a node.
|
|||
|
///
|
|||
|
/// A node is destroyed when the last strong reference to it dropped.
|
|||
|
///
|
|||
|
/// Each node holds a strong reference to its first child and next sibling (if any),
|
|||
|
/// but only a weak reference to its last child, previous sibling, and parent.
|
|||
|
/// This is to avoid strong reference cycles, which would cause memory leaks.
|
|||
|
///
|
|||
|
/// As a result, a single `NodeRef` is sufficient to keep alive a node
|
|||
|
/// and nodes that are after it in tree order
|
|||
|
/// (its descendants, its following siblings, and their descendants)
|
|||
|
/// but not other nodes in a tree.
|
|||
|
///
|
|||
|
/// To avoid detroying nodes prematurely,
|
|||
|
/// programs typically hold a strong reference to the root of a document
|
|||
|
/// until they’re done with that document.
|
|||
|
#[derive(Clone, Debug)]
|
|||
|
pub struct NodeRef(pub Rc<Node>);
|
|||
|
|
|||
|
impl Deref for NodeRef {
|
|||
|
type Target = Node;
|
|||
|
#[inline]
|
|||
|
fn deref(&self) -> &Node {
|
|||
|
&*self.0
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
impl Eq for NodeRef {}
|
|||
|
impl PartialEq for NodeRef {
|
|||
|
#[inline]
|
|||
|
fn eq(&self, other: &NodeRef) -> bool {
|
|||
|
let a: *const Node = &*self.0;
|
|||
|
let b: *const Node = &*other.0;
|
|||
|
a == b
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
/// A node inside a DOM-like tree.
|
|||
|
pub struct Node {
|
|||
|
parent: Cell<Option<Weak<Node>>>,
|
|||
|
previous_sibling: Cell<Option<Weak<Node>>>,
|
|||
|
next_sibling: Cell<Option<Rc<Node>>>,
|
|||
|
first_child: Cell<Option<Rc<Node>>>,
|
|||
|
last_child: Cell<Option<Weak<Node>>>,
|
|||
|
data: NodeData,
|
|||
|
}
|
|||
|
|
|||
|
/// Formats a node as its data, then ` @ `, then the address of the node.
impl fmt::Debug for Node {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let address: *const Node = self;
        write!(f, "{:?} @ {:?}", self.data, address)
    }
}
|
|||
|
|
|||
|
/// Prevent implicit recursion when dropping nodes to avoid overflowing the stack.
|
|||
|
///
|
|||
|
/// The implicit drop is correct, but recursive.
|
|||
|
/// In the worst case (where no node has both a next sibling and a child),
|
|||
|
/// a tree of a few tens of thousands of nodes could cause a stack overflow.
|
|||
|
///
|
|||
|
/// This `Drop` implementations makes sure the recursion does not happen.
|
|||
|
/// Instead, it has an explicit `Vec<Rc<Node>>` stack to traverse the subtree,
|
|||
|
/// but only following `Rc<Node>` references that are "unique":
|
|||
|
/// that have a strong reference count of 1.
|
|||
|
/// Those are the nodes that would have been dropped recursively.
|
|||
|
///
|
|||
|
/// The stack holds ancestors of the current node rather than preceding siblings,
|
|||
|
/// on the assumption that large document trees are typically wider than deep.
|
|||
|
impl Drop for Node {
|
|||
|
fn drop(&mut self) {
|
|||
|
// `.take_if_unique_strong()` temporarily leaves the tree in an inconsistent state,
|
|||
|
// as the corresponding `Weak` reference in the other direction is not removed.
|
|||
|
// It is important that all `Some(_)` strong references it returns
|
|||
|
// are dropped by the end of this `drop` call,
|
|||
|
// and that no user code is invoked in-between.
|
|||
|
|
|||
|
// Sharing `stack` between these two calls is not necessary,
|
|||
|
// but it allows re-using memory allocations.
|
|||
|
let mut stack = Vec::new();
|
|||
|
if let Some(rc) = self.first_child.take_if_unique_strong() {
|
|||
|
non_recursive_drop_unique_rc(rc, &mut stack);
|
|||
|
}
|
|||
|
if let Some(rc) = self.next_sibling.take_if_unique_strong() {
|
|||
|
non_recursive_drop_unique_rc(rc, &mut stack);
|
|||
|
}
|
|||
|
|
|||
|
fn non_recursive_drop_unique_rc(mut rc: Rc<Node>, stack: &mut Vec<Rc<Node>>) {
|
|||
|
loop {
|
|||
|
if let Some(child) = rc.first_child.take_if_unique_strong() {
|
|||
|
stack.push(rc);
|
|||
|
rc = child;
|
|||
|
continue;
|
|||
|
}
|
|||
|
if let Some(sibling) = rc.next_sibling.take_if_unique_strong() {
|
|||
|
// The previous value of `rc: Rc<Node>` is dropped here.
|
|||
|
// Since it was unique, the corresponding `Node` is dropped as well.
|
|||
|
// `<Node as Drop>::drop` does not call `drop_rc`
|
|||
|
// as both the first child and next sibling were already taken.
|
|||
|
// Weak reference counts decremented here for `Cell`s that are `Some`:
|
|||
|
// * `rc.parent`: still has a strong reference in `stack` or elsewhere
|
|||
|
// * `rc.last_child`: this is the last weak ref. Deallocated now.
|
|||
|
// * `rc.previous_sibling`: this is the last weak ref. Deallocated now.
|
|||
|
rc = sibling;
|
|||
|
continue;
|
|||
|
}
|
|||
|
if let Some(parent) = stack.pop() {
|
|||
|
// Same as in the above comment.
|
|||
|
rc = parent;
|
|||
|
continue;
|
|||
|
}
|
|||
|
return;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
impl NodeRef {
|
|||
|
/// Create a new node.
|
|||
|
#[inline]
|
|||
|
pub fn new(data: NodeData) -> NodeRef {
|
|||
|
NodeRef(Rc::new(Node {
|
|||
|
parent: Cell::new(None),
|
|||
|
first_child: Cell::new(None),
|
|||
|
last_child: Cell::new(None),
|
|||
|
previous_sibling: Cell::new(None),
|
|||
|
next_sibling: Cell::new(None),
|
|||
|
data,
|
|||
|
}))
|
|||
|
}
|
|||
|
|
|||
|
/// Create a new element node.
|
|||
|
#[inline]
|
|||
|
pub fn new_element<I>(name: QualName, attributes: I) -> NodeRef
|
|||
|
where
|
|||
|
I: IntoIterator<Item = (ExpandedName, Attribute)>,
|
|||
|
{
|
|||
|
NodeRef::new(NodeData::Element(ElementData {
|
|||
|
template_contents: if name.expanded() == expanded_name!(html "template") {
|
|||
|
Some(NodeRef::new(NodeData::DocumentFragment))
|
|||
|
} else {
|
|||
|
None
|
|||
|
},
|
|||
|
name,
|
|||
|
attributes: RefCell::new(Attributes {
|
|||
|
map: attributes.into_iter().collect(),
|
|||
|
}),
|
|||
|
}))
|
|||
|
}
|
|||
|
|
|||
|
/// Create a new text node.
|
|||
|
#[inline]
|
|||
|
pub fn new_text<T: Into<String>>(value: T) -> NodeRef {
|
|||
|
NodeRef::new(NodeData::Text(RefCell::new(value.into())))
|
|||
|
}
|
|||
|
|
|||
|
/// Create a new comment node.
|
|||
|
#[inline]
|
|||
|
pub fn new_comment<T: Into<String>>(value: T) -> NodeRef {
|
|||
|
NodeRef::new(NodeData::Comment(RefCell::new(value.into())))
|
|||
|
}
|
|||
|
|
|||
|
/// Create a new processing instruction node.
|
|||
|
#[inline]
|
|||
|
pub fn new_processing_instruction<T1, T2>(target: T1, data: T2) -> NodeRef
|
|||
|
where
|
|||
|
T1: Into<String>,
|
|||
|
T2: Into<String>,
|
|||
|
{
|
|||
|
NodeRef::new(NodeData::ProcessingInstruction(RefCell::new((
|
|||
|
target.into(),
|
|||
|
data.into(),
|
|||
|
))))
|
|||
|
}
|
|||
|
|
|||
|
/// Create a new doctype node.
|
|||
|
#[inline]
|
|||
|
pub fn new_doctype<T1, T2, T3>(name: T1, public_id: T2, system_id: T3) -> NodeRef
|
|||
|
where
|
|||
|
T1: Into<String>,
|
|||
|
T2: Into<String>,
|
|||
|
T3: Into<String>,
|
|||
|
{
|
|||
|
NodeRef::new(NodeData::Doctype(Doctype {
|
|||
|
name: name.into(),
|
|||
|
public_id: public_id.into(),
|
|||
|
system_id: system_id.into(),
|
|||
|
}))
|
|||
|
}
|
|||
|
|
|||
|
/// Create a new document node.
|
|||
|
#[inline]
|
|||
|
pub fn new_document() -> NodeRef {
|
|||
|
NodeRef::new(NodeData::Document(DocumentData {
|
|||
|
_quirks_mode: Cell::new(QuirksMode::NoQuirks),
|
|||
|
}))
|
|||
|
}
|
|||
|
|
|||
|
/// Return the concatenation of all text nodes in this subtree.
|
|||
|
pub fn text_contents(&self) -> String {
|
|||
|
let mut s = String::new();
|
|||
|
for text_node in self.inclusive_descendants().text_nodes() {
|
|||
|
s.push_str(&text_node.borrow());
|
|||
|
}
|
|||
|
s
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
impl Node {
|
|||
|
/// Return a reference to this node’s node-type-specific data.
|
|||
|
#[inline]
|
|||
|
pub fn data(&self) -> &NodeData {
|
|||
|
&self.data
|
|||
|
}
|
|||
|
|
|||
|
/// If this node is an element, return a reference to element-specific data.
|
|||
|
#[inline]
|
|||
|
pub fn as_element(&self) -> Option<&ElementData> {
|
|||
|
match self.data {
|
|||
|
NodeData::Element(ref value) => Some(value),
|
|||
|
_ => None,
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
/// If this node is a text node, return a reference to its contents.
|
|||
|
#[inline]
|
|||
|
pub fn as_text(&self) -> Option<&RefCell<String>> {
|
|||
|
match self.data {
|
|||
|
NodeData::Text(ref value) => Some(value),
|
|||
|
_ => None,
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
/// If this node is a comment, return a reference to its contents.
|
|||
|
#[inline]
|
|||
|
pub fn as_comment(&self) -> Option<&RefCell<String>> {
|
|||
|
match self.data {
|
|||
|
NodeData::Comment(ref value) => Some(value),
|
|||
|
_ => None,
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
/// If this node is a document, return a reference to doctype-specific data.
|
|||
|
#[inline]
|
|||
|
pub fn as_doctype(&self) -> Option<&Doctype> {
|
|||
|
match self.data {
|
|||
|
NodeData::Doctype(ref value) => Some(value),
|
|||
|
_ => None,
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
/// If this node is a document, return a reference to document-specific data.
|
|||
|
#[inline]
|
|||
|
pub fn as_document(&self) -> Option<&DocumentData> {
|
|||
|
match self.data {
|
|||
|
NodeData::Document(ref value) => Some(value),
|
|||
|
_ => None,
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
/// Return a reference to the parent node, unless this node is the root of the tree.
|
|||
|
#[inline]
|
|||
|
pub fn parent(&self) -> Option<NodeRef> {
|
|||
|
self.parent.upgrade().map(NodeRef)
|
|||
|
}
|
|||
|
|
|||
|
/// Return a reference to the first child of this node, unless it has no child.
|
|||
|
#[inline]
|
|||
|
pub fn first_child(&self) -> Option<NodeRef> {
|
|||
|
self.first_child.clone_inner().map(NodeRef)
|
|||
|
}
|
|||
|
|
|||
|
/// Return a reference to the last child of this node, unless it has no child.
|
|||
|
#[inline]
|
|||
|
pub fn last_child(&self) -> Option<NodeRef> {
|
|||
|
self.last_child.upgrade().map(NodeRef)
|
|||
|
}
|
|||
|
|
|||
|
/// Return a reference to the previous sibling of this node, unless it is a first child.
|
|||
|
#[inline]
|
|||
|
pub fn previous_sibling(&self) -> Option<NodeRef> {
|
|||
|
self.previous_sibling.upgrade().map(NodeRef)
|
|||
|
}
|
|||
|
|
|||
|
/// Return a reference to the next sibling of this node, unless it is a last child.
|
|||
|
#[inline]
|
|||
|
pub fn next_sibling(&self) -> Option<NodeRef> {
|
|||
|
self.next_sibling.clone_inner().map(NodeRef)
|
|||
|
}
|
|||
|
|
|||
|
/// Detach a node from its parent and siblings. Children are not affected.
|
|||
|
///
|
|||
|
/// To remove a node and its descendants, detach it and drop any strong reference to it.
|
|||
|
pub fn detach(&self) {
|
|||
|
let parent_weak = self.parent.take();
|
|||
|
let previous_sibling_weak = self.previous_sibling.take();
|
|||
|
let next_sibling_strong = self.next_sibling.take();
|
|||
|
|
|||
|
let previous_sibling_opt = previous_sibling_weak
|
|||
|
.as_ref()
|
|||
|
.and_then(|weak| weak.upgrade());
|
|||
|
|
|||
|
if let Some(next_sibling_ref) = next_sibling_strong.as_ref() {
|
|||
|
next_sibling_ref
|
|||
|
.previous_sibling
|
|||
|
.replace(previous_sibling_weak);
|
|||
|
} else if let Some(parent_ref) = parent_weak.as_ref() {
|
|||
|
if let Some(parent_strong) = parent_ref.upgrade() {
|
|||
|
parent_strong.last_child.replace(previous_sibling_weak);
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if let Some(previous_sibling_strong) = previous_sibling_opt {
|
|||
|
previous_sibling_strong
|
|||
|
.next_sibling
|
|||
|
.replace(next_sibling_strong);
|
|||
|
} else if let Some(parent_ref) = parent_weak.as_ref() {
|
|||
|
if let Some(parent_strong) = parent_ref.upgrade() {
|
|||
|
parent_strong.first_child.replace(next_sibling_strong);
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
impl NodeRef {
|
|||
|
/// Append a new child to this node, after existing children.
|
|||
|
///
|
|||
|
/// The new child is detached from its previous position.
|
|||
|
pub fn append(&self, new_child: NodeRef) {
|
|||
|
new_child.detach();
|
|||
|
new_child.parent.replace(Some(Rc::downgrade(&self.0)));
|
|||
|
if let Some(last_child_weak) = self.last_child.replace(Some(Rc::downgrade(&new_child.0))) {
|
|||
|
if let Some(last_child) = last_child_weak.upgrade() {
|
|||
|
new_child.previous_sibling.replace(Some(last_child_weak));
|
|||
|
debug_assert!(last_child.next_sibling.is_none());
|
|||
|
last_child.next_sibling.replace(Some(new_child.0));
|
|||
|
return;
|
|||
|
}
|
|||
|
}
|
|||
|
debug_assert!(self.first_child.is_none());
|
|||
|
self.first_child.replace(Some(new_child.0));
|
|||
|
}
|
|||
|
|
|||
|
/// Prepend a new child to this node, before existing children.
|
|||
|
///
|
|||
|
/// The new child is detached from its previous position.
|
|||
|
pub fn prepend(&self, new_child: NodeRef) {
|
|||
|
new_child.detach();
|
|||
|
new_child.parent.replace(Some(Rc::downgrade(&self.0)));
|
|||
|
if let Some(first_child) = self.first_child.take() {
|
|||
|
debug_assert!(first_child.previous_sibling.is_none());
|
|||
|
first_child
|
|||
|
.previous_sibling
|
|||
|
.replace(Some(Rc::downgrade(&new_child.0)));
|
|||
|
new_child.next_sibling.replace(Some(first_child));
|
|||
|
} else {
|
|||
|
debug_assert!(self.first_child.is_none());
|
|||
|
self.last_child.replace(Some(Rc::downgrade(&new_child.0)));
|
|||
|
}
|
|||
|
self.first_child.replace(Some(new_child.0));
|
|||
|
}
|
|||
|
|
|||
|
/// Insert a new sibling after this node.
|
|||
|
///
|
|||
|
/// The new sibling is detached from its previous position.
|
|||
|
pub fn insert_after(&self, new_sibling: NodeRef) {
|
|||
|
new_sibling.detach();
|
|||
|
new_sibling.parent.replace(self.parent.clone_inner());
|
|||
|
new_sibling
|
|||
|
.previous_sibling
|
|||
|
.replace(Some(Rc::downgrade(&self.0)));
|
|||
|
if let Some(next_sibling) = self.next_sibling.take() {
|
|||
|
debug_assert!(next_sibling.previous_sibling().unwrap() == *self);
|
|||
|
next_sibling
|
|||
|
.previous_sibling
|
|||
|
.replace(Some(Rc::downgrade(&new_sibling.0)));
|
|||
|
new_sibling.next_sibling.replace(Some(next_sibling));
|
|||
|
} else if let Some(parent) = self.parent() {
|
|||
|
debug_assert!(parent.last_child().unwrap() == *self);
|
|||
|
parent
|
|||
|
.last_child
|
|||
|
.replace(Some(Rc::downgrade(&new_sibling.0)));
|
|||
|
}
|
|||
|
self.next_sibling.replace(Some(new_sibling.0));
|
|||
|
}
|
|||
|
|
|||
|
/// Insert a new sibling before this node.
|
|||
|
///
|
|||
|
/// The new sibling is detached from its previous position.
|
|||
|
pub fn insert_before(&self, new_sibling: NodeRef) {
|
|||
|
new_sibling.detach();
|
|||
|
new_sibling.parent.replace(self.parent.clone_inner());
|
|||
|
new_sibling.next_sibling.replace(Some(self.0.clone()));
|
|||
|
if let Some(previous_sibling_weak) = self
|
|||
|
.previous_sibling
|
|||
|
.replace(Some(Rc::downgrade(&new_sibling.0)))
|
|||
|
{
|
|||
|
if let Some(previous_sibling) = previous_sibling_weak.upgrade() {
|
|||
|
new_sibling
|
|||
|
.previous_sibling
|
|||
|
.replace(Some(previous_sibling_weak));
|
|||
|
debug_assert!(previous_sibling.next_sibling().unwrap() == *self);
|
|||
|
previous_sibling.next_sibling.replace(Some(new_sibling.0));
|
|||
|
return;
|
|||
|
}
|
|||
|
}
|
|||
|
if let Some(parent) = self.parent() {
|
|||
|
debug_assert!(parent.first_child().unwrap() == *self);
|
|||
|
parent.first_child.replace(Some(new_sibling.0));
|
|||
|
}
|
|||
|
}
|
|||
|
}
|