newsletter-to-web/kuchiki/src/tree.rs

490 lines
16 KiB
Rust
Raw Normal View History

use html5ever::tree_builder::QuirksMode;
use html5ever::QualName;
use std::cell::{Cell, RefCell};
use std::fmt;
use std::ops::Deref;
use std::rc::{Rc, Weak};
use crate::attributes::{Attribute, Attributes, ExpandedName};
use crate::cell_extras::*;
use crate::iter::NodeIterator;
/// Node data specific to the node type.
///
/// Each [`Node`] stores exactly one value of this enum in its `data` field.
/// The string payloads of text, comment and processing-instruction nodes are
/// wrapped in `RefCell` so that they can be modified through a shared reference.
#[derive(Debug, PartialEq, Clone)]
pub enum NodeData {
/// Element node, carrying its name, attributes and optional template contents.
Element(ElementData),
/// Text node, carrying its text.
Text(RefCell<String>),
/// Comment node, carrying the comment text.
Comment(RefCell<String>),
/// Processing instruction node, carrying a `(target, data)` pair.
ProcessingInstruction(RefCell<(String, String)>),
/// Doctype node, carrying its name, public ID and system ID.
Doctype(Doctype),
/// Document node, carrying the document's quirks mode.
Document(DocumentData),
/// Document fragment node; it carries no additional data.
DocumentFragment,
}
/// Data specific to doctype nodes.
///
/// [`NodeRef::new_doctype`] stores its three arguments in these fields
/// after converting each of them into a `String`.
#[derive(Debug, PartialEq, Clone)]
pub struct Doctype {
/// The name of the doctype
pub name: String,
/// The public ID of the doctype
pub public_id: String,
/// The system ID of the doctype
pub system_id: String,
}
/// Data specific to element nodes.
///
/// Note that cloning an `ElementData` clones the `NodeRef` in
/// `template_contents`, so the clone refers to the same template contents node
/// rather than to a copy of it.
#[derive(Debug, PartialEq, Clone)]
pub struct ElementData {
/// The namespace and local name of the element, such as `ns!(html)` and `body`.
pub name: QualName,
/// The attributes of the element.
///
/// They are kept in a `RefCell` so that they can be modified through a shared reference.
pub attributes: RefCell<Attributes>,
/// If the element is an HTML `<template>` element,
/// the document fragment node that is the root of template contents.
///
/// [`NodeRef::new_element`] fills this in with a new document fragment for
/// HTML `template` elements and leaves it as `None` for every other name.
pub template_contents: Option<NodeRef>,
}
/// Data specific to document nodes.
///
/// The quirks mode is read through [`DocumentData::quirks_mode`].
#[derive(Debug, PartialEq, Clone)]
pub struct DocumentData {
// Storage for the quirks mode. It lives in a `Cell` so that it can be replaced
// through a shared reference; `NodeRef::new_document` initializes it to
// `QuirksMode::NoQuirks`. The field is public but hidden from generated docs.
#[doc(hidden)]
pub _quirks_mode: Cell<QuirksMode>,
}
impl DocumentData {
    /// Return the document's quirks mode, as determined by the HTML parser.
    ///
    /// This copies the current value out of the `_quirks_mode` cell.
    #[inline]
    pub fn quirks_mode(&self) -> QuirksMode {
        let mode_cell = &self._quirks_mode;
        mode_cell.get()
    }
}
/// A strong reference to a node.
///
/// A node is destroyed when the last strong reference to it is dropped.
///
/// Each node holds a strong reference to its first child and next sibling (if any),
/// but only a weak reference to its last child, previous sibling, and parent.
/// This is to avoid strong reference cycles, which would cause memory leaks.
///
/// As a result, a single `NodeRef` is sufficient to keep alive a node
/// and nodes that are after it in tree order
/// (its descendants, its following siblings, and their descendants)
/// but not other nodes in a tree.
///
/// To avoid destroying nodes prematurely,
/// programs typically hold a strong reference to the root of a document
/// until they're done with that document.
///
/// Cloning a `NodeRef` clones the wrapped `Rc`, which yields another strong
/// reference to the same node rather than a copy of the node.
#[derive(Clone, Debug)]
pub struct NodeRef(pub Rc<Node>);
/// A `NodeRef` dereferences to the [`Node`] it points to,
/// so `Node` methods can be called directly on a `NodeRef`.
impl Deref for NodeRef {
    type Target = Node;

    /// Borrow the node behind this reference.
    #[inline]
    fn deref(&self) -> &Self::Target {
        // `&Rc<Node>` coerces to `&Node` at the return site.
        &self.0
    }
}
impl Eq for NodeRef {}
impl PartialEq for NodeRef {
#[inline]
fn eq(&self, other: &NodeRef) -> bool {
let a: *const Node = &*self.0;
let b: *const Node = &*other.0;
a == b
}
}
/// A node inside a DOM-like tree.
///
/// The links to the parent, the previous sibling and the last child are weak,
/// while the links to the first child and the next sibling are strong.
/// Every link lives in a `Cell` so that the tree can be rewired
/// through shared references to its nodes.
pub struct Node {
// Weak link to the parent node, if any.
parent: Cell<Option<Weak<Node>>>,
// Weak link to the sibling that comes just before this node, if any.
previous_sibling: Cell<Option<Weak<Node>>>,
// Strong link to the sibling that comes just after this node, if any.
next_sibling: Cell<Option<Rc<Node>>>,
// Strong link to the first child, if any.
first_child: Cell<Option<Rc<Node>>>,
// Weak link to the last child, if any.
last_child: Cell<Option<Weak<Node>>>,
// The payload that depends on the type of node.
data: NodeData,
}
impl fmt::Debug for Node {
    /// Write the node's data in `Debug` form, followed by ` @ ` and the node's address.
    ///
    /// Tree links (parent, siblings, children) are not printed.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let address: *const Node = self;
        write!(f, "{:?} @ {:?}", self.data, address)
    }
}
/// Prevent implicit recursion when dropping nodes to avoid overflowing the stack.
///
/// The implicit drop is correct, but recursive.
/// In the worst case (where no node has both a next sibling and a child),
/// a tree of a few tens of thousands of nodes could cause a stack overflow.
///
/// This `Drop` implementation makes sure the recursion does not happen.
/// Instead, it has an explicit `Vec<Rc<Node>>` stack to traverse the subtree,
/// but only following `Rc<Node>` references that are "unique":
/// that have a strong reference count of 1.
/// Those are the nodes that would have been dropped recursively.
///
/// The stack holds ancestors of the current node rather than preceding siblings,
/// on the assumption that large document trees are typically wider than deep.
impl Drop for Node {
fn drop(&mut self) {
// `.take_if_unique_strong()` temporarily leaves the tree in an inconsistent state,
// as the corresponding `Weak` reference in the other direction is not removed.
// It is important that all `Some(_)` strong references it returns
// are dropped by the end of this `drop` call,
// and that no user code is invoked in-between.
// Sharing `stack` between these two calls is not necessary,
// but it allows re-using memory allocations.
let mut stack = Vec::new();
if let Some(rc) = self.first_child.take_if_unique_strong() {
non_recursive_drop_unique_rc(rc, &mut stack);
}
if let Some(rc) = self.next_sibling.take_if_unique_strong() {
non_recursive_drop_unique_rc(rc, &mut stack);
}
// Drop `rc` together with every node reachable from it through
// uniquely-owned first-child and next-sibling links,
// using `stack` to remember ancestors instead of recursing.
fn non_recursive_drop_unique_rc(mut rc: Rc<Node>, stack: &mut Vec<Rc<Node>>) {
loop {
// Go down first: keep the current node on the stack and move to its child.
if let Some(child) = rc.first_child.take_if_unique_strong() {
stack.push(rc);
rc = child;
continue;
}
if let Some(sibling) = rc.next_sibling.take_if_unique_strong() {
// The previous value of `rc: Rc<Node>` is dropped here.
// Since it was unique, the corresponding `Node` is dropped as well.
// `<Node as Drop>::drop` does not call `non_recursive_drop_unique_rc`
// as both the first child and next sibling were already taken.
// Weak reference counts decremented here for `Cell`s that are `Some`:
// * `rc.parent`: still has a strong reference in `stack` or elsewhere
// * `rc.last_child`: this is the last weak ref. Deallocated now.
// * `rc.previous_sibling`: this is the last weak ref. Deallocated now.
rc = sibling;
continue;
}
// No unique child or sibling is left: go back up to the saved ancestor.
if let Some(parent) = stack.pop() {
// Same as in the above comment.
rc = parent;
continue;
}
// The stack is empty: returning drops the last `rc`.
return;
}
}
}
}
impl NodeRef {
    /// Allocate a new node holding `data`.
    ///
    /// The node starts out with no parent, no siblings and no children.
    #[inline]
    pub fn new(data: NodeData) -> NodeRef {
        let detached = Node {
            data,
            parent: Cell::new(None),
            previous_sibling: Cell::new(None),
            next_sibling: Cell::new(None),
            first_child: Cell::new(None),
            last_child: Cell::new(None),
        };
        NodeRef(Rc::new(detached))
    }

    /// Allocate a new element node with the given name and attributes.
    ///
    /// When `name` is the HTML `template` element, the element additionally gets
    /// a new document fragment node as its template contents.
    #[inline]
    pub fn new_element<I>(name: QualName, attributes: I) -> NodeRef
    where
        I: IntoIterator<Item = (ExpandedName, Attribute)>,
    {
        // Decide on the template contents first, while `name` can still be borrowed.
        let is_template = name.expanded() == expanded_name!(html "template");
        let template_contents = if is_template {
            Some(NodeRef::new(NodeData::DocumentFragment))
        } else {
            None
        };
        let attributes = Attributes {
            map: attributes.into_iter().collect(),
        };
        let element = ElementData {
            name,
            attributes: RefCell::new(attributes),
            template_contents,
        };
        NodeRef::new(NodeData::Element(element))
    }

    /// Allocate a new text node containing `value`.
    #[inline]
    pub fn new_text<T: Into<String>>(value: T) -> NodeRef {
        let contents: String = value.into();
        NodeRef::new(NodeData::Text(RefCell::new(contents)))
    }

    /// Allocate a new comment node containing `value`.
    #[inline]
    pub fn new_comment<T: Into<String>>(value: T) -> NodeRef {
        let contents: String = value.into();
        NodeRef::new(NodeData::Comment(RefCell::new(contents)))
    }

    /// Allocate a new processing instruction node from its target and data.
    #[inline]
    pub fn new_processing_instruction<T1, T2>(target: T1, data: T2) -> NodeRef
    where
        T1: Into<String>,
        T2: Into<String>,
    {
        let target_and_data: (String, String) = (target.into(), data.into());
        NodeRef::new(NodeData::ProcessingInstruction(RefCell::new(
            target_and_data,
        )))
    }

    /// Allocate a new doctype node from its name, public ID and system ID.
    #[inline]
    pub fn new_doctype<T1, T2, T3>(name: T1, public_id: T2, system_id: T3) -> NodeRef
    where
        T1: Into<String>,
        T2: Into<String>,
        T3: Into<String>,
    {
        let doctype = Doctype {
            name: name.into(),
            public_id: public_id.into(),
            system_id: system_id.into(),
        };
        NodeRef::new(NodeData::Doctype(doctype))
    }

    /// Allocate a new document node whose quirks mode is `QuirksMode::NoQuirks`.
    #[inline]
    pub fn new_document() -> NodeRef {
        let document = DocumentData {
            _quirks_mode: Cell::new(QuirksMode::NoQuirks),
        };
        NodeRef::new(NodeData::Document(document))
    }

    /// Concatenate the contents of every text node found among
    /// this node and its descendants, and return the result.
    pub fn text_contents(&self) -> String {
        self.inclusive_descendants()
            .text_nodes()
            .fold(String::new(), |mut joined, text_node| {
                joined.push_str(&text_node.borrow());
                joined
            })
    }
}
impl Node {
/// Return a reference to this nodes node-type-specific data.
#[inline]
pub fn data(&self) -> &NodeData {
&self.data
}
/// If this node is an element, return a reference to element-specific data.
#[inline]
pub fn as_element(&self) -> Option<&ElementData> {
match self.data {
NodeData::Element(ref value) => Some(value),
_ => None,
}
}
/// If this node is a text node, return a reference to its contents.
#[inline]
pub fn as_text(&self) -> Option<&RefCell<String>> {
match self.data {
NodeData::Text(ref value) => Some(value),
_ => None,
}
}
/// If this node is a comment, return a reference to its contents.
#[inline]
pub fn as_comment(&self) -> Option<&RefCell<String>> {
match self.data {
NodeData::Comment(ref value) => Some(value),
_ => None,
}
}
/// If this node is a document, return a reference to doctype-specific data.
#[inline]
pub fn as_doctype(&self) -> Option<&Doctype> {
match self.data {
NodeData::Doctype(ref value) => Some(value),
_ => None,
}
}
/// If this node is a document, return a reference to document-specific data.
#[inline]
pub fn as_document(&self) -> Option<&DocumentData> {
match self.data {
NodeData::Document(ref value) => Some(value),
_ => None,
}
}
/// Return a reference to the parent node, unless this node is the root of the tree.
#[inline]
pub fn parent(&self) -> Option<NodeRef> {
self.parent.upgrade().map(NodeRef)
}
/// Return a reference to the first child of this node, unless it has no child.
#[inline]
pub fn first_child(&self) -> Option<NodeRef> {
self.first_child.clone_inner().map(NodeRef)
}
/// Return a reference to the last child of this node, unless it has no child.
#[inline]
pub fn last_child(&self) -> Option<NodeRef> {
self.last_child.upgrade().map(NodeRef)
}
/// Return a reference to the previous sibling of this node, unless it is a first child.
#[inline]
pub fn previous_sibling(&self) -> Option<NodeRef> {
self.previous_sibling.upgrade().map(NodeRef)
}
/// Return a reference to the next sibling of this node, unless it is a last child.
#[inline]
pub fn next_sibling(&self) -> Option<NodeRef> {
self.next_sibling.clone_inner().map(NodeRef)
}
/// Detach a node from its parent and siblings. Children are not affected.
///
/// To remove a node and its descendants, detach it and drop any strong reference to it.
pub fn detach(&self) {
let parent_weak = self.parent.take();
let previous_sibling_weak = self.previous_sibling.take();
let next_sibling_strong = self.next_sibling.take();
let previous_sibling_opt = previous_sibling_weak
.as_ref()
.and_then(|weak| weak.upgrade());
if let Some(next_sibling_ref) = next_sibling_strong.as_ref() {
next_sibling_ref
.previous_sibling
.replace(previous_sibling_weak);
} else if let Some(parent_ref) = parent_weak.as_ref() {
if let Some(parent_strong) = parent_ref.upgrade() {
parent_strong.last_child.replace(previous_sibling_weak);
}
}
if let Some(previous_sibling_strong) = previous_sibling_opt {
previous_sibling_strong
.next_sibling
.replace(next_sibling_strong);
} else if let Some(parent_ref) = parent_weak.as_ref() {
if let Some(parent_strong) = parent_ref.upgrade() {
parent_strong.first_child.replace(next_sibling_strong);
}
}
}
}
// NOTE(review): none of the methods below checks that the node being inserted
// is different from `self` (or, for `append`/`prepend`, that it is not an
// ancestor of `self`). Such calls look like they would link a node to itself
// or create a reference cycle -- confirm that callers never do this.
impl NodeRef {
/// Append a new child to this node, after existing children.
///
/// The new child is detached from its previous position.
pub fn append(&self, new_child: NodeRef) {
new_child.detach();
new_child.parent.replace(Some(Rc::downgrade(&self.0)));
// Record the new child as the last child;
// `replace` hands back the link to the former last child, if there was one.
if let Some(last_child_weak) = self.last_child.replace(Some(Rc::downgrade(&new_child.0))) {
if let Some(last_child) = last_child_weak.upgrade() {
// There already was a last child: make it and the new child siblings.
new_child.previous_sibling.replace(Some(last_child_weak));
debug_assert!(last_child.next_sibling.is_none());
last_child.next_sibling.replace(Some(new_child.0));
return;
}
}
// There was no previous last child: the new child is the first child as well.
debug_assert!(self.first_child.is_none());
self.first_child.replace(Some(new_child.0));
}
/// Prepend a new child to this node, before existing children.
///
/// The new child is detached from its previous position.
pub fn prepend(&self, new_child: NodeRef) {
new_child.detach();
new_child.parent.replace(Some(Rc::downgrade(&self.0)));
if let Some(first_child) = self.first_child.take() {
// There already was a first child: it becomes the next sibling of the new child.
debug_assert!(first_child.previous_sibling.is_none());
first_child
.previous_sibling
.replace(Some(Rc::downgrade(&new_child.0)));
new_child.next_sibling.replace(Some(first_child));
} else {
// There were no children: the new child is the last child as well.
debug_assert!(self.first_child.is_none());
self.last_child.replace(Some(Rc::downgrade(&new_child.0)));
}
self.first_child.replace(Some(new_child.0));
}
/// Insert a new sibling after this node.
///
/// The new sibling is detached from its previous position.
pub fn insert_after(&self, new_sibling: NodeRef) {
new_sibling.detach();
// The new sibling gets the same parent link as this node (possibly none).
new_sibling.parent.replace(self.parent.clone_inner());
new_sibling
.previous_sibling
.replace(Some(Rc::downgrade(&self.0)));
if let Some(next_sibling) = self.next_sibling.take() {
// Put the new sibling between this node and its former next sibling.
debug_assert!(next_sibling.previous_sibling().unwrap() == *self);
next_sibling
.previous_sibling
.replace(Some(Rc::downgrade(&new_sibling.0)));
new_sibling.next_sibling.replace(Some(next_sibling));
} else if let Some(parent) = self.parent() {
// This node was the last child: the new sibling takes over that role.
debug_assert!(parent.last_child().unwrap() == *self);
parent
.last_child
.replace(Some(Rc::downgrade(&new_sibling.0)));
}
self.next_sibling.replace(Some(new_sibling.0));
}
/// Insert a new sibling before this node.
///
/// The new sibling is detached from its previous position.
pub fn insert_before(&self, new_sibling: NodeRef) {
new_sibling.detach();
// The new sibling gets the same parent link as this node (possibly none).
new_sibling.parent.replace(self.parent.clone_inner());
new_sibling.next_sibling.replace(Some(self.0.clone()));
// Point this node back at the new sibling;
// `replace` hands back the link to the former previous sibling, if there was one.
if let Some(previous_sibling_weak) = self
.previous_sibling
.replace(Some(Rc::downgrade(&new_sibling.0)))
{
if let Some(previous_sibling) = previous_sibling_weak.upgrade() {
// Put the new sibling between the former previous sibling and this node.
new_sibling
.previous_sibling
.replace(Some(previous_sibling_weak));
debug_assert!(previous_sibling.next_sibling().unwrap() == *self);
previous_sibling.next_sibling.replace(Some(new_sibling.0));
return;
}
}
// There was no previous sibling: if this node has a parent,
// the new sibling becomes that parent's first child.
if let Some(parent) = self.parent() {
debug_assert!(parent.first_child().unwrap() == *self);
parent.first_child.replace(Some(new_sibling.0));
}
}
}