Add fork of kuchiki
Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
This commit is contained in:
parent
4e3f7b46da
commit
ecb435bbc4
|
@ -3,4 +3,5 @@
|
|||
members = [
|
||||
"bin",
|
||||
"sanitize-html-rs",
|
||||
"kuchiki",
|
||||
]
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
target
|
||||
Cargo.lock
|
||||
.cargo/config
|
|
@ -0,0 +1,6 @@
|
|||
sudo: false
|
||||
language: rust
|
||||
rust:
|
||||
- nightly
|
||||
- beta
|
||||
- stable
|
|
@ -0,0 +1,22 @@
|
|||
[package]
|
||||
name = "kuchiki"
|
||||
version = "0.8.1"
|
||||
authors = ["Simon Sapin <simon.sapin@exyr.org>"]
|
||||
license = "MIT"
|
||||
description = "(朽木) HTML/XML tree manipulation library"
|
||||
repository = "https://github.com/kuchiki-rs/kuchiki"
|
||||
edition = "2018"
|
||||
|
||||
[lib]
|
||||
name = "kuchiki"
|
||||
doctest = false
|
||||
|
||||
[dependencies]
|
||||
cssparser = "0.27"
|
||||
matches = "0.1.4"
|
||||
html5ever = "0.25"
|
||||
selectors = "0.22"
|
||||
indexmap = "1.6.0"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
|
@ -0,0 +1,23 @@
|
|||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,10 @@
|
|||
Kuchiki (朽木)
|
||||
==============
|
||||
|
||||
HTML/XML¹ tree manipulation library for Rust.
|
||||
|
||||
[Documentation](https://docs.rs/kuchiki/)
|
||||
|
||||
See [users.rust-lang.org discussion](http://users.rust-lang.org/t/kuchiki-a-vaporware-html-xml-tree-manipulation-library/435).
|
||||
|
||||
¹ There is no support for XML syntax yet. The plan is to integrate with an existing parser.
|
|
@ -0,0 +1,3 @@
|
|||
<meta http-equiv="refresh" content="0; url=https://docs.rs/kuchiki/">
|
||||
<link rel="canonical" href="https://docs.rs/kuchiki/">
|
||||
<a href="https://docs.rs/kuchiki/">Moved to docs.rs</a>
|
|
@ -0,0 +1,3 @@
|
|||
<meta http-equiv="refresh" content="0; url=https://docs.rs/kuchiki/">
|
||||
<link rel="canonical" href="https://docs.rs/kuchiki/">
|
||||
<a href="https://docs.rs/kuchiki/">Moved to docs.rs</a>
|
|
@ -0,0 +1,48 @@
|
|||
extern crate kuchiki;
|
||||
|
||||
use kuchiki::traits::*;
|
||||
|
||||
fn main() {
|
||||
let html = r"
|
||||
<DOCTYPE html>
|
||||
<html>
|
||||
<head></head>
|
||||
<body>
|
||||
<h1>Example</h1>
|
||||
<p class='foo'>Hello, world!</p>
|
||||
<p class='foo'>I love HTML</p>
|
||||
</body>
|
||||
</html>
|
||||
";
|
||||
let css_selector = ".foo";
|
||||
|
||||
let document = kuchiki::parse_html().one(html);
|
||||
|
||||
for css_match in document.select(css_selector).unwrap() {
|
||||
// css_match is a NodeDataRef, but most of the interesting methods are
|
||||
// on NodeRef. Let's get the underlying NodeRef.
|
||||
let as_node = css_match.as_node();
|
||||
|
||||
// In this example, as_node represents an HTML node like
|
||||
//
|
||||
// <p class='foo'>Hello world!</p>"
|
||||
//
|
||||
// Which is distinct from just 'Hello world!'. To get rid of that <p>
|
||||
// tag, we're going to get each element's first child, which will be
|
||||
// a "text" node.
|
||||
//
|
||||
// There are other kinds of nodes, of course. The possibilities are all
|
||||
// listed in the `NodeData` enum in this crate.
|
||||
let text_node = as_node.first_child().unwrap();
|
||||
|
||||
// Let's get the actual text in this text node. A text node wraps around
|
||||
// a RefCell<String>, so we need to call borrow() to get a &str out.
|
||||
let text = text_node.as_text().unwrap().borrow();
|
||||
|
||||
// Prints:
|
||||
//
|
||||
// "Hello, world!"
|
||||
// "I love HTML"
|
||||
println!("{:?}", text);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
extern crate kuchiki;
|
||||
|
||||
fn main() {
|
||||
let mut depth = 2;
|
||||
// 20 M nodes is a few GB of memory.
|
||||
while depth <= 20_000_000 {
|
||||
let mut node = kuchiki::NodeRef::new_text("");
|
||||
for _ in 0..depth {
|
||||
let parent = kuchiki::NodeRef::new_text("");
|
||||
parent.append(node);
|
||||
node = parent;
|
||||
}
|
||||
|
||||
println!("Trying to drop {} nodes...", depth);
|
||||
// Without an explicit `impl Drop for Node`,
|
||||
// depth = 20_000 causes "thread '<main>' has overflowed its stack"
|
||||
// on my machine (Linux x86_64).
|
||||
::std::mem::drop(node);
|
||||
|
||||
depth *= 10;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
use html5ever::{LocalName, Namespace, Prefix};
|
||||
use indexmap::{map::Entry, IndexMap};
|
||||
|
||||
/// Convenience wrapper around a indexmap that adds method for attributes in the null namespace.
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct Attributes {
|
||||
/// A map of attributes whose name can have namespaces.
|
||||
pub map: IndexMap<ExpandedName, Attribute>,
|
||||
}
|
||||
|
||||
/// <https://www.w3.org/TR/REC-xml-names/#dt-expname>
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
|
||||
pub struct ExpandedName {
|
||||
/// Namespace URL
|
||||
pub ns: Namespace,
|
||||
/// "Local" part of the name
|
||||
pub local: LocalName,
|
||||
}
|
||||
|
||||
impl ExpandedName {
|
||||
/// Trivial constructor
|
||||
pub fn new<N: Into<Namespace>, L: Into<LocalName>>(ns: N, local: L) -> Self {
|
||||
ExpandedName {
|
||||
ns: ns.into(),
|
||||
local: local.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The non-identifying parts of an attribute
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct Attribute {
|
||||
/// The namespace prefix, if any
|
||||
pub prefix: Option<Prefix>,
|
||||
/// The attribute value
|
||||
pub value: String,
|
||||
}
|
||||
|
||||
impl Attributes {
|
||||
/// Like IndexMap::contains
|
||||
pub fn contains<A: Into<LocalName>>(&self, local_name: A) -> bool {
|
||||
self.map.contains_key(&ExpandedName::new(ns!(), local_name))
|
||||
}
|
||||
|
||||
/// Like IndexMap::get
|
||||
pub fn get<A: Into<LocalName>>(&self, local_name: A) -> Option<&str> {
|
||||
self.map
|
||||
.get(&ExpandedName::new(ns!(), local_name))
|
||||
.map(|attr| &*attr.value)
|
||||
}
|
||||
|
||||
/// Like IndexMap::get_mut
|
||||
pub fn get_mut<A: Into<LocalName>>(&mut self, local_name: A) -> Option<&mut String> {
|
||||
self.map
|
||||
.get_mut(&ExpandedName::new(ns!(), local_name))
|
||||
.map(|attr| &mut attr.value)
|
||||
}
|
||||
|
||||
/// Like IndexMap::entry
|
||||
pub fn entry<A: Into<LocalName>>(&mut self, local_name: A) -> Entry<ExpandedName, Attribute> {
|
||||
self.map.entry(ExpandedName::new(ns!(), local_name))
|
||||
}
|
||||
|
||||
/// Like IndexMap::insert
|
||||
pub fn insert<A: Into<LocalName>>(
|
||||
&mut self,
|
||||
local_name: A,
|
||||
value: String,
|
||||
) -> Option<Attribute> {
|
||||
self.map.insert(
|
||||
ExpandedName::new(ns!(), local_name),
|
||||
Attribute {
|
||||
prefix: None,
|
||||
value,
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Like IndexMap::remove
|
||||
pub fn remove<A: Into<LocalName>>(&mut self, local_name: A) -> Option<Attribute> {
|
||||
self.map.remove(&ExpandedName::new(ns!(), local_name))
|
||||
}
|
||||
}
|
|
@ -0,0 +1,113 @@
|
|||
// Specialized methods for `Cell` of some specific `!Copy` types,
// allowing limited access to a value without moving it out of the cell.
//
//
// # Soundness
//
// These methods use `Cell::as_ptr` and `unsafe`.
// Their soundness lies in that:
//
// * `Cell<T>: !Sync` for any `T`, so no other thread is accessing this cell.
// * For the duration of the raw pointer access,
//   this thread only runs code that is known to not access the same cell again.
//   In particular, no method of a type parameter is called.
//   For example, `clone_inner` would be unsound to generalize to any `Cell<T>`
//   because it would involve running arbitrary code through `T::clone`
//   and provide that code with a reference to the inside of the cell.
//
// ```rust
// struct Evil(Box<u32>, Rc<Cell<Option<Evil>>>);
// impl Clone for Evil {
//     fn clone(&self) -> Self {
//         mem::drop(self.1.take()); // Mess with the "other" node, which might be `self`.
//         Evil(
//             self.0.clone(), // possible use after free!
//             Rc::new(Cell::new(None))
//         )
//     }
// }
// let a = Rc::new(Cell::new(None));
// a.set(Some(Evil(Box::new(5), a.clone()))); // Make a reference cycle.
// a.clone_inner();
// ```
//
// `Rc<T>::clone` and `Weak<T>::clone` do not have this problem
// as they only increment reference counts and never call `T::clone`.
//
//
// # Alternative
//
// To avoid using `unsafe` entirely, operating on a `T: !Copy` value inside a `Cell<T>`
// would require temporarily replacing it with a default value:
//
// ```rust
// fn option_dance<T, F, R>(cell: &Cell<T>, f: F) -> R
// where T: Default, F: FnOnce(&mut T) -> R
// {
//     let mut value = cell.take();
//     let result = f(&mut value);
//     cell.set(value);
//     result
// }
// ```
//
// It would be worth exploring whether LLVM can reliably optimize away these extra moves
// and compile the `Option` dance to assembly similar to that of the `unsafe` operation.

use std::cell::Cell;
use std::rc::{Rc, Weak};

/// `Option`-specific helper for `Cell<Option<T>>`.
pub trait CellOption {
    /// Whether the cell currently holds `None`, without moving the value out.
    fn is_none(&self) -> bool;
}

impl<T> CellOption for Cell<Option<T>> {
    #[inline]
    fn is_none(&self) -> bool {
        // SAFETY: `Cell: !Sync`, and `Option::is_none` runs no user code,
        // so nothing can re-enter this cell during the raw read.
        unsafe { (*self.as_ptr()).is_none() }
    }
}

/// Helpers for `Cell<Option<Weak<T>>>`.
pub trait CellOptionWeak<T> {
    /// Like `Weak::upgrade` on the contained value, without moving it out.
    fn upgrade(&self) -> Option<Rc<T>>;
    /// Clone the contained `Weak` reference without moving it out.
    fn clone_inner(&self) -> Option<Weak<T>>;
}

impl<T> CellOptionWeak<T> for Cell<Option<Weak<T>>> {
    #[inline]
    fn upgrade(&self) -> Option<Rc<T>> {
        // SAFETY: `Weak::upgrade` only touches reference counts and never
        // calls `T` methods, so the cell cannot be re-entered.
        unsafe { (*self.as_ptr()).as_ref().and_then(Weak::upgrade) }
    }

    #[inline]
    fn clone_inner(&self) -> Option<Weak<T>> {
        // SAFETY: `Weak::clone` only increments a reference count and never
        // calls `T::clone`, so the cell cannot be re-entered.
        unsafe { (*self.as_ptr()).clone() }
    }
}

/// Helpers for `Cell<Option<Rc<T>>>`.
pub trait CellOptionRc<T> {
    /// Return `Some` if this `Rc` is the only strong reference count,
    /// even if there are weak references.
    fn take_if_unique_strong(&self) -> Option<Rc<T>>;
    /// Clone the contained `Rc` reference without moving it out.
    fn clone_inner(&self) -> Option<Rc<T>>;
}

impl<T> CellOptionRc<T> for Cell<Option<Rc<T>>> {
    #[inline]
    fn take_if_unique_strong(&self) -> Option<Rc<T>> {
        // SAFETY: `Rc::strong_count` only reads a counter and runs no user
        // code; the `self.take()` below is performed through the safe `Cell`
        // API, after the raw borrow of the matched value has ended.
        unsafe {
            match *self.as_ptr() {
                None => None,
                Some(ref rc) if Rc::strong_count(rc) > 1 => None,
                // Not borrowing the `Rc<T>` here
                // as we would be invalidating that borrow while it is outstanding:
                Some(_) => self.take(),
            }
        }
    }

    #[inline]
    fn clone_inner(&self) -> Option<Rc<T>> {
        // SAFETY: `Rc::clone` only increments a reference count and never
        // calls `T::clone`, so the cell cannot be re-entered.
        unsafe { (*self.as_ptr()).clone() }
    }
}
|
|
@ -0,0 +1,452 @@
|
|||
//! Node iterators
|
||||
|
||||
use std::borrow::Borrow;
|
||||
use std::cell::RefCell;
|
||||
use std::iter::Rev;
|
||||
|
||||
use crate::node_data_ref::NodeDataRef;
|
||||
use crate::select::Selectors;
|
||||
use crate::tree::{ElementData, NodeRef};
|
||||
|
||||
impl NodeRef {
|
||||
/// Return an iterator of references to this node and its ancestors.
|
||||
#[inline]
|
||||
pub fn inclusive_ancestors(&self) -> Ancestors {
|
||||
Ancestors(Some(self.clone()))
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node’s ancestors.
|
||||
#[inline]
|
||||
pub fn ancestors(&self) -> Ancestors {
|
||||
Ancestors(self.parent())
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node and the siblings before it.
|
||||
#[inline]
|
||||
pub fn inclusive_preceding_siblings(&self) -> Rev<Siblings> {
|
||||
match self.parent() {
|
||||
Some(parent) => {
|
||||
let first_sibling = parent.first_child().unwrap();
|
||||
debug_assert!(self.previous_sibling().is_some() || *self == first_sibling);
|
||||
Siblings(Some(State {
|
||||
next: first_sibling,
|
||||
next_back: self.clone(),
|
||||
}))
|
||||
}
|
||||
None => {
|
||||
debug_assert!(self.previous_sibling().is_none());
|
||||
Siblings(Some(State {
|
||||
next: self.clone(),
|
||||
next_back: self.clone(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
.rev()
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node’s siblings before it.
|
||||
#[inline]
|
||||
pub fn preceding_siblings(&self) -> Rev<Siblings> {
|
||||
match (self.parent(), self.previous_sibling()) {
|
||||
(Some(parent), Some(previous_sibling)) => {
|
||||
let first_sibling = parent.first_child().unwrap();
|
||||
Siblings(Some(State {
|
||||
next: first_sibling,
|
||||
next_back: previous_sibling,
|
||||
}))
|
||||
}
|
||||
_ => Siblings(None),
|
||||
}
|
||||
.rev()
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node and the siblings after it.
|
||||
#[inline]
|
||||
pub fn inclusive_following_siblings(&self) -> Siblings {
|
||||
match self.parent() {
|
||||
Some(parent) => {
|
||||
let last_sibling = parent.last_child().unwrap();
|
||||
debug_assert!(self.next_sibling().is_some() || *self == last_sibling);
|
||||
Siblings(Some(State {
|
||||
next: self.clone(),
|
||||
next_back: last_sibling,
|
||||
}))
|
||||
}
|
||||
None => {
|
||||
debug_assert!(self.next_sibling().is_none());
|
||||
Siblings(Some(State {
|
||||
next: self.clone(),
|
||||
next_back: self.clone(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node’s siblings after it.
|
||||
#[inline]
|
||||
pub fn following_siblings(&self) -> Siblings {
|
||||
match (self.parent(), self.next_sibling()) {
|
||||
(Some(parent), Some(next_sibling)) => {
|
||||
let last_sibling = parent.last_child().unwrap();
|
||||
Siblings(Some(State {
|
||||
next: next_sibling,
|
||||
next_back: last_sibling,
|
||||
}))
|
||||
}
|
||||
_ => Siblings(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node’s children.
|
||||
#[inline]
|
||||
pub fn children(&self) -> Siblings {
|
||||
match (self.first_child(), self.last_child()) {
|
||||
(Some(first_child), Some(last_child)) => Siblings(Some(State {
|
||||
next: first_child,
|
||||
next_back: last_child,
|
||||
})),
|
||||
(None, None) => Siblings(None),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node and its descendants, in tree order.
|
||||
///
|
||||
/// Parent nodes appear before the descendants.
|
||||
///
|
||||
/// Note: this is the `NodeEdge::Start` items from `traverse()`.
|
||||
#[inline]
|
||||
pub fn inclusive_descendants(&self) -> Descendants {
|
||||
Descendants(self.traverse_inclusive())
|
||||
}
|
||||
|
||||
/// Return an iterator of references to this node’s descendants, in tree order.
|
||||
///
|
||||
/// Parent nodes appear before the descendants.
|
||||
///
|
||||
/// Note: this is the `NodeEdge::Start` items from `traverse()`.
|
||||
#[inline]
|
||||
pub fn descendants(&self) -> Descendants {
|
||||
Descendants(self.traverse())
|
||||
}
|
||||
|
||||
/// Return an iterator of the start and end edges of this node and its descendants,
|
||||
/// in tree order.
|
||||
#[inline]
|
||||
pub fn traverse_inclusive(&self) -> Traverse {
|
||||
Traverse(Some(State {
|
||||
next: NodeEdge::Start(self.clone()),
|
||||
next_back: NodeEdge::End(self.clone()),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Return an iterator of the start and end edges of this node’s descendants,
|
||||
/// in tree order.
|
||||
#[inline]
|
||||
pub fn traverse(&self) -> Traverse {
|
||||
match (self.first_child(), self.last_child()) {
|
||||
(Some(first_child), Some(last_child)) => Traverse(Some(State {
|
||||
next: NodeEdge::Start(first_child),
|
||||
next_back: NodeEdge::End(last_child),
|
||||
})),
|
||||
(None, None) => Traverse(None),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an iterator of the inclusive descendants element that match the given selector list.
|
||||
#[inline]
|
||||
pub fn select(&self, selectors: &str) -> Result<Select<Elements<Descendants>>, ()> {
|
||||
self.inclusive_descendants().select(selectors)
|
||||
}
|
||||
|
||||
/// Return the first inclusive descendants element that match the given selector list.
|
||||
#[inline]
|
||||
pub fn select_first(&self, selectors: &str) -> Result<NodeDataRef<ElementData>, ()> {
|
||||
let mut elements = self.select(selectors)?;
|
||||
elements.next().ok_or(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct State<T> {
|
||||
next: T,
|
||||
next_back: T,
|
||||
}
|
||||
|
||||
/// A double-ended iterator of sibling nodes.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Siblings(Option<State<NodeRef>>);
|
||||
|
||||
macro_rules! siblings_next {
|
||||
($next: ident, $next_back: ident, $next_sibling: ident) => {
|
||||
fn $next(&mut self) -> Option<NodeRef> {
|
||||
#![allow(non_shorthand_field_patterns)]
|
||||
self.0.take().map(|State { $next: next, $next_back: next_back }| {
|
||||
if let Some(sibling) = next.$next_sibling() {
|
||||
if next != next_back {
|
||||
self.0 = Some(State { $next: sibling, $next_back: next_back })
|
||||
}
|
||||
}
|
||||
next
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Siblings {
|
||||
type Item = NodeRef;
|
||||
siblings_next!(next, next_back, next_sibling);
|
||||
}
|
||||
|
||||
impl DoubleEndedIterator for Siblings {
|
||||
siblings_next!(next_back, next, previous_sibling);
|
||||
}
|
||||
|
||||
/// An iterator on ancestor nodes.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Ancestors(Option<NodeRef>);
|
||||
|
||||
impl Iterator for Ancestors {
|
||||
type Item = NodeRef;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<NodeRef> {
|
||||
self.0.take().map(|node| {
|
||||
self.0 = node.parent();
|
||||
node
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator of references to a given node and its descendants, in tree order.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Descendants(Traverse);
|
||||
|
||||
macro_rules! descendants_next {
|
||||
($next: ident) => {
|
||||
#[inline]
|
||||
fn $next(&mut self) -> Option<NodeRef> {
|
||||
loop {
|
||||
match (self.0).$next() {
|
||||
Some(NodeEdge::Start(node)) => return Some(node),
|
||||
Some(NodeEdge::End(_)) => {}
|
||||
None => return None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Descendants {
|
||||
type Item = NodeRef;
|
||||
descendants_next!(next);
|
||||
}
|
||||
|
||||
impl DoubleEndedIterator for Descendants {
|
||||
descendants_next!(next_back);
|
||||
}
|
||||
|
||||
/// Marks either the start or the end of a node.
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
pub enum NodeEdge<T> {
|
||||
/// Indicates that start of a node that has children.
|
||||
/// Yielded by `Traverse::next` before the node’s descendants.
|
||||
/// In HTML or XML, this corresponds to an opening tag like `<div>`
|
||||
Start(T),
|
||||
|
||||
/// Indicates that end of a node that has children.
|
||||
/// Yielded by `Traverse::next` after the node’s descendants.
|
||||
/// In HTML or XML, this corresponds to a closing tag like `</div>`
|
||||
End(T),
|
||||
}
|
||||
|
||||
/// An iterator of the start and end edges of the nodes in a given subtree.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Traverse(Option<State<NodeEdge<NodeRef>>>);
|
||||
|
||||
macro_rules! traverse_next {
|
||||
($next: ident, $next_back: ident, $first_child: ident, $next_sibling: ident, $Start: ident, $End: ident) => {
|
||||
fn $next(&mut self) -> Option<NodeEdge<NodeRef>> {
|
||||
#![allow(non_shorthand_field_patterns)]
|
||||
self.0.take().map(|State { $next: next, $next_back: next_back }| {
|
||||
if next != next_back {
|
||||
self.0 = match next {
|
||||
NodeEdge::$Start(ref node) => {
|
||||
match node.$first_child() {
|
||||
Some(child) => {
|
||||
Some(State { $next: NodeEdge::$Start(child), $next_back: next_back })
|
||||
}
|
||||
None => Some(State { $next: NodeEdge::$End(node.clone()), $next_back: next_back })
|
||||
}
|
||||
}
|
||||
NodeEdge::$End(ref node) => {
|
||||
match node.$next_sibling() {
|
||||
Some(sibling) => {
|
||||
Some(State { $next: NodeEdge::$Start(sibling), $next_back: next_back })
|
||||
}
|
||||
None => node.parent().map(|parent| {
|
||||
State { $next: NodeEdge::$End(parent), $next_back: next_back }
|
||||
})
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
next
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Traverse {
|
||||
type Item = NodeEdge<NodeRef>;
|
||||
traverse_next!(next, next_back, first_child, next_sibling, Start, End);
|
||||
}
|
||||
|
||||
impl DoubleEndedIterator for Traverse {
|
||||
traverse_next!(next_back, next, last_child, previous_sibling, End, Start);
|
||||
}
|
||||
|
||||
macro_rules! filter_map_like_iterator {
|
||||
(#[$doc: meta] $name: ident: $f: expr, $from: ty => $to: ty) => {
|
||||
#[$doc]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct $name<I>(pub I);
|
||||
|
||||
impl<I> Iterator for $name<I>
|
||||
where
|
||||
I: Iterator<Item = $from>,
|
||||
{
|
||||
type Item = $to;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<$to> {
|
||||
for x in self.0.by_ref() {
|
||||
if let Some(y) = ($f)(x) {
|
||||
return Some(y);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl<I> DoubleEndedIterator for $name<I>
|
||||
where
|
||||
I: DoubleEndedIterator<Item = $from>,
|
||||
{
|
||||
#[inline]
|
||||
fn next_back(&mut self) -> Option<$to> {
|
||||
for x in self.0.by_ref().rev() {
|
||||
if let Some(y) = ($f)(x) {
|
||||
return Some(y);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
filter_map_like_iterator! {
|
||||
/// A node iterator adaptor that yields element nodes.
|
||||
Elements: NodeRef::into_element_ref, NodeRef => NodeDataRef<ElementData>
|
||||
}
|
||||
|
||||
filter_map_like_iterator! {
|
||||
/// A node iterator adaptor that yields comment nodes.
|
||||
Comments: NodeRef::into_comment_ref, NodeRef => NodeDataRef<RefCell<String>>
|
||||
}
|
||||
|
||||
filter_map_like_iterator! {
|
||||
/// A node iterator adaptor that yields text nodes.
|
||||
TextNodes: NodeRef::into_text_ref, NodeRef => NodeDataRef<RefCell<String>>
|
||||
}
|
||||
|
||||
/// An element iterator adaptor that yields elements maching given selectors.
|
||||
pub struct Select<I, S = Selectors>
|
||||
where
|
||||
I: Iterator<Item = NodeDataRef<ElementData>>,
|
||||
S: Borrow<Selectors>,
|
||||
{
|
||||
/// The underlying iterator.
|
||||
pub iter: I,
|
||||
|
||||
/// The selectors to be matched.
|
||||
pub selectors: S,
|
||||
}
|
||||
|
||||
impl<I, S> Iterator for Select<I, S>
|
||||
where
|
||||
I: Iterator<Item = NodeDataRef<ElementData>>,
|
||||
S: Borrow<Selectors>,
|
||||
{
|
||||
type Item = NodeDataRef<ElementData>;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<NodeDataRef<ElementData>> {
|
||||
for element in self.iter.by_ref() {
|
||||
if self.selectors.borrow().matches(&element) {
|
||||
return Some(element);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl<I, S> DoubleEndedIterator for Select<I, S>
|
||||
where
|
||||
I: DoubleEndedIterator<Item = NodeDataRef<ElementData>>,
|
||||
S: Borrow<Selectors>,
|
||||
{
|
||||
#[inline]
|
||||
fn next_back(&mut self) -> Option<NodeDataRef<ElementData>> {
|
||||
for element in self.iter.by_ref().rev() {
|
||||
if self.selectors.borrow().matches(&element) {
|
||||
return Some(element);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience methods for node iterators.
|
||||
pub trait NodeIterator: Sized + Iterator<Item = NodeRef> {
|
||||
/// Filter this element iterator to elements.
|
||||
#[inline]
|
||||
fn elements(self) -> Elements<Self> {
|
||||
Elements(self)
|
||||
}
|
||||
|
||||
/// Filter this node iterator to text nodes.
|
||||
#[inline]
|
||||
fn text_nodes(self) -> TextNodes<Self> {
|
||||
TextNodes(self)
|
||||
}
|
||||
|
||||
/// Filter this node iterator to comment nodes.
|
||||
#[inline]
|
||||
fn comments(self) -> Comments<Self> {
|
||||
Comments(self)
|
||||
}
|
||||
|
||||
/// Filter this node iterator to elements maching the given selectors.
|
||||
#[inline]
|
||||
fn select(self, selectors: &str) -> Result<Select<Elements<Self>>, ()> {
|
||||
self.elements().select(selectors)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience methods for element iterators.
|
||||
pub trait ElementIterator: Sized + Iterator<Item = NodeDataRef<ElementData>> {
|
||||
/// Filter this element iterator to elements maching the given selectors.
|
||||
#[inline]
|
||||
fn select(self, selectors: &str) -> Result<Select<Self>, ()> {
|
||||
Selectors::compile(selectors).map(|s| Select {
|
||||
iter: self,
|
||||
selectors: s,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<I> NodeIterator for I where I: Iterator<Item = NodeRef> {}
|
||||
impl<I> ElementIterator for I where I: Iterator<Item = NodeDataRef<ElementData>> {}
|
|
@ -0,0 +1,40 @@
|
|||
/*!
|
||||
|
||||
Kuchiki (朽木), a HTML/XML tree manipulation library for Rust.
|
||||
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
#[macro_use]
|
||||
extern crate html5ever;
|
||||
#[macro_use]
|
||||
extern crate matches;
|
||||
|
||||
mod attributes;
|
||||
mod cell_extras;
|
||||
pub mod iter;
|
||||
mod node_data_ref;
|
||||
mod parser;
|
||||
mod select;
|
||||
mod serializer;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
mod tree;
|
||||
|
||||
pub use attributes::{Attribute, Attributes, ExpandedName};
|
||||
pub use node_data_ref::NodeDataRef;
|
||||
pub use parser::{parse_html, parse_html_with_options, parse_fragment, ParseOpts, Sink};
|
||||
pub use select::{Selector, Selectors, Specificity};
|
||||
pub use tree::{Doctype, DocumentData, ElementData, Node, NodeData, NodeRef};
|
||||
|
||||
/// This module re-exports a number of traits that are useful when using Kuchiki.
|
||||
/// It can be used with:
|
||||
///
|
||||
/// ```rust
|
||||
/// use kuchiki::traits::*;
|
||||
/// ```
|
||||
pub mod traits {
|
||||
pub use html5ever::tendril::TendrilSink;
|
||||
pub use crate::iter::{ElementIterator, NodeIterator};
|
||||
}
|
|
@ -0,0 +1,116 @@
|
|||
use std::cell::RefCell;
|
||||
use std::fmt;
|
||||
use std::ops::Deref;
|
||||
use crate::tree::{Doctype, DocumentData, ElementData, Node, NodeRef};
|
||||
|
||||
impl NodeRef {
|
||||
/// If this node is an element, return a strong reference to element-specific data.
|
||||
#[inline]
|
||||
pub fn into_element_ref(self) -> Option<NodeDataRef<ElementData>> {
|
||||
NodeDataRef::new_opt(self, Node::as_element)
|
||||
}
|
||||
|
||||
/// If this node is a text node, return a strong reference to its contents.
|
||||
#[inline]
|
||||
pub fn into_text_ref(self) -> Option<NodeDataRef<RefCell<String>>> {
|
||||
NodeDataRef::new_opt(self, Node::as_text)
|
||||
}
|
||||
|
||||
/// If this node is a comment, return a strong reference to its contents.
|
||||
#[inline]
|
||||
pub fn into_comment_ref(self) -> Option<NodeDataRef<RefCell<String>>> {
|
||||
NodeDataRef::new_opt(self, Node::as_comment)
|
||||
}
|
||||
|
||||
/// If this node is a doctype, return a strong reference to doctype-specific data.
|
||||
#[inline]
|
||||
pub fn into_doctype_ref(self) -> Option<NodeDataRef<Doctype>> {
|
||||
NodeDataRef::new_opt(self, Node::as_doctype)
|
||||
}
|
||||
|
||||
/// If this node is a document, return a strong reference to document-specific data.
|
||||
#[inline]
|
||||
pub fn into_document_ref(self) -> Option<NodeDataRef<DocumentData>> {
|
||||
NodeDataRef::new_opt(self, Node::as_document)
|
||||
}
|
||||
}
|
||||
|
||||
/// Holds a strong reference to a node, but dereferences to some component inside of it.
|
||||
#[derive(Eq)]
|
||||
pub struct NodeDataRef<T> {
|
||||
_keep_alive: NodeRef,
|
||||
_reference: *const T,
|
||||
}
|
||||
|
||||
impl<T> NodeDataRef<T> {
|
||||
/// Create a `NodeDataRef` for a component in a given node.
|
||||
#[inline]
|
||||
pub fn new<F>(rc: NodeRef, f: F) -> NodeDataRef<T>
|
||||
where
|
||||
F: FnOnce(&Node) -> &T,
|
||||
{
|
||||
NodeDataRef {
|
||||
_reference: f(&*rc),
|
||||
_keep_alive: rc,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a `NodeDataRef` for and a component that may or may not be in a given node.
|
||||
#[inline]
|
||||
pub fn new_opt<F>(rc: NodeRef, f: F) -> Option<NodeDataRef<T>>
|
||||
where
|
||||
F: FnOnce(&Node) -> Option<&T>,
|
||||
{
|
||||
f(&*rc).map(|r| r as *const T).map(move |r| NodeDataRef {
|
||||
_reference: r,
|
||||
_keep_alive: rc,
|
||||
})
|
||||
}
|
||||
|
||||
/// Access the corresponding node.
|
||||
#[inline]
|
||||
pub fn as_node(&self) -> &NodeRef {
|
||||
&self._keep_alive
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Deref for NodeDataRef<T> {
|
||||
type Target = T;
|
||||
#[inline]
|
||||
fn deref(&self) -> &T {
|
||||
unsafe { &*self._reference }
|
||||
}
|
||||
}
|
||||
|
||||
// #[derive(PartialEq)] would compare both fields
|
||||
impl<T> PartialEq for NodeDataRef<T> {
|
||||
#[inline]
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self._keep_alive == other._keep_alive
|
||||
}
|
||||
}
|
||||
|
||||
// #[derive(Clone)] would have an unnecessary `T: Clone` bound
|
||||
impl<T> Clone for NodeDataRef<T> {
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
NodeDataRef {
|
||||
_keep_alive: self._keep_alive.clone(),
|
||||
_reference: self._reference,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: fmt::Debug> fmt::Debug for NodeDataRef<T> {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
fmt::Debug::fmt(&**self, f)
|
||||
}
|
||||
}
|
||||
|
||||
impl NodeDataRef<ElementData> {
|
||||
/// Return the concatenation of all text nodes in this subtree.
|
||||
pub fn text_contents(&self) -> String {
|
||||
self.as_node().text_contents()
|
||||
}
|
||||
}
|
|
@ -0,0 +1,241 @@
|
|||
use html5ever::tendril::StrTendril;
|
||||
use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
|
||||
use html5ever::{self, Attribute, ExpandedName, QualName};
|
||||
use std::borrow::Cow;
|
||||
|
||||
use crate::attributes;
|
||||
use crate::tree::NodeRef;
|
||||
|
||||
/// Options for the HTML parser.
|
||||
#[derive(Default)]
|
||||
pub struct ParseOpts {
|
||||
/// Options for the HTML tokenizer.
|
||||
pub tokenizer: html5ever::tokenizer::TokenizerOpts,
|
||||
|
||||
/// Options for the HTML tree builder.
|
||||
pub tree_builder: html5ever::tree_builder::TreeBuilderOpts,
|
||||
|
||||
/// A callback for HTML parse errors (which are never fatal).
|
||||
pub on_parse_error: Option<Box<dyn FnMut(Cow<'static, str>)>>,
|
||||
}
|
||||
|
||||
/// Parse an HTML document with html5ever and the default configuration.
|
||||
pub fn parse_html() -> html5ever::Parser<Sink> {
|
||||
parse_html_with_options(ParseOpts::default())
|
||||
}
|
||||
|
||||
/// Parse an HTML document with html5ever with custom configuration.
|
||||
pub fn parse_html_with_options(opts: ParseOpts) -> html5ever::Parser<Sink> {
|
||||
let sink = Sink {
|
||||
document_node: NodeRef::new_document(),
|
||||
on_parse_error: opts.on_parse_error,
|
||||
};
|
||||
let html5opts = html5ever::ParseOpts {
|
||||
tokenizer: opts.tokenizer,
|
||||
tree_builder: opts.tree_builder,
|
||||
};
|
||||
html5ever::parse_document(sink, html5opts)
|
||||
}
|
||||
|
||||
/// Parse an HTML fragment with html5ever and the default configuration.
|
||||
pub fn parse_fragment(ctx_name: QualName, ctx_attr: Vec<Attribute>) -> html5ever::Parser<Sink> {
|
||||
parse_fragment_with_options(ParseOpts::default(), ctx_name, ctx_attr)
|
||||
}
|
||||
|
||||
/// Parse an HTML fragment with html5ever with custom configuration.
|
||||
pub fn parse_fragment_with_options(opts: ParseOpts, ctx_name: QualName, ctx_attr: Vec<Attribute>) -> html5ever::Parser<Sink> {
|
||||
let sink = Sink {
|
||||
document_node: NodeRef::new_document(),
|
||||
on_parse_error: opts.on_parse_error,
|
||||
};
|
||||
let html5opts = html5ever::ParseOpts {
|
||||
tokenizer: opts.tokenizer,
|
||||
tree_builder: opts.tree_builder,
|
||||
};
|
||||
html5ever::parse_fragment(sink, html5opts, ctx_name, ctx_attr)
|
||||
}
|
||||
|
||||
/// Receives new tree nodes during parsing.
pub struct Sink {
    // Root of the tree being built; returned by `TreeSink::finish`.
    document_node: NodeRef,
    // Optional non-fatal parse-error callback, taken from `ParseOpts`.
    on_parse_error: Option<Box<dyn FnMut(Cow<'static, str>)>>,
}
|
||||
|
||||
impl TreeSink for Sink {
    /// The parser's final output is the document root node.
    type Output = NodeRef;

    // Consume the sink and hand the finished tree back to the caller.
    fn finish(self) -> NodeRef {
        self.document_node
    }

    /// Nodes are passed around as reference-counted `NodeRef` handles.
    type Handle = NodeRef;

    #[inline]
    fn parse_error(&mut self, message: Cow<'static, str>) {
        // HTML parse errors are never fatal; forward to the optional callback.
        if let Some(ref mut handler) = self.on_parse_error {
            handler(message)
        }
    }

    #[inline]
    fn get_document(&mut self) -> NodeRef {
        self.document_node.clone()
    }

    #[inline]
    fn set_quirks_mode(&mut self, mode: QuirksMode) {
        // The sink always stores a document node here, so `unwrap` holds.
        self.document_node
            .as_document()
            .unwrap()
            ._quirks_mode
            .set(mode)
    }

    #[inline]
    fn same_node(&self, x: &NodeRef, y: &NodeRef) -> bool {
        // `NodeRef` equality is pointer identity.
        x == y
    }

    #[inline]
    fn elem_name<'a>(&self, target: &'a NodeRef) -> ExpandedName<'a> {
        // html5ever only calls this with element handles, hence the `unwrap`.
        target.as_element().unwrap().name.expanded()
    }

    #[inline]
    fn create_element(
        &mut self,
        name: QualName,
        attrs: Vec<Attribute>,
        _flags: ElementFlags,
    ) -> NodeRef {
        // Convert html5ever's attribute type into kuchiki's own.
        // NOTE(review): `_flags` (e.g. the `template` flag) is ignored here;
        // presumably `NodeRef::new_element` decides template contents itself — confirm.
        NodeRef::new_element(
            name,
            attrs.into_iter().map(|attr| {
                let Attribute {
                    name: QualName { prefix, ns, local },
                    value,
                } = attr;
                let value = String::from(value);
                (
                    attributes::ExpandedName { ns, local },
                    attributes::Attribute { prefix, value },
                )
            }),
        )
    }

    #[inline]
    fn create_comment(&mut self, text: StrTendril) -> NodeRef {
        NodeRef::new_comment(text)
    }

    #[inline]
    fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> NodeRef {
        NodeRef::new_processing_instruction(target, data)
    }

    #[inline]
    fn append(&mut self, parent: &NodeRef, child: NodeOrText<NodeRef>) {
        match child {
            NodeOrText::AppendNode(node) => parent.append(node),
            NodeOrText::AppendText(text) => {
                // Coalesce with the last child if it is already a text node,
                // instead of creating an adjacent text sibling.
                if let Some(last_child) = parent.last_child() {
                    if let Some(existing) = last_child.as_text() {
                        existing.borrow_mut().push_str(&text);
                        return;
                    }
                }
                parent.append(NodeRef::new_text(text))
            }
        }
    }

    #[inline]
    fn append_before_sibling(&mut self, sibling: &NodeRef, child: NodeOrText<NodeRef>) {
        match child {
            NodeOrText::AppendNode(node) => sibling.insert_before(node),
            NodeOrText::AppendText(text) => {
                // As in `append`: merge into an adjacent preceding text node
                // when possible.
                if let Some(previous_sibling) = sibling.previous_sibling() {
                    if let Some(existing) = previous_sibling.as_text() {
                        existing.borrow_mut().push_str(&text);
                        return;
                    }
                }
                sibling.insert_before(NodeRef::new_text(text))
            }
        }
    }

    #[inline]
    fn append_doctype_to_document(
        &mut self,
        name: StrTendril,
        public_id: StrTendril,
        system_id: StrTendril,
    ) {
        self.document_node
            .append(NodeRef::new_doctype(name, public_id, system_id))
    }

    #[inline]
    fn add_attrs_if_missing(&mut self, target: &NodeRef, attrs: Vec<Attribute>) {
        // Called on element handles only, hence the `unwrap`.
        let element = target.as_element().unwrap();
        let mut attributes = element.attributes.borrow_mut();

        for Attribute {
            name: QualName { prefix, ns, local },
            value,
        } in attrs
        {
            // `entry(..).or_insert_with(..)` keeps any attribute already present.
            attributes
                .map
                .entry(attributes::ExpandedName { ns, local })
                .or_insert_with(|| {
                    let value = String::from(value);
                    attributes::Attribute { prefix, value }
                });
        }
    }

    #[inline]
    fn remove_from_parent(&mut self, target: &NodeRef) {
        target.detach()
    }

    #[inline]
    fn reparent_children(&mut self, node: &NodeRef, new_parent: &NodeRef) {
        // FIXME: Can this be done more efficiently in rctree,
        // by moving the whole linked list of children at once?
        for child in node.children() {
            new_parent.append(child)
        }
    }

    #[inline]
    fn mark_script_already_started(&mut self, _node: &NodeRef) {
        // FIXME: Is this useful outside of a browser?
    }

    #[inline]
    fn get_template_contents(&mut self, target: &NodeRef) -> NodeRef {
        // Only called on `<template>` elements; `template_contents` is
        // expected to be populated for those, hence the `unwrap`s.
        target
            .as_element()
            .unwrap()
            .template_contents
            .clone()
            .unwrap()
    }

    fn append_based_on_parent_node(
        &mut self,
        element: &NodeRef,
        prev_element: &NodeRef,
        child: NodeOrText<NodeRef>,
    ) {
        // Foster-parenting hook: insert before `element` if it is attached
        // to the tree, otherwise append to `prev_element`.
        if element.parent().is_some() {
            self.append_before_sibling(element, child)
        } else {
            self.append(prev_element, child)
        }
    }
}
|
|
@ -0,0 +1,433 @@
|
|||
use crate::attributes::ExpandedName;
|
||||
use cssparser::{self, CowRcStr, ParseError, SourceLocation, ToCss};
|
||||
use html5ever::{LocalName, Namespace};
|
||||
use crate::iter::{NodeIterator, Select};
|
||||
use crate::node_data_ref::NodeDataRef;
|
||||
use selectors::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint};
|
||||
use selectors::context::QuirksMode;
|
||||
use selectors::parser::SelectorParseErrorKind;
|
||||
use selectors::parser::{
|
||||
NonTSPseudoClass, Parser, Selector as GenericSelector, SelectorImpl, SelectorList,
|
||||
};
|
||||
use selectors::{self, matching, OpaqueElement};
|
||||
use std::fmt;
|
||||
use crate::tree::{ElementData, Node, NodeData, NodeRef};
|
||||
|
||||
/// The definition of whitespace per CSS Selectors Level 3 § 4.
///
/// Copied from rust-selectors.
/// (space, tab, line feed, carriage return, form feed)
static SELECTOR_WHITESPACE: &[char] = &[' ', '\t', '\n', '\r', '\x0C'];
|
||||
|
||||
/// Marker type wiring rust-selectors' generic machinery to kuchiki's
/// string/atom types (html5ever `LocalName`/`Namespace`).
#[derive(Debug, Clone)]
pub struct KuchikiSelectors;

impl SelectorImpl for KuchikiSelectors {
    type AttrValue = String;
    type Identifier = LocalName;
    type ClassName = LocalName;
    type LocalName = LocalName;
    type PartName = LocalName;
    type NamespacePrefix = LocalName;
    type NamespaceUrl = Namespace;
    type BorrowedNamespaceUrl = Namespace;
    type BorrowedLocalName = LocalName;

    // Custom pseudo-class/pseudo-element types defined below.
    type NonTSPseudoClass = PseudoClass;
    type PseudoElement = PseudoElement;

    // No extra per-match state is needed.
    type ExtraMatchingData = ();
}
|
||||
|
||||
// Selector parser: recognizes the pseudo-classes kuchiki supports.
struct KuchikiParser;

impl<'i> Parser<'i> for KuchikiParser {
    type Impl = KuchikiSelectors;
    type Error = SelectorParseErrorKind<'i>;

    // Map supported non-tree-structural pseudo-class names
    // (ASCII-case-insensitively) to `PseudoClass` variants; anything else
    // is an "unsupported pseudo-class" parse error.
    fn parse_non_ts_pseudo_class(
        &self,
        location: SourceLocation,
        name: CowRcStr<'i>,
    ) -> Result<PseudoClass, ParseError<'i, SelectorParseErrorKind<'i>>> {
        use self::PseudoClass::*;
        if name.eq_ignore_ascii_case("any-link") {
            Ok(AnyLink)
        } else if name.eq_ignore_ascii_case("link") {
            Ok(Link)
        } else if name.eq_ignore_ascii_case("visited") {
            Ok(Visited)
        } else if name.eq_ignore_ascii_case("active") {
            Ok(Active)
        } else if name.eq_ignore_ascii_case("focus") {
            Ok(Focus)
        } else if name.eq_ignore_ascii_case("hover") {
            Ok(Hover)
        } else if name.eq_ignore_ascii_case("enabled") {
            Ok(Enabled)
        } else if name.eq_ignore_ascii_case("disabled") {
            Ok(Disabled)
        } else if name.eq_ignore_ascii_case("checked") {
            Ok(Checked)
        } else if name.eq_ignore_ascii_case("indeterminate") {
            Ok(Indeterminate)
        } else {
            Err(
                location.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement(
                    name,
                )),
            )
        }
    }
}
|
||||
|
||||
/// The non-tree-structural pseudo-classes kuchiki's selector parser accepts.
#[derive(PartialEq, Eq, Clone, Debug, Hash)]
pub enum PseudoClass {
    AnyLink,
    Link,
    Visited,
    Active,
    Focus,
    Hover,
    Enabled,
    Disabled,
    Checked,
    Indeterminate,
}

impl NonTSPseudoClass for PseudoClass {
    type Impl = KuchikiSelectors;

    fn is_active_or_hover(&self) -> bool {
        matches!(*self, PseudoClass::Active | PseudoClass::Hover)
    }

    fn is_user_action_state(&self) -> bool {
        matches!(*self, PseudoClass::Active | PseudoClass::Hover | PseudoClass::Focus)
    }

    // None of these pseudo-classes get zero-specificity treatment.
    fn has_zero_specificity(&self) -> bool {
        false
    }
}
|
||||
|
||||
impl ToCss for PseudoClass {
|
||||
fn to_css<W>(&self, dest: &mut W) -> fmt::Result
|
||||
where
|
||||
W: fmt::Write,
|
||||
{
|
||||
dest.write_str(match *self {
|
||||
PseudoClass::AnyLink => ":any-link",
|
||||
PseudoClass::Link => ":link",
|
||||
PseudoClass::Visited => ":visited",
|
||||
PseudoClass::Active => ":active",
|
||||
PseudoClass::Focus => ":focus",
|
||||
PseudoClass::Hover => ":hover",
|
||||
PseudoClass::Enabled => ":enabled",
|
||||
PseudoClass::Disabled => ":disabled",
|
||||
PseudoClass::Checked => ":checked",
|
||||
PseudoClass::Indeterminate => ":indeterminate",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Pseudo-elements are not supported: this enum is uninhabited, so no
/// value of it can ever exist and all matches on it are vacuous.
#[derive(PartialEq, Eq, Clone, Debug, Hash)]
pub enum PseudoElement {}

impl ToCss for PseudoElement {
    fn to_css<W>(&self, _dest: &mut W) -> fmt::Result
    where
        W: fmt::Write,
    {
        // Uninhabited: statically unreachable.
        match *self {}
    }
}

impl selectors::parser::PseudoElement for PseudoElement {
    type Impl = KuchikiSelectors;
}
|
||||
|
||||
impl selectors::Element for NodeDataRef<ElementData> {
    type Impl = KuchikiSelectors;

    #[inline]
    fn opaque(&self) -> OpaqueElement {
        // Element identity is the address of the underlying `Node`.
        let node: &Node = self.as_node();
        OpaqueElement::new(node)
    }

    // Kuchiki has no shadow-DOM support, so slot/shadow queries are trivial.
    #[inline]
    fn is_html_slot_element(&self) -> bool {
        false
    }
    #[inline]
    fn parent_node_is_shadow_root(&self) -> bool {
        false
    }
    #[inline]
    fn containing_shadow_host(&self) -> Option<Self> {
        None
    }

    #[inline]
    fn parent_element(&self) -> Option<Self> {
        self.as_node().parent().and_then(NodeRef::into_element_ref)
    }
    #[inline]
    fn prev_sibling_element(&self) -> Option<Self> {
        self.as_node().preceding_siblings().elements().next()
    }
    #[inline]
    fn next_sibling_element(&self) -> Option<Self> {
        self.as_node().following_siblings().elements().next()
    }
    // `:empty` — no element children and only empty text children.
    #[inline]
    fn is_empty(&self) -> bool {
        self.as_node().children().all(|child| match *child.data() {
            NodeData::Element(_) => false,
            NodeData::Text(ref text) => text.borrow().is_empty(),
            _ => true,
        })
    }
    // `:root` — the element whose parent is the document node itself.
    #[inline]
    fn is_root(&self) -> bool {
        match self.as_node().parent() {
            None => false,
            Some(parent) => matches!(*parent.data(), NodeData::Document(_)),
        }
    }

    #[inline]
    fn is_html_element_in_html_document(&self) -> bool {
        // FIXME: Have a notion of HTML document v.s. XML document?
        self.name.ns == ns!(html)
    }

    #[inline]
    fn has_local_name(&self, name: &LocalName) -> bool {
        self.name.local == *name
    }
    #[inline]
    fn has_namespace(&self, namespace: &Namespace) -> bool {
        self.name.ns == *namespace
    }

    // `::part()` / shadow parts are unsupported.
    #[inline]
    fn is_part(&self, _name: &LocalName) -> bool {
        false
    }

    #[inline]
    fn exported_part(&self, _: &LocalName) -> Option<LocalName> {
        None
    }

    #[inline]
    fn imported_part(&self, _: &LocalName) -> Option<LocalName> {
        None
    }

    #[inline]
    fn is_pseudo_element(&self) -> bool {
        false
    }

    #[inline]
    fn is_same_type(&self, other: &Self) -> bool {
        // Same type == same qualified (namespace + local) name.
        self.name == other.name
    }

    // A link is an HTML <a>, <area> or <link> element with an `href`
    // attribute (null namespace).
    #[inline]
    fn is_link(&self) -> bool {
        self.name.ns == ns!(html)
            && matches!(
                self.name.local,
                local_name!("a") | local_name!("area") | local_name!("link")
            )
            && self
                .attributes
                .borrow()
                .map
                .contains_key(&ExpandedName::new(ns!(), local_name!("href")))
    }

    #[inline]
    fn has_id(&self, id: &LocalName, case_sensitivity: CaseSensitivity) -> bool {
        self.attributes
            .borrow()
            .get(local_name!("id"))
            .map_or(false, |id_attr| {
                case_sensitivity.eq(id.as_bytes(), id_attr.as_bytes())
            })
    }

    #[inline]
    fn has_class(&self, name: &LocalName, case_sensitivity: CaseSensitivity) -> bool {
        let name = name.as_bytes();
        // The empty class name never matches; otherwise split the `class`
        // attribute on CSS whitespace and compare each token.
        !name.is_empty()
            && if let Some(class_attr) = self.attributes.borrow().get(local_name!("class")) {
                class_attr
                    .split(SELECTOR_WHITESPACE)
                    .any(|class| case_sensitivity.eq(class.as_bytes(), name))
            } else {
                false
            }
    }

    #[inline]
    fn attr_matches(
        &self,
        ns: &NamespaceConstraint<&Namespace>,
        local_name: &LocalName,
        operation: &AttrSelectorOperation<&String>,
    ) -> bool {
        let attrs = self.attributes.borrow();
        match *ns {
            // No namespace constraint: scan every attribute with the local name.
            NamespaceConstraint::Any => attrs
                .map
                .iter()
                .any(|(name, attr)| name.local == *local_name && operation.eval_str(&attr.value)),
            // Specific namespace: a direct keyed lookup suffices.
            NamespaceConstraint::Specific(ns_url) => attrs
                .map
                .get(&ExpandedName::new(ns_url, local_name.clone()))
                .map_or(false, |attr| operation.eval_str(&attr.value)),
        }
    }

    fn match_pseudo_element(
        &self,
        pseudo: &PseudoElement,
        _context: &mut matching::MatchingContext<KuchikiSelectors>,
    ) -> bool {
        // `PseudoElement` is uninhabited, so this branch is unreachable.
        match *pseudo {}
    }

    fn match_non_ts_pseudo_class<F>(
        &self,
        pseudo: &PseudoClass,
        _context: &mut matching::MatchingContext<KuchikiSelectors>,
        _flags_setter: &mut F,
    ) -> bool
    where
        F: FnMut(&Self, matching::ElementSelectorFlags),
    {
        use self::PseudoClass::*;
        match *pseudo {
            // A static tree has no user interaction or browsing history,
            // so these dynamic states never match.
            Active | Focus | Hover | Enabled | Disabled | Checked | Indeterminate | Visited => {
                false
            }
            AnyLink | Link => {
                self.name.ns == ns!(html)
                    && matches!(
                        self.name.local,
                        local_name!("a") | local_name!("area") | local_name!("link")
                    )
                    && self.attributes.borrow().contains(local_name!("href"))
            }
        }
    }
}
|
||||
|
||||
/// A pre-compiled list of CSS Selectors.
///
/// NOTE(review): the inner `Vec` is public, so callers can construct an
/// empty list; `Display` below assumes at least one selector — verify.
pub struct Selectors(pub Vec<Selector>);

/// A pre-compiled CSS Selector.
pub struct Selector(GenericSelector<KuchikiSelectors>);

/// The specificity of a selector.
///
/// Opaque, but ordered.
///
/// Determines precedence in the cascading algorithm.
/// When equal, a rule later in source order takes precedence.
#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd)]
pub struct Specificity(u32);
|
||||
|
||||
impl Selectors {
|
||||
/// Compile a list of selectors. This may fail on syntax errors or unsupported selectors.
|
||||
#[inline]
|
||||
pub fn compile(s: &str) -> Result<Selectors, ()> {
|
||||
let mut input = cssparser::ParserInput::new(s);
|
||||
match SelectorList::parse(&KuchikiParser, &mut cssparser::Parser::new(&mut input)) {
|
||||
Ok(list) => Ok(Selectors(list.0.into_iter().map(Selector).collect())),
|
||||
Err(_) => Err(()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether the given element matches this list of selectors.
|
||||
#[inline]
|
||||
pub fn matches(&self, element: &NodeDataRef<ElementData>) -> bool {
|
||||
self.0.iter().any(|s| s.matches(element))
|
||||
}
|
||||
|
||||
/// Filter an element iterator, yielding those matching this list of selectors.
|
||||
#[inline]
|
||||
pub fn filter<I>(&self, iter: I) -> Select<I, &Selectors>
|
||||
where
|
||||
I: Iterator<Item = NodeDataRef<ElementData>>,
|
||||
{
|
||||
Select {
|
||||
iter,
|
||||
selectors: self,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Selector {
    /// Returns whether the given element matches this selector.
    #[inline]
    pub fn matches(&self, element: &NodeDataRef<ElementData>) -> bool {
        // A fresh context per call: normal matching mode, no bloom filter,
        // no nth-index cache, no-quirks mode.
        let mut context = matching::MatchingContext::new(
            matching::MatchingMode::Normal,
            None,
            None,
            QuirksMode::NoQuirks,
        );
        // offset 0, no override pseudo-element, flags-setter is a no-op.
        matching::matches_selector(&self.0, 0, None, element, &mut context, &mut |_, _| {})
    }

    /// Return the specificity of this selector.
    pub fn specificity(&self) -> Specificity {
        Specificity(self.0.specificity())
    }
}
|
||||
|
||||
impl ::std::str::FromStr for Selectors {
    type Err = ();
    /// `"p.foo".parse::<Selectors>()` — alias for [`Selectors::compile`].
    #[inline]
    fn from_str(s: &str) -> Result<Selectors, ()> {
        Selectors::compile(s)
    }
}

impl fmt::Display for Selector {
    // Display is the selector's CSS serialization.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.0.to_css(f)
    }
}
|
||||
|
||||
impl fmt::Display for Selectors {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let mut iter = self.0.iter();
|
||||
let first = iter
|
||||
.next()
|
||||
.expect("Empty Selectors, should contain at least one selector");
|
||||
first.0.to_css(f)?;
|
||||
for selector in iter {
|
||||
f.write_str(", ")?;
|
||||
selector.0.to_css(f)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Selector {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Display::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Selectors {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Display::fmt(self, f)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
use html5ever::serialize::TraversalScope::*;
|
||||
use html5ever::serialize::{serialize, Serialize, SerializeOpts, Serializer, TraversalScope};
|
||||
use html5ever::QualName;
|
||||
use std::fs::File;
|
||||
use std::io::{Result, Write};
|
||||
use std::path::Path;
|
||||
use std::string::ToString;
|
||||
|
||||
use crate::tree::{NodeData, NodeRef};
|
||||
|
||||
impl Serialize for NodeRef {
    /// Serialize this node per the requested traversal scope:
    /// `IncludeNode` emits the node's own markup, `ChildrenOnly` skips it.
    fn serialize<S: Serializer>(
        &self,
        serializer: &mut S,
        traversal_scope: TraversalScope,
    ) -> Result<()> {
        match (traversal_scope, self.data()) {
            (ref scope, &NodeData::Element(ref element)) => {
                if *scope == IncludeNode {
                    let attrs = element.attributes.borrow();

                    // Unfortunately we need to allocate something to hold these &'a QualName
                    let attrs = attrs
                        .map
                        .iter()
                        .map(|(name, attr)| {
                            (
                                QualName::new(
                                    attr.prefix.clone(),
                                    name.ns.clone(),
                                    name.local.clone(),
                                ),
                                &attr.value,
                            )
                        })
                        .collect::<Vec<_>>();

                    serializer.start_elem(
                        element.name.clone(),
                        attrs.iter().map(|&(ref name, value)| (name, &**value)),
                    )?
                }

                // Children are always serialized in full.
                for child in self.children() {
                    Serialize::serialize(&child, serializer, IncludeNode)?
                }

                if *scope == IncludeNode {
                    serializer.end_elem(element.name.clone())?
                }
                Ok(())
            }

            // Documents and fragments have no markup of their own;
            // only their children are serialized.
            (_, &NodeData::DocumentFragment) | (_, &NodeData::Document(_)) => {
                for child in self.children() {
                    Serialize::serialize(&child, serializer, IncludeNode)?
                }
                Ok(())
            }

            // Remaining node kinds are leaves: `ChildrenOnly` emits nothing.
            (ChildrenOnly(_), _) => Ok(()),

            (IncludeNode, &NodeData::Doctype(ref doctype)) => {
                serializer.write_doctype(&doctype.name)
            }
            (IncludeNode, &NodeData::Text(ref text)) => serializer.write_text(&text.borrow()),
            (IncludeNode, &NodeData::Comment(ref text)) => serializer.write_comment(&text.borrow()),
            (IncludeNode, &NodeData::ProcessingInstruction(ref contents)) => {
                let contents = contents.borrow();
                serializer.write_processing_instruction(&contents.0, &contents.1)
            }
        }
    }
}
|
||||
|
||||
impl ToString for NodeRef {
    /// Serialize this node and its descendants to an HTML string.
    ///
    /// Writing into a `Vec<u8>` cannot fail with an I/O error, and the
    /// serializer's output is expected to be valid UTF-8, so both
    /// `unwrap`s should hold — NOTE(review): confirm the UTF-8 guarantee
    /// against html5ever's serializer.
    #[inline]
    fn to_string(&self) -> String {
        let mut u8_vec = Vec::new();
        self.serialize(&mut u8_vec).unwrap();
        String::from_utf8(u8_vec).unwrap()
    }
}
|
||||
|
||||
impl NodeRef {
    /// Serialize this node and its descendants in HTML syntax to the given stream.
    ///
    /// Uses `IncludeNode` scope: the node's own markup is emitted too.
    #[inline]
    pub fn serialize<W: Write>(&self, writer: &mut W) -> Result<()> {
        serialize(
            writer,
            self,
            SerializeOpts {
                traversal_scope: IncludeNode,
                ..Default::default()
            },
        )
    }

    /// Serialize this node and its descendants in HTML syntax to a new file at the given path.
    ///
    /// The file is created (or truncated) first; errors from creation or
    /// from serialization are returned to the caller.
    #[inline]
    pub fn serialize_to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
        let mut file = File::create(&path)?;
        self.serialize(&mut file)
    }
}
|
|
@ -0,0 +1,185 @@
|
|||
use html5ever::tree_builder::QuirksMode;
|
||||
use html5ever::QualName;
|
||||
use std::path::Path;
|
||||
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::parser::{parse_html, parse_fragment};
|
||||
use crate::select::*;
|
||||
use crate::traits::*;
|
||||
|
||||
// Adjacent text is merged by the parser into single text nodes, and
// mutating a text node through its RefCell is visible in later reads.
#[test]
fn text_nodes() {
    let html = r"
<!doctype html>
<title>Test case</title>
<p>Content contains <b>Important</b> data</p>";
    let document = parse_html().one(html);
    let paragraph = document.select("p").unwrap().collect::<Vec<_>>();
    assert_eq!(paragraph.len(), 1);
    assert_eq!(
        paragraph[0].text_contents(),
        "Content contains Important data"
    );
    let texts = paragraph[0]
        .as_node()
        .descendants()
        .text_nodes()
        .collect::<Vec<_>>();
    assert_eq!(texts.len(), 3);
    assert_eq!(&*texts[0].borrow(), "Content contains ");
    assert_eq!(&*texts[1].borrow(), "Important");
    assert_eq!(&*texts[2].borrow(), " data");
    {
        let mut x = texts[0].borrow_mut();
        x.truncate(0);
        x.push_str("Content doesn't contain ");
    }
    assert_eq!(&*texts[0].borrow(), "Content doesn't contain ");
}
|
||||
|
||||
// A document with a doctype parses in no-quirks mode, and serialization
// fills in the implied <html>/<head>/<body> structure.
#[test]
fn parse_and_serialize() {
    let html = r"
<!doctype html>
<title>Test case</title>
<p>Content";
    let document = parse_html().one(html);
    assert_eq!(
        document.as_document().unwrap().quirks_mode(),
        QuirksMode::NoQuirks
    );
    assert_eq!(
        document.to_string(),
        r"<!DOCTYPE html><html><head><title>Test case</title>
</head><body><p>Content</p></body></html>"
    );
}
|
||||
|
||||
// Fragment parsing with a <tbody> context element: the implied <tr>/<td>
// structure is completed and the result is wrapped in an <html> root.
#[test]
fn parse_and_serialize_fragment() {
    let html = r"<tbody><tr><td>Test case";

    let ctx_name = QualName::new(None, ns!(html), local_name!("tbody"));
    let document = parse_fragment(ctx_name, vec![]).one(html);
    assert_eq!(document.as_document().unwrap().quirks_mode(), QuirksMode::NoQuirks);
    assert_eq!(document.to_string(), r"<html><tr><td>Test case</td></tr></html>");
}
|
||||
|
||||
// Parsing from a file on disk (test_data/foo.html) round-trips to the
// expected serialization.
#[test]
fn parse_file() {
    let mut path = Path::new(env!("CARGO_MANIFEST_DIR")).to_path_buf();
    path.push("test_data".to_string());
    path.push("foo.html");

    let html = r"<!DOCTYPE html><html><head>
<title>Test case</title>
</head>
<body>
<p>Foo</p>


</body></html>";
    let document = parse_html().from_utf8().from_file(&path).unwrap();
    assert_eq!(document.to_string(), html);
}
|
||||
|
||||
// serialize_to_file followed by re-parsing yields an equivalent document.
#[test]
fn serialize_and_read_file() {
    let tempdir = TempDir::new().unwrap();
    let mut path = tempdir.path().to_path_buf();
    path.push("temp.html");

    let html = r"<!DOCTYPE html><html><head><title>Title</title></head><body>Body</body></html>";
    let document = parse_html().one(html);
    let _ = document.serialize_to_file(path.clone());

    let document2 = parse_html().from_utf8().from_file(&path).unwrap();
    assert_eq!(document.to_string(), document2.to_string());
}
|
||||
|
||||
// `select` with a class selector, attribute access by &str and by
// LocalName, and `Selectors::filter` all agree.
#[test]
fn select() {
    let html = r"
<title>Test case</title>
<p class=foo>Foo
<p>Bar
<p class=foo>Foo
";

    let document = parse_html().one(html);
    let matching = document.select("p.foo").unwrap().collect::<Vec<_>>();
    assert_eq!(matching.len(), 2);
    let child = matching[0].as_node().first_child().unwrap();
    assert_eq!(&**child.as_text().unwrap().borrow(), "Foo\n");
    assert_eq!(matching[0].attributes.borrow().get("class"), Some("foo"));
    assert_eq!(
        matching[0].attributes.borrow().get(local_name!("class")),
        Some("foo")
    );

    let selectors = Selectors::compile("p.foo").unwrap();
    let matching2 = selectors
        .filter(document.descendants().elements())
        .collect::<Vec<_>>();
    assert_eq!(matching, matching2);
}
|
||||
|
||||
// `select_first` returns the first match in tree order, and errs when
// nothing matches.
#[test]
fn select_first() {
    let html = r"
<title>Test case</title>
<p class=foo>Foo
<p>Bar
<p class=foo>Baz
";

    let document = parse_html().one(html);
    let matching = document.select_first("p.foo").unwrap();
    let child = matching.as_node().first_child().unwrap();
    assert_eq!(&**child.as_text().unwrap().borrow(), "Foo\n");
    assert_eq!(matching.attributes.borrow().get("class"), Some("foo"));
    assert_eq!(
        matching.attributes.borrow().get(local_name!("class")),
        Some("foo")
    );

    assert!(document.select_first("p.bar").is_err());
}
|
||||
|
||||
// `to_string` on an interior node serializes just that subtree
// (the 12th node in inclusive-descendants order is the <p>).
#[test]
fn to_string() {
    let html = r"<!DOCTYPE html>
<html>
<head>
<title>Test case</title>
</head>
<body>
<p class=foo>Foo
</body>
</html>";

    let document = parse_html().one(html);
    assert_eq!(
        document
            .inclusive_descendants()
            .nth(11)
            .unwrap()
            .to_string(),
        "<p class=\"foo\">Foo\n \n</p>"
    );
}
|
||||
|
||||
// Specificity ordering: a class selector and a pseudo-class tie, and
// both outrank a bare type selector.
#[test]
fn specificity() {
    let selectors = Selectors::compile(".example, :first-child, div").unwrap();
    let specificities = selectors
        .0
        .iter()
        .map(|s| s.specificity())
        .collect::<Vec<_>>();
    assert_eq!(specificities.len(), 3);
    assert!(specificities[0] == specificities[1]);
    assert!(specificities[0] > specificities[2]);
    assert!(specificities[1] > specificities[2]);
}
|
|
@ -0,0 +1,489 @@
|
|||
use html5ever::tree_builder::QuirksMode;
|
||||
use html5ever::QualName;
|
||||
use std::cell::{Cell, RefCell};
|
||||
use std::fmt;
|
||||
use std::ops::Deref;
|
||||
use std::rc::{Rc, Weak};
|
||||
|
||||
use crate::attributes::{Attribute, Attributes, ExpandedName};
|
||||
use crate::cell_extras::*;
|
||||
use crate::iter::NodeIterator;
|
||||
|
||||
/// Node data specific to the node type.
#[derive(Debug, PartialEq, Clone)]
pub enum NodeData {
    /// Element node
    Element(ElementData),

    /// Text node (mutable through the `RefCell`)
    Text(RefCell<String>),

    /// Comment node (mutable through the `RefCell`)
    Comment(RefCell<String>),

    /// Processing instruction node, as a `(target, data)` pair
    ProcessingInstruction(RefCell<(String, String)>),

    /// Doctype node
    Doctype(Doctype),

    /// Document node
    Document(DocumentData),

    /// Document fragment node
    DocumentFragment,
}
|
||||
|
||||
/// Data specific to doctype nodes.
#[derive(Debug, PartialEq, Clone)]
pub struct Doctype {
    /// The name of the doctype
    pub name: String,

    /// The public ID of the doctype
    pub public_id: String,

    /// The system ID of the doctype
    pub system_id: String,
}

/// Data specific to element nodes.
#[derive(Debug, PartialEq, Clone)]
pub struct ElementData {
    /// The namespace and local name of the element, such as `ns!(html)` and `body`.
    pub name: QualName,

    /// The attributes of the elements.
    pub attributes: RefCell<Attributes>,

    /// If the element is an HTML `<template>` element,
    /// the document fragment node that is the root of template contents.
    pub template_contents: Option<NodeRef>,
}

/// Data specific to document nodes.
#[derive(Debug, PartialEq, Clone)]
pub struct DocumentData {
    // Set by the parser via `TreeSink::set_quirks_mode`; hidden because
    // it is an implementation detail — read through `quirks_mode()`.
    #[doc(hidden)]
    pub _quirks_mode: Cell<QuirksMode>,
}

impl DocumentData {
    /// The quirks mode of the document, as determined by the HTML parser.
    #[inline]
    pub fn quirks_mode(&self) -> QuirksMode {
        self._quirks_mode.get()
    }
}
|
||||
|
||||
/// A strong reference to a node.
///
/// A node is destroyed when the last strong reference to it dropped.
///
/// Each node holds a strong reference to its first child and next sibling (if any),
/// but only a weak reference to its last child, previous sibling, and parent.
/// This is to avoid strong reference cycles, which would cause memory leaks.
///
/// As a result, a single `NodeRef` is sufficient to keep alive a node
/// and nodes that are after it in tree order
/// (its descendants, its following siblings, and their descendants)
/// but not other nodes in a tree.
///
/// To avoid destroying nodes prematurely,
/// programs typically hold a strong reference to the root of a document
/// until they’re done with that document.
#[derive(Clone, Debug)]
pub struct NodeRef(pub Rc<Node>);
|
||||
|
||||
impl Deref for NodeRef {
|
||||
type Target = Node;
|
||||
#[inline]
|
||||
fn deref(&self) -> &Node {
|
||||
&*self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for NodeRef {}
|
||||
impl PartialEq for NodeRef {
|
||||
#[inline]
|
||||
fn eq(&self, other: &NodeRef) -> bool {
|
||||
let a: *const Node = &*self.0;
|
||||
let b: *const Node = &*other.0;
|
||||
a == b
|
||||
}
|
||||
}
|
||||
|
||||
/// A node inside a DOM-like tree.
pub struct Node {
    // Up/back pointers are weak to avoid reference cycles (see `NodeRef` docs);
    // forward pointers (next sibling, first child) are strong.
    parent: Cell<Option<Weak<Node>>>,
    previous_sibling: Cell<Option<Weak<Node>>>,
    next_sibling: Cell<Option<Rc<Node>>>,
    first_child: Cell<Option<Rc<Node>>>,
    last_child: Cell<Option<Weak<Node>>>,
    data: NodeData,
}

impl fmt::Debug for Node {
    // Shows the node's data plus its address, since identity is pointer-based.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        write!(f, "{:?} @ {:?}", self.data, self as *const Node)
    }
}
|
||||
|
||||
/// Prevent implicit recursion when dropping nodes to avoid overflowing the stack.
///
/// The implicit drop is correct, but recursive.
/// In the worst case (where no node has both a next sibling and a child),
/// a tree of a few tens of thousands of nodes could cause a stack overflow.
///
/// This `Drop` implementation makes sure the recursion does not happen.
/// Instead, it has an explicit `Vec<Rc<Node>>` stack to traverse the subtree,
/// but only following `Rc<Node>` references that are "unique":
/// that have a strong reference count of 1.
/// Those are the nodes that would have been dropped recursively.
///
/// The stack holds ancestors of the current node rather than preceding siblings,
/// on the assumption that large document trees are typically wider than deep.
impl Drop for Node {
    fn drop(&mut self) {
        // `.take_if_unique_strong()` temporarily leaves the tree in an inconsistent state,
        // as the corresponding `Weak` reference in the other direction is not removed.
        // It is important that all `Some(_)` strong references it returns
        // are dropped by the end of this `drop` call,
        // and that no user code is invoked in-between.
        // NOTE(review): `take_if_unique_strong` is not a std `Cell` method —
        // presumably a crate-local `Cell` extension trait; confirm in this file.

        // Sharing `stack` between these two calls is not necessary,
        // but it allows re-using memory allocations.
        let mut stack = Vec::new();
        if let Some(rc) = self.first_child.take_if_unique_strong() {
            non_recursive_drop_unique_rc(rc, &mut stack);
        }
        if let Some(rc) = self.next_sibling.take_if_unique_strong() {
            non_recursive_drop_unique_rc(rc, &mut stack);
        }

        // Iteratively drop a uniquely-owned subtree: descend into first
        // children, then advance to next siblings, then climb back up through
        // the explicit `stack` of ancestors once a node has neither.
        fn non_recursive_drop_unique_rc(mut rc: Rc<Node>, stack: &mut Vec<Rc<Node>>) {
            loop {
                if let Some(child) = rc.first_child.take_if_unique_strong() {
                    stack.push(rc);
                    rc = child;
                    continue;
                }
                if let Some(sibling) = rc.next_sibling.take_if_unique_strong() {
                    // The previous value of `rc: Rc<Node>` is dropped here.
                    // Since it was unique, the corresponding `Node` is dropped as well.
                    // `<Node as Drop>::drop` does not call `drop_rc`
                    // as both the first child and next sibling were already taken.
                    // Weak reference counts decremented here for `Cell`s that are `Some`:
                    // * `rc.parent`: still has a strong reference in `stack` or elsewhere
                    // * `rc.last_child`: this is the last weak ref. Deallocated now.
                    // * `rc.previous_sibling`: this is the last weak ref. Deallocated now.
                    rc = sibling;
                    continue;
                }
                if let Some(parent) = stack.pop() {
                    // Same as in the above comment.
                    rc = parent;
                    continue;
                }
                return;
            }
        }
    }
}
impl NodeRef {
|
||||
/// Create a new node.
|
||||
#[inline]
|
||||
pub fn new(data: NodeData) -> NodeRef {
|
||||
NodeRef(Rc::new(Node {
|
||||
parent: Cell::new(None),
|
||||
first_child: Cell::new(None),
|
||||
last_child: Cell::new(None),
|
||||
previous_sibling: Cell::new(None),
|
||||
next_sibling: Cell::new(None),
|
||||
data,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Create a new element node.
|
||||
#[inline]
|
||||
pub fn new_element<I>(name: QualName, attributes: I) -> NodeRef
|
||||
where
|
||||
I: IntoIterator<Item = (ExpandedName, Attribute)>,
|
||||
{
|
||||
NodeRef::new(NodeData::Element(ElementData {
|
||||
template_contents: if name.expanded() == expanded_name!(html "template") {
|
||||
Some(NodeRef::new(NodeData::DocumentFragment))
|
||||
} else {
|
||||
None
|
||||
},
|
||||
name,
|
||||
attributes: RefCell::new(Attributes {
|
||||
map: attributes.into_iter().collect(),
|
||||
}),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Create a new text node.
|
||||
#[inline]
|
||||
pub fn new_text<T: Into<String>>(value: T) -> NodeRef {
|
||||
NodeRef::new(NodeData::Text(RefCell::new(value.into())))
|
||||
}
|
||||
|
||||
/// Create a new comment node.
|
||||
#[inline]
|
||||
pub fn new_comment<T: Into<String>>(value: T) -> NodeRef {
|
||||
NodeRef::new(NodeData::Comment(RefCell::new(value.into())))
|
||||
}
|
||||
|
||||
/// Create a new processing instruction node.
|
||||
#[inline]
|
||||
pub fn new_processing_instruction<T1, T2>(target: T1, data: T2) -> NodeRef
|
||||
where
|
||||
T1: Into<String>,
|
||||
T2: Into<String>,
|
||||
{
|
||||
NodeRef::new(NodeData::ProcessingInstruction(RefCell::new((
|
||||
target.into(),
|
||||
data.into(),
|
||||
))))
|
||||
}
|
||||
|
||||
/// Create a new doctype node.
|
||||
#[inline]
|
||||
pub fn new_doctype<T1, T2, T3>(name: T1, public_id: T2, system_id: T3) -> NodeRef
|
||||
where
|
||||
T1: Into<String>,
|
||||
T2: Into<String>,
|
||||
T3: Into<String>,
|
||||
{
|
||||
NodeRef::new(NodeData::Doctype(Doctype {
|
||||
name: name.into(),
|
||||
public_id: public_id.into(),
|
||||
system_id: system_id.into(),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Create a new document node.
|
||||
#[inline]
|
||||
pub fn new_document() -> NodeRef {
|
||||
NodeRef::new(NodeData::Document(DocumentData {
|
||||
_quirks_mode: Cell::new(QuirksMode::NoQuirks),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Return the concatenation of all text nodes in this subtree.
|
||||
pub fn text_contents(&self) -> String {
|
||||
let mut s = String::new();
|
||||
for text_node in self.inclusive_descendants().text_nodes() {
|
||||
s.push_str(&text_node.borrow());
|
||||
}
|
||||
s
|
||||
}
|
||||
}
|
||||
|
||||
impl Node {
    /// Return a reference to this node’s node-type-specific data.
    #[inline]
    pub fn data(&self) -> &NodeData {
        &self.data
    }

    /// If this node is an element, return a reference to element-specific data.
    #[inline]
    pub fn as_element(&self) -> Option<&ElementData> {
        match self.data {
            NodeData::Element(ref value) => Some(value),
            _ => None,
        }
    }

    /// If this node is a text node, return a reference to its contents.
    #[inline]
    pub fn as_text(&self) -> Option<&RefCell<String>> {
        match self.data {
            NodeData::Text(ref value) => Some(value),
            _ => None,
        }
    }

    /// If this node is a comment, return a reference to its contents.
    #[inline]
    pub fn as_comment(&self) -> Option<&RefCell<String>> {
        match self.data {
            NodeData::Comment(ref value) => Some(value),
            _ => None,
        }
    }

    /// If this node is a doctype, return a reference to doctype-specific data.
    #[inline]
    pub fn as_doctype(&self) -> Option<&Doctype> {
        match self.data {
            NodeData::Doctype(ref value) => Some(value),
            _ => None,
        }
    }

    /// If this node is a document, return a reference to document-specific data.
    #[inline]
    pub fn as_document(&self) -> Option<&DocumentData> {
        match self.data {
            NodeData::Document(ref value) => Some(value),
            _ => None,
        }
    }

    /// Return a reference to the parent node, unless this node is the root of the tree.
    #[inline]
    pub fn parent(&self) -> Option<NodeRef> {
        // NOTE(review): `upgrade`/`clone_inner` on `Cell<Option<_>>` are not
        // std methods — presumably crate-local `Cell` extension traits that
        // temporarily take the value out; confirm elsewhere in this file.
        self.parent.upgrade().map(NodeRef)
    }

    /// Return a reference to the first child of this node, unless it has no child.
    #[inline]
    pub fn first_child(&self) -> Option<NodeRef> {
        self.first_child.clone_inner().map(NodeRef)
    }

    /// Return a reference to the last child of this node, unless it has no child.
    #[inline]
    pub fn last_child(&self) -> Option<NodeRef> {
        self.last_child.upgrade().map(NodeRef)
    }

    /// Return a reference to the previous sibling of this node, unless it is a first child.
    #[inline]
    pub fn previous_sibling(&self) -> Option<NodeRef> {
        self.previous_sibling.upgrade().map(NodeRef)
    }

    /// Return a reference to the next sibling of this node, unless it is a last child.
    #[inline]
    pub fn next_sibling(&self) -> Option<NodeRef> {
        self.next_sibling.clone_inner().map(NodeRef)
    }

    /// Detach a node from its parent and siblings. Children are not affected.
    ///
    /// To remove a node and its descendants, detach it and drop any strong reference to it.
    pub fn detach(&self) {
        // Sever this node's three outgoing links first, keeping the old
        // values so the neighbors can be stitched back together below.
        let parent_weak = self.parent.take();
        let previous_sibling_weak = self.previous_sibling.take();
        let next_sibling_strong = self.next_sibling.take();

        let previous_sibling_opt = previous_sibling_weak
            .as_ref()
            .and_then(|weak| weak.upgrade());

        // Point the next sibling's back-link at our previous sibling; if we
        // were the last child, the parent's `last_child` moves back instead.
        if let Some(next_sibling_ref) = next_sibling_strong.as_ref() {
            next_sibling_ref
                .previous_sibling
                .replace(previous_sibling_weak);
        } else if let Some(parent_ref) = parent_weak.as_ref() {
            if let Some(parent_strong) = parent_ref.upgrade() {
                parent_strong.last_child.replace(previous_sibling_weak);
            }
        }

        // Point the previous sibling's forward link at our next sibling; if
        // we were the first child, the parent's `first_child` moves forward.
        if let Some(previous_sibling_strong) = previous_sibling_opt {
            previous_sibling_strong
                .next_sibling
                .replace(next_sibling_strong);
        } else if let Some(parent_ref) = parent_weak.as_ref() {
            if let Some(parent_strong) = parent_ref.upgrade() {
                parent_strong.first_child.replace(next_sibling_strong);
            }
        }
    }
}
impl NodeRef {
    /// Append a new child to this node, after existing children.
    ///
    /// The new child is detached from its previous position.
    pub fn append(&self, new_child: NodeRef) {
        new_child.detach();
        new_child.parent.replace(Some(Rc::downgrade(&self.0)));
        // Swap ourselves in as `last_child`, retrieving the old last child
        // (if any) so we can link it to the new one.
        if let Some(last_child_weak) = self.last_child.replace(Some(Rc::downgrade(&new_child.0))) {
            if let Some(last_child) = last_child_weak.upgrade() {
                new_child.previous_sibling.replace(Some(last_child_weak));
                debug_assert!(last_child.next_sibling.is_none());
                last_child.next_sibling.replace(Some(new_child.0));
                return;
            }
        }
        // No previous last child: the node had no children at all.
        debug_assert!(self.first_child.is_none());
        self.first_child.replace(Some(new_child.0));
    }

    /// Prepend a new child to this node, before existing children.
    ///
    /// The new child is detached from its previous position.
    pub fn prepend(&self, new_child: NodeRef) {
        new_child.detach();
        new_child.parent.replace(Some(Rc::downgrade(&self.0)));
        if let Some(first_child) = self.first_child.take() {
            // Link the old first child after the new one.
            debug_assert!(first_child.previous_sibling.is_none());
            first_child
                .previous_sibling
                .replace(Some(Rc::downgrade(&new_child.0)));
            new_child.next_sibling.replace(Some(first_child));
        } else {
            // No children yet: the new child is also the last child.
            debug_assert!(self.first_child.is_none());
            self.last_child.replace(Some(Rc::downgrade(&new_child.0)));
        }
        self.first_child.replace(Some(new_child.0));
    }

    /// Insert a new sibling after this node.
    ///
    /// The new sibling is detached from its previous position.
    pub fn insert_after(&self, new_sibling: NodeRef) {
        new_sibling.detach();
        new_sibling.parent.replace(self.parent.clone_inner());
        new_sibling
            .previous_sibling
            .replace(Some(Rc::downgrade(&self.0)));
        if let Some(next_sibling) = self.next_sibling.take() {
            // Splice between `self` and its old next sibling.
            debug_assert!(next_sibling.previous_sibling().unwrap() == *self);
            next_sibling
                .previous_sibling
                .replace(Some(Rc::downgrade(&new_sibling.0)));
            new_sibling.next_sibling.replace(Some(next_sibling));
        } else if let Some(parent) = self.parent() {
            // `self` was the last child, so the new sibling becomes it.
            debug_assert!(parent.last_child().unwrap() == *self);
            parent
                .last_child
                .replace(Some(Rc::downgrade(&new_sibling.0)));
        }
        self.next_sibling.replace(Some(new_sibling.0));
    }

    /// Insert a new sibling before this node.
    ///
    /// The new sibling is detached from its previous position.
    pub fn insert_before(&self, new_sibling: NodeRef) {
        new_sibling.detach();
        new_sibling.parent.replace(self.parent.clone_inner());
        new_sibling.next_sibling.replace(Some(self.0.clone()));
        // Swap the new sibling in as our previous sibling, retrieving the
        // old one (if any) so the chain can be reconnected through it.
        if let Some(previous_sibling_weak) = self
            .previous_sibling
            .replace(Some(Rc::downgrade(&new_sibling.0)))
        {
            if let Some(previous_sibling) = previous_sibling_weak.upgrade() {
                new_sibling
                    .previous_sibling
                    .replace(Some(previous_sibling_weak));
                debug_assert!(previous_sibling.next_sibling().unwrap() == *self);
                previous_sibling.next_sibling.replace(Some(new_sibling.0));
                return;
            }
        }
        // No previous sibling: `self` was the first child.
        if let Some(parent) = self.parent() {
            debug_assert!(parent.first_child().unwrap() == *self);
            parent.first_child.replace(Some(new_sibling.0));
        }
    }
}
|
|
@ -0,0 +1,9 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Test case</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Foo</p>
|
||||
</body>
|
||||
</html>
|
Loading…
Reference in New Issue