1use html5ever::{
2 LocalName, Namespace, QualName,
3 interface::QuirksMode,
4 serialize::{SerializeOpts, TraversalScope},
5 tendril::StrTendril,
6};
7use std::{
8 borrow::Cow,
9 cell::{Cell, RefCell},
10 rc::{Rc, Weak},
11};
12
13pub struct Node {
15 parent: Cell<Option<Weak<Node>>>,
16 previous_sibling: Cell<Option<Weak<Node>>>,
17 next_sibling: Cell<Option<Rc<Node>>>,
18 first_child: Cell<Option<Rc<Node>>>,
19 last_child: Cell<Option<Weak<Node>>>,
20 data: NodeData,
21}
22impl std::fmt::Debug for Node {
23 #[inline]
24 #[allow(clippy::ref_as_ptr)]
25 fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
26 write!(f, "{:?} @ {:?}", self.data, self as *const Self)
27 }
28}
29
30#[derive(Clone, Debug)]
31pub struct NodeRef(pub Rc<Node>);
32
33#[derive(Debug, PartialEq, Eq, Clone)]
34pub struct Attributes(pub Vec<(QualName, StrTendril)>);
35
36#[derive(Debug)]
38pub enum NodeData {
39 Element(ElementData),
41
42 Text(RefCell<StrTendril>),
44
45 Comment(StrTendril),
47
48 ProcessingInstruction(StrTendril, StrTendril),
50
51 Doctype {
53 name: StrTendril,
54 public_id: StrTendril,
55 system_id: StrTendril,
56 },
57
58 Document(Cell<QuirksMode>),
60}
61
62pub struct ElementData {
64 pub name: QualName,
66 pub attributes: RefCell<Attributes>,
68 pub start_offset: Cell<usize>,
69 pub end_offset: Cell<usize>,
70 pub closed: Cell<bool>,
71}
72impl std::fmt::Debug for ElementData {
73 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74 write!(f, "{:?}[{:?}]", self.name, self.attributes)
75 }
76}
77
78impl Attributes {
79 pub fn len(&self) -> usize {
80 self.0
81 .iter()
82 .map(|(k, v)| {
83 k.prefix
84 .as_ref()
85 .map(|e| e.len() + ":".len())
86 .unwrap_or_default()
87 + k.local.len()
88 + " =\"\"".len()
89 + escaped_len(v, true)
90 })
91 .sum()
92 }
93
94 pub(crate) fn new_attr(&mut self, key: &str, value: String) {
95 let name = QualName::new(None, Namespace::from(""), LocalName::from(key.to_string()));
96 for (k, v) in &mut self.0 {
97 if *k == name {
98 *v = value.into();
99 return;
100 }
101 }
102 self.0.push((name, value.into()));
103 }
104}
105impl From<Vec<html5ever::Attribute>> for Attributes {
106 fn from(value: Vec<html5ever::Attribute>) -> Self {
107 Self(
108 value
109 .into_iter()
110 .map(|html5ever::Attribute { name, value }| (name, value))
111 .collect(),
112 )
113 }
114}
115
116impl NodeRef {
117 pub fn string(&self) -> Cow<'_, str> {
118 let mut html = Vec::new();
119 let _ = html5ever::serialize(
120 &mut html,
121 self,
122 SerializeOpts {
123 traversal_scope: TraversalScope::IncludeNode,
124 ..Default::default()
125 },
126 );
127 String::from_utf8_lossy(&html).into_owned().into() }
129
130 #[allow(clippy::cast_sign_loss)]
131 #[allow(clippy::cast_possible_wrap)]
132 fn len_update(&self, len: isize) {
133 if let Some(e) = self.as_element() {
134 e.end_offset
136 .set(((e.end_offset.get() as isize) + len) as usize);
137 }
138 let mut cur = self.clone();
139 while let Some(n) = cur.next_sibling() {
140 if let Some(e) = n.as_element() {
141 e.start_offset
143 .set(((e.start_offset.get() as isize) + len) as usize);
144 e.end_offset
145 .set(((e.end_offset.get() as isize) + len) as usize);
146 }
147 cur = n;
148 }
149 if let Some(p) = self.parent() {
150 p.len_update(len);
151 }
152 }
153
154 #[inline]
155 pub fn children(&self) -> Siblings {
156 match (self.first_child(), self.last_child()) {
157 (Some(first_child), Some(last_child)) => Siblings(Some(State {
158 next: first_child,
159 next_back: last_child,
160 })),
161 (None, None) => Siblings(None),
162 _ => unreachable!(),
163 }
164 }
165 pub fn len(&self) -> usize {
166 match &self.data {
167 NodeData::Comment(_) => 0, NodeData::Text(t) => t.borrow().as_bytes().len(),
169 NodeData::Element(e) => e.end_offset.get() - e.start_offset.get(),
170 NodeData::Doctype { name, .. } => "<!DOCTYPE >".len() + name.as_bytes().len(),
171 NodeData::ProcessingInstruction(target, data) => {
172 "<? >".len() + target.as_bytes().len() + data.as_bytes().len()
173 }
174 NodeData::Document(_) => self.children().map(|c| c.len()).sum(),
175 }
176 }
177
178 #[inline]
180 pub fn new(data: NodeData) -> Self {
181 Self(Rc::new(Node {
182 parent: Cell::new(None),
183 first_child: Cell::new(None),
184 last_child: Cell::new(None),
185 previous_sibling: Cell::new(None),
186 next_sibling: Cell::new(None),
187 data,
188 }))
189 }
190
191 pub fn update_len(e: &ElementData) {
192 let len = Self::base_len(&e.name) + e.attributes.borrow().len();
193 e.end_offset.set(e.start_offset.get() + len);
194 }
195
196 fn self_closing(name: &QualName) -> bool {
197 use html5ever::{local_name, ns};
198 name.ns == ns!(html)
199 && matches!(
200 name.local,
201 local_name!("area")
202 | local_name!("base")
203 | local_name!("basefont")
204 | local_name!("bgsound")
205 | local_name!("br")
206 | local_name!("col")
207 | local_name!("embed")
208 | local_name!("frame")
209 | local_name!("hr")
210 | local_name!("img")
211 | local_name!("input")
212 | local_name!("keygen")
213 | local_name!("link")
214 | local_name!("meta")
215 | local_name!("param")
216 | local_name!("source")
217 | local_name!("track")
218 | local_name!("wbr")
219 )
220 }
221
222 fn base_len(name: &QualName) -> usize {
223 let tag_len = tag_len(name);
224 if Self::self_closing(name) {
225 tag_len
226 } else {
227 2 * tag_len + 1
228 }
229 }
230
231 #[inline]
233 pub fn new_element(name: QualName, attributes: Attributes) -> Self {
234 let attrs_len: usize = attributes.len();
235 let base_len = Self::base_len(&name);
236 Self::new(NodeData::Element(ElementData {
237 name,
238 attributes: RefCell::new(attributes),
239 start_offset: Cell::new(0),
240 end_offset: Cell::new(base_len + attrs_len),
241 closed: Cell::new(false),
242 }))
243 }
244
245 #[inline]
247 pub fn new_text(value: StrTendril) -> Self {
248 Self::new(NodeData::Text(RefCell::new(value)))
249 }
250
251 #[inline]
253 pub fn new_comment(value: StrTendril) -> Self {
254 Self::new(NodeData::Comment(value))
255 }
256
257 #[inline]
259 pub fn new_processing_instruction(target: StrTendril, data: StrTendril) -> Self {
260 Self::new(NodeData::ProcessingInstruction(target, data))
261 }
262
263 #[inline]
265 pub fn new_doctype(name: StrTendril, public_id: StrTendril, system_id: StrTendril) -> Self {
266 Self::new(NodeData::Doctype {
267 name,
268 public_id,
269 system_id,
270 })
271 }
272
273 #[inline]
275 pub fn new_document() -> Self {
276 Self::new(NodeData::Document(Cell::new(QuirksMode::NoQuirks)))
277 }
278
279 pub fn append(&self, new_child: Self) {
283 new_child.detach();
284 new_child.parent.replace(Some(Rc::downgrade(&self.0)));
285 if let Some(last_child_weak) = self.last_child.replace(Some(Rc::downgrade(&new_child.0)))
286 && let Some(last_child) = last_child_weak.upgrade()
287 {
288 new_child.previous_sibling.replace(Some(last_child_weak));
289 debug_assert!(last_child.next_sibling.is_none());
290 last_child.next_sibling.replace(Some(new_child.0));
291 return;
292 }
293 debug_assert!(self.first_child.is_none());
294 self.first_child.replace(Some(new_child.0));
295 }
296}
297
298impl Node {
299 #[inline]
301 pub const fn data(&self) -> &NodeData {
302 &self.data
303 }
304
305 #[inline]
307 pub const fn as_element(&self) -> Option<&ElementData> {
308 match self.data {
309 NodeData::Element(ref value) => Some(value),
310 _ => None,
311 }
312 }
313
314 #[inline]
316 pub const fn as_document(&self) -> Option<&Cell<QuirksMode>> {
317 match self.data {
318 NodeData::Document(ref value) => Some(value),
319 _ => None,
320 }
321 }
322
323 #[inline]
325 pub const fn as_text(&self) -> Option<&RefCell<StrTendril>> {
326 match self.data {
327 NodeData::Text(ref value) => Some(value),
328 _ => None,
329 }
330 }
331
332 #[inline]
334 pub fn parent(&self) -> Option<NodeRef> {
335 self.parent.upgrade().map(NodeRef)
336 }
337
338 #[inline]
340 pub fn first_child(&self) -> Option<NodeRef> {
341 self.first_child.clone_inner().map(NodeRef)
342 }
343
344 #[inline]
346 pub fn last_child(&self) -> Option<NodeRef> {
347 self.last_child.upgrade().map(NodeRef)
348 }
349
350 #[inline]
352 pub fn previous_sibling(&self) -> Option<NodeRef> {
353 self.previous_sibling.upgrade().map(NodeRef)
354 }
355
356 #[inline]
358 pub fn next_sibling(&self) -> Option<NodeRef> {
359 self.next_sibling.clone_inner().map(NodeRef)
360 }
361
362 pub fn detach(&self) {
366 let parent_weak = self.parent.take();
367 let previous_sibling_weak = self.previous_sibling.take();
368 let next_sibling_strong = self.next_sibling.take();
369
370 let previous_sibling_opt = previous_sibling_weak.as_ref().and_then(Weak::upgrade);
371
372 if let Some(next_sibling_ref) = next_sibling_strong.as_ref() {
373 next_sibling_ref
374 .previous_sibling
375 .replace(previous_sibling_weak);
376 } else if let Some(parent_ref) = parent_weak.as_ref()
377 && let Some(parent_strong) = parent_ref.upgrade()
378 {
379 parent_strong.last_child.replace(previous_sibling_weak);
380 }
381
382 if let Some(previous_sibling_strong) = previous_sibling_opt {
383 previous_sibling_strong
384 .next_sibling
385 .replace(next_sibling_strong);
386 } else if let Some(parent_ref) = parent_weak.as_ref()
387 && let Some(parent_strong) = parent_ref.upgrade()
388 {
389 parent_strong.first_child.replace(next_sibling_strong);
390 }
391 }
392}
393
394impl std::ops::Deref for NodeRef {
395 type Target = Node;
396 #[inline]
397 fn deref(&self) -> &Node {
398 &self.0
399 }
400}
401
402impl Eq for NodeRef {}
403impl PartialEq for NodeRef {
404 #[inline]
405 fn eq(&self, other: &Self) -> bool {
406 let a: *const Node = &raw const *self.0;
407 let b: *const Node = &raw const *other.0;
408 a == b
409 }
410}
411
412impl Drop for Node {
413 fn drop(&mut self) {
414 fn non_recursive_drop_unique_rc(mut rc: Rc<Node>, stack: &mut Vec<Rc<Node>>) {
415 loop {
416 if let Some(child) = rc.first_child.take_if_unique_strong() {
417 stack.push(rc);
418 rc = child;
419 continue;
420 }
421 if let Some(sibling) = rc.next_sibling.take_if_unique_strong() {
422 rc = sibling;
431 continue;
432 }
433 if let Some(parent) = stack.pop() {
434 rc = parent;
436 continue;
437 }
438 return;
439 }
440 }
441 let mut stack = Vec::new();
450 if let Some(rc) = self.first_child.take_if_unique_strong() {
451 non_recursive_drop_unique_rc(rc, &mut stack);
452 }
453 if let Some(rc) = self.next_sibling.take_if_unique_strong() {
454 non_recursive_drop_unique_rc(rc, &mut stack);
455 }
456 }
457}
458
459#[inline]
460pub fn tag_len(name: &QualName) -> usize {
461 name.prefix
462 .as_ref()
463 .map(|e| e.len() + 1 )
464 .unwrap_or_default()
465 + name.local.len()
466 + "<>".len()
467}
468
469pub fn escaped_len(str: &str, attr_mode: bool) -> usize {
470 str.chars()
471 .map(|b| match b {
472 '&' => "&".len(),
473 '\u{00A0}' => " ".len(),
474 '"' if attr_mode => """.len(),
475 '<' if !attr_mode => "<".len(),
476 '>' if !attr_mode => ">".len(),
477 c => c.len_utf8(),
478 })
479 .sum()
480}
481
482#[derive(Debug, Clone)]
483struct State<T> {
484 next: T,
485 next_back: T,
486}
487#[derive(Debug, Clone)]
489pub struct Siblings(Option<State<NodeRef>>);
490
491macro_rules! siblings_next {
492 ($next: ident, $next_back: ident, $next_sibling: ident) => {
493 fn $next(&mut self) -> Option<NodeRef> {
494 #![allow(non_shorthand_field_patterns)]
495 self.0.take().map(
496 |State {
497 $next: next,
498 $next_back: next_back,
499 }| {
500 if let Some(sibling) = next.$next_sibling() {
501 if next != next_back {
502 self.0 = Some(State {
503 $next: sibling,
504 $next_back: next_back,
505 })
506 }
507 }
508 next
509 },
510 )
511 }
512 };
513}
514
515impl Iterator for Siblings {
516 type Item = NodeRef;
517 siblings_next!(next, next_back, next_sibling);
518}
519
520impl DoubleEndedIterator for Siblings {
521 siblings_next!(next_back, next, previous_sibling);
522}
523
524impl html5ever::serialize::Serialize for NodeRef {
525 fn serialize<S>(
526 &self,
527 serializer: &mut S,
528 traversal_scope: TraversalScope,
529 ) -> std::io::Result<()>
530 where
531 S: html5ever::serialize::Serializer,
532 {
533 match (traversal_scope, self.data()) {
534 (ref scope, NodeData::Element(element)) => {
535 if *scope == TraversalScope::IncludeNode {
536 let attrs = element.attributes.borrow();
537
538 serializer.start_elem(
539 element.name.clone(),
540 attrs.0.iter().map(|(name, value)| (name, &**value)),
541 )?;
542 }
543 let children = self.children();
544
545 for child in children {
546 html5ever::serialize::Serialize::serialize(
547 &child,
548 serializer,
549 TraversalScope::IncludeNode,
550 )?;
551 }
552
553 if *scope == TraversalScope::IncludeNode {
554 serializer.end_elem(element.name.clone())?;
555 }
556 Ok(())
557 }
558
559 (_, &NodeData::Document(_)) => {
560 for child in self.children() {
561 html5ever::serialize::Serialize::serialize(
562 &child,
563 serializer,
564 TraversalScope::IncludeNode,
565 )?;
566 }
567 Ok(())
568 }
569
570 (TraversalScope::ChildrenOnly(_), _) => Ok(()),
571
572 (TraversalScope::IncludeNode, NodeData::Doctype { name, .. }) => {
573 serializer.write_doctype(name)
574 }
575 (TraversalScope::IncludeNode, NodeData::Text(text)) => {
576 serializer.write_text(&text.borrow())
577 }
578 (TraversalScope::IncludeNode, NodeData::Comment(_text)) => Ok(()), (TraversalScope::IncludeNode, NodeData::ProcessingInstruction(target, data)) => {
580 serializer.write_processing_instruction(target, data)
581 }
582 }
583 }
584}
585
586trait CellOption {
587 fn is_none(&self) -> bool;
588}
589
590impl<T> CellOption for Cell<Option<T>> {
591 #[inline]
592 fn is_none(&self) -> bool {
593 unsafe { (*self.as_ptr()).is_none() }
594 }
595}
596
597trait CellOptionWeak<T> {
598 fn upgrade(&self) -> Option<Rc<T>>;
599}
600
601impl<T> CellOptionWeak<T> for Cell<Option<Weak<T>>> {
602 #[inline]
603 fn upgrade(&self) -> Option<Rc<T>> {
604 unsafe { (*self.as_ptr()).as_ref().and_then(Weak::upgrade) }
605 }
606}
607
608trait CellOptionRc<T> {
609 fn take_if_unique_strong(&self) -> Option<Rc<T>>;
612 fn clone_inner(&self) -> Option<Rc<T>>;
613}
614
615impl<T> CellOptionRc<T> for Cell<Option<Rc<T>>> {
616 #[inline]
617 fn take_if_unique_strong(&self) -> Option<Rc<T>> {
618 unsafe {
619 match *self.as_ptr() {
620 None => None,
621 Some(ref rc) if Rc::strong_count(rc) > 1 => None,
622 Some(_) => self.take(),
625 }
626 }
627 }
628
629 #[inline]
630 fn clone_inner(&self) -> Option<Rc<T>> {
631 unsafe { (*self.as_ptr()).clone() }
632 }
633}