flams_ftml/parser/
termsnotations.rs

1use either::Either;
2use flams_ontology::{
3    content::terms::{ArgMode, Informal, Term},
4    narration::notations::{NotationComponent, OpNotation},
5};
6use ftml_extraction::{
7    open::OpenFTMLElement,
8    prelude::{FTMLNode, NotationSpec},
9};
10use html5ever::{
11    interface::ElemName,
12    serialize::{HtmlSerializer, SerializeOpts, Serializer, TraversalScope},
13};
14
15use crate::parser::nodes::ElementData;
16
17use super::nodes::NodeRef;
18
19impl NodeRef {
20    pub(super) fn do_notation(&self) -> NotationSpec {
21        self.as_element().map_or_else(
22            || {
23                let mut ret = "<span>".to_string();
24                ret.push_str(&self.string());
25                ret.push_str("</span>");
26                NotationSpec {
27                    attribute_index: 5,
28                    inner_index: 6,
29                    components: vec![NotationComponent::S(ret.into())].into_boxed_slice(),
30                    is_text: true,
31                }
32            },
33            |n| {
34                let (is_text, attribute_index, inner_index) = get_is_text_and_offsets(n);
35                let mut ret = Vec::new();
36                let mut strng = Vec::new();
37                let _ = rec(self, &mut ret, &mut strng);
38                if !strng.is_empty() {
39                    ret.push(NotationComponent::S(
40                        String::from_utf8_lossy(&strng)
41                            .into_owned()
42                            .into_boxed_str(),
43                    ));
44                }
45                NotationSpec {
46                    attribute_index,
47                    inner_index,
48                    components: ret.into_boxed_slice(),
49                    is_text,
50                }
51            },
52        )
53    }
54
55    pub(super) fn do_op_notation(&self) -> OpNotation {
56        self.as_element().map_or_else(
57            || todo!("should be impossible"),
58            |n| {
59                let (is_text, attribute_index, inner_index) = get_is_text_and_offsets(n);
60                let s = self.string();
61                OpNotation {
62                    attribute_index,
63                    inner_index,
64                    text: s.into(),
65                    is_text,
66                }
67            },
68        )
69    }
70
71    #[allow(clippy::cast_possible_truncation)]
72    pub(super) fn do_term(&self) -> Term {
73        if let Some(elem) = self.as_element() {
74            if let Some(mut ftml) = elem.ftml.take() {
75                if let Some(i) = ftml
76                    .iter()
77                    .position(|e| matches!(e, OpenFTMLElement::ClosedTerm(_)))
78                {
79                    let OpenFTMLElement::ClosedTerm(t) = ftml.elems.remove(i) else {
80                        unreachable!()
81                    };
82                    return t;
83                }
84                elem.ftml.set(Some(ftml));
85            }
86            /*if self.children().count() == 1 && self.first_child().is_some_and(|e| e.as_element().is_some()) {
87              return self.first_child().unwrap_or_else(|| unreachable!()).do_term()
88            }*/
89            let tag = elem.name.local.to_string();
90            let attrs = elem
91                .attributes
92                .borrow()
93                .0
94                .iter()
95                .map(|(k, v)| {
96                    (
97                        k.local.to_string().into_boxed_str(),
98                        v.to_string().into_boxed_str(),
99                    )
100                })
101                .collect::<Vec<_>>()
102                .into_boxed_slice();
103            let mut terms = Vec::new();
104            let mut children = Vec::new();
105            for c in self.children() {
106                if let Some(t) = c.as_text() {
107                    let t = t.borrow();
108                    let t = t.trim();
109                    if t.is_empty() {
110                        continue;
111                    }
112                    children.push(Informal::Text(t.to_string().into_boxed_str()));
113                } else if c.as_element().is_some() {
114                    let l = terms.len() as u8;
115                    match c.as_term() {
116                        Term::Informal {
117                            tag,
118                            attributes,
119                            children: mut chs,
120                            terms: tms,
121                        } => {
122                            terms.extend(tms.into_vec().into_iter());
123                            for c in &mut chs {
124                                if let Some(iter) = c.iter_mut_opt() {
125                                    for c in iter {
126                                        if let Informal::Term(ref mut u) = c {
127                                            *u += l;
128                                        }
129                                    }
130                                }
131                            }
132                            children.push(Informal::Node {
133                                tag,
134                                attributes,
135                                children: chs,
136                            });
137                        }
138                        t => {
139                            terms.push(t);
140                            children.push(Informal::Term(l));
141                        }
142                    }
143                }
144            }
145            Term::Informal {
146                tag,
147                attributes: attrs,
148                children: children.into_boxed_slice(),
149                terms: terms.into_boxed_slice(),
150            }
151        } else {
152            unreachable!("This should not happen")
153        }
154    }
155}
156
157#[allow(clippy::too_many_lines)]
158fn rec(node: &NodeRef, ret: &mut Vec<NotationComponent>, currstr: &mut Vec<u8>) -> (u8, ArgMode) {
159    let mut index = 0;
160    let tp = ArgMode::Normal;
161    let mut ser = HtmlSerializer::new(
162        currstr,
163        SerializeOpts {
164            traversal_scope: TraversalScope::IncludeNode,
165            ..Default::default()
166        },
167    );
168    if let Some(elem) = node.as_element() {
169        if let Some(s) = elem.ftml.take() {
170            for e in &s.elems {
171                match e {
172                    OpenFTMLElement::Comp => {
173                        if !ser.writer.is_empty() {
174                            ret.push(NotationComponent::S(
175                                String::from_utf8_lossy(&std::mem::take(ser.writer))
176                                    .into_owned()
177                                    .into_boxed_str(),
178                            ));
179                        }
180                        ret.push(NotationComponent::Comp(node.string().into_boxed_str()));
181                        return (index, tp);
182                    }
183                    OpenFTMLElement::MainComp => {
184                        if !ser.writer.is_empty() {
185                            ret.push(NotationComponent::S(
186                                String::from_utf8_lossy(&std::mem::take(ser.writer))
187                                    .into_owned()
188                                    .into_boxed_str(),
189                            ));
190                        }
191                        ret.push(NotationComponent::MainComp(node.string().into_boxed_str()));
192                        return (index, tp);
193                    }
194                    OpenFTMLElement::Arg(arg) => {
195                        if !ser.writer.is_empty() {
196                            ret.push(NotationComponent::S(
197                                String::from_utf8_lossy(&std::mem::take(ser.writer))
198                                    .into_owned()
199                                    .into_boxed_str(),
200                            ));
201                        }
202                        index = match arg.index {
203                            Either::Left(u) | Either::Right((u, _)) => u,
204                        };
205                        ret.push(NotationComponent::Arg(index, arg.mode));
206                        return (index, arg.mode);
207                    }
208                    OpenFTMLElement::ArgSep => {
209                        if !ser.writer.is_empty() {
210                            ret.push(NotationComponent::S(
211                                String::from_utf8_lossy(&std::mem::take(ser.writer))
212                                    .into_owned()
213                                    .into_boxed_str(),
214                            ));
215                        }
216                        let mut separator = Vec::new();
217                        let mut nret = Vec::new();
218                        let mut idx = 0;
219                        let mut new_mode = ArgMode::Sequence;
220                        for c in node.children() {
221                            let (r, m) = rec(&c, &mut nret, &mut separator);
222                            if r != 0 {
223                                idx = r;
224                                new_mode = m;
225                            }
226                        }
227                        if !separator.is_empty() {
228                            nret.push(NotationComponent::S(
229                                String::from_utf8_lossy(&separator)
230                                    .into_owned()
231                                    .into_boxed_str(),
232                            ));
233                        }
234                        ret.push(NotationComponent::ArgSep {
235                            index: idx,
236                            mode: new_mode,
237                            sep: nret.into_boxed_slice(),
238                        });
239                        return (index, tp);
240                    }
241                    OpenFTMLElement::ArgMap => {
242                        if !ser.writer.is_empty() {
243                            ret.push(NotationComponent::S(
244                                String::from_utf8_lossy(&std::mem::take(ser.writer))
245                                    .into_owned()
246                                    .into_boxed_str(),
247                            ));
248                        }
249                        let mut separator = Vec::new();
250                        let mut nret = Vec::new();
251                        let mut idx = 0;
252                        //let mut new_mode = ArgMode::Sequence;
253                        for c in node.children() {
254                            let (r, _) = rec(&c, &mut nret, &mut separator);
255                            if r != 0 {
256                                idx = r; //new_mode = m;
257                            }
258                        }
259                        if !separator.is_empty() {
260                            nret.push(NotationComponent::S(
261                                String::from_utf8_lossy(&separator)
262                                    .into_owned()
263                                    .into_boxed_str(),
264                            ));
265                        }
266                        ret.push(NotationComponent::ArgMap {
267                            index: idx,
268                            segments: nret.into_boxed_slice(),
269                        });
270                        return (index, tp);
271                    }
272                    // TODO ArgMapSep
273                    _ => {}
274                }
275            }
276        }
277        let attrs = elem.attributes.borrow();
278        let _ = ser.start_elem(
279            elem.name.clone(),
280            attrs.0.iter().map(|(name, value)| (name, &**value)),
281        );
282        drop(attrs);
283        for c in node.children() {
284            if let Some(t) = c.as_text() {
285                let t = t.borrow();
286                let _ = ser.write_text(&t);
287            } else if c.as_element().is_some() {
288                let _ = rec(&c, ret, ser.writer);
289            }
290        }
291        let _ = ser.end_elem(elem.name.clone());
292    } else if let Some(t) = node.as_text() {
293        let t = t.borrow();
294        let _ = ser.write_text(&t);
295    }
296    (index, tp)
297}
298
299#[allow(clippy::cast_possible_truncation)]
300fn get_is_text_and_offsets(e: &ElementData) -> (bool, u8, u16) {
301    let (t, o) = match e.name.local.as_ref() {
302        s @ ("span" | "div") => (true, s.len() as u8 + 1),
303        s => (false, s.len() as u8 + 1),
304    };
305    let i = e.attributes.borrow().len() as u16 + (u16::from(o) + 1);
306    (t, o, i)
307}
308
309pub(super) fn filter_node_term(mut node: NodeRef) -> NodeRef {
310    //println!("Here: {}",node.string());
311    'outer: while let Some(e) = node.as_element() {
312        /*println!("Checking: {e:?}\nChildren:");
313        for c in node.children() {
314          println!("  - {}",c.string());
315        }*/
316        if let Some(a) = e.ftml.take() {
317            if a.iter()
318                .any(|e| matches!(e, OpenFTMLElement::ClosedTerm(_)))
319            {
320                e.ftml.set(Some(a));
321                return node;
322            }
323            e.ftml.set(Some(a));
324        }
325        let num_children = node
326            .children()
327            .filter(|n| {
328                n.as_element().is_some()
329                    || n.as_text().is_some_and(|t| !t.borrow().trim().is_empty())
330            })
331            .count();
332        if matches!(e.name.local.as_ref(), "math") && num_children == 1 {
333            if let Some(n) = node.children().find(|n| n.as_element().is_some()) {
334                node = n;
335                continue;
336            }
337        }
338        if matches!(e.name.local.as_ref(), "mrow") && num_children == 1 {
339            if let Some(n) = node.children().find(|n| n.as_element().is_some()) {
340                node = n;
341                continue;
342            }
343        }
344        if matches!(e.name.local.as_ref(), "span" | "div") && num_children == 1 {
345            if let Some(n) = node.children().find(|n| n.as_element().is_some()) {
346                for (k, v) in &e.attributes.borrow().0 .0 {
347                    let k = k.local_name().as_ref();
348                    let v = &**v;
349                    if (k == "class" && v != "rustex_contents") || k == "style" {
350                        break 'outer;
351                    }
352                }
353                node = n;
354                continue;
355            }
356        }
357        break;
358    }
359    node
360}