flams_search/
index.rs

1use flams_backend_types::search::{SearchIndex, SearchResultKind};
2use ftml_ontology::{
3    narrative::{
4        documents::Document,
5        elements::{DocumentElementRef, LogicalParagraph},
6    },
7    utils::RefTree,
8};
9
10pub trait SearchIndexExt {
11    fn to_document(self) -> tantivy::TantivyDocument;
12}
13
14impl SearchIndexExt for SearchIndex {
15    fn to_document(self) -> tantivy::TantivyDocument {
16        let mut ret = tantivy::TantivyDocument::default();
17        let schema = crate::schema::SearchSchema::get();
18        match self {
19            Self::Document { uri, title, body } => {
20                ret.add_u64(schema.kind, SearchResultKind::Document.into());
21                ret.add_text(schema.uri, uri.to_string());
22                if let Some(t) = title {
23                    ret.add_text(schema.title, t);
24                }
25                ret.add_text(schema.body, body);
26            }
27            Self::Paragraph {
28                uri,
29                kind,
30                definition_like,
31                title,
32                fors,
33                body,
34            } => {
35                ret.add_u64(schema.kind, kind.into());
36                ret.add_text(schema.uri, uri.to_string());
37                ret.add_bool(schema.def_like, definition_like);
38                for f in fors {
39                    //write!(trace,"\n   FOR: {}",f);
40                    ret.add_text(schema.fors, f.to_string());
41                }
42                if let Some(t) = title {
43                    ret.add_text(schema.title, t);
44                }
45                ret.add_text(schema.body, body);
46            }
47        }
48        ret
49    }
50}
51
52pub fn index_document(doc: &Document, html: &str) -> impl Iterator<Item = SearchIndex> {
53    let elems = doc.dfs().filter_map(|e| {
54        if let DocumentElementRef::Paragraph(p) = e {
55            index_paragraph(p, html)
56        } else {
57            None
58        }
59    });
60    if let Some(s) = index_document_html(doc, html) {
61        either::Left(std::iter::once(s).chain(elems))
62    } else {
63        either::Right(elems)
64    }
65}
66
67#[must_use]
68pub fn index_document_html(doc: &Document, html: &str) -> Option<SearchIndex> {
69    let title = doc.title.as_ref().map(|s| html_to_search_text(s));
70    let body = html_to_search_text(html);
71    Some(SearchIndex::Document {
72        uri: doc.uri.clone(),
73        title,
74        body,
75    })
76}
77
78pub fn index_paragraph(para: &LogicalParagraph, html: &str) -> Option<SearchIndex> {
79    crate::SPAN.in_scope(move || {
80        let title = para.title.as_ref().map(|s| html_to_search_text(s));
81        let Some(body) = html.get(para.range.start..para.range.end) else {
82            tracing::error!("Failed to plain textify body of {}", para.uri);
83            return None;
84        };
85        let body = html_to_search_text(body);
86        let fors = para.fors.iter().map(|(f, _)| f.clone()).collect();
87
88        let Ok(kind) = para.kind.try_into() else {
89            return None;
90        };
91        let definition_like = para.kind.is_definition_like(&para.styles);
92
93        Some(SearchIndex::Paragraph {
94            uri: para.uri.clone(),
95            kind,
96            definition_like,
97            title,
98            fors,
99            body,
100        })
101    })
102}
103
104#[must_use]
105pub fn html_to_search_text(html: &str) -> String {
106    fn replacer(s: &mut String) {
107        let mut i = 0;
108        loop {
109            match s.as_bytes().get(i..i + 2) {
110                None => return,
111                Some(b".\n" | b"!\n" | b":\n" | b";\n") => i += 2,
112                Some(b) if b[0] == b'\n' => {
113                    s.remove(i);
114                }
115                _ => i += 1,
116            }
117        }
118    }
119    crate::SPAN.in_scope(move || {
120        match html2text::from_read(html.as_bytes(), usize::MAX / 3) {
121            Ok(mut s) => {
122                replacer(&mut s);
123                s
124            }
125            Err(e) => {
126                tracing::error!("Error: {e}");
127                html.to_string()
128            }
129        }
130    })
131}