1use flams_backend_types::search::{SearchIndex, SearchResultKind};
2use ftml_ontology::{
3 narrative::{
4 documents::Document,
5 elements::{DocumentElementRef, LogicalParagraph},
6 },
7 utils::RefTree,
8};
9
/// Extension trait for turning a search index entry into a
/// [`tantivy::TantivyDocument`].
pub trait SearchIndexExt {
    /// Consumes `self` and returns the tantivy document built from it.
    fn to_document(self) -> tantivy::TantivyDocument;
}
13
14impl SearchIndexExt for SearchIndex {
15 fn to_document(self) -> tantivy::TantivyDocument {
16 let mut ret = tantivy::TantivyDocument::default();
17 let schema = crate::schema::SearchSchema::get();
18 match self {
19 Self::Document { uri, title, body } => {
20 ret.add_u64(schema.kind, SearchResultKind::Document.into());
21 ret.add_text(schema.uri, uri.to_string());
22 if let Some(t) = title {
23 ret.add_text(schema.title, t);
24 }
25 ret.add_text(schema.body, body);
26 }
27 Self::Paragraph {
28 uri,
29 kind,
30 definition_like,
31 title,
32 fors,
33 body,
34 } => {
35 ret.add_u64(schema.kind, kind.into());
36 ret.add_text(schema.uri, uri.to_string());
37 ret.add_bool(schema.def_like, definition_like);
38 for f in fors {
39 ret.add_text(schema.fors, f.to_string());
41 }
42 if let Some(t) = title {
43 ret.add_text(schema.title, t);
44 }
45 ret.add_text(schema.body, body);
46 }
47 }
48 ret
49 }
50}
51
52pub fn index_document(doc: &Document, html: &str) -> impl Iterator<Item = SearchIndex> {
53 let elems = doc.dfs().filter_map(|e| {
54 if let DocumentElementRef::Paragraph(p) = e {
55 index_paragraph(p, html)
56 } else {
57 None
58 }
59 });
60 if let Some(s) = index_document_html(doc, html) {
61 either::Left(std::iter::once(s).chain(elems))
62 } else {
63 either::Right(elems)
64 }
65}
66
67#[must_use]
68pub fn index_document_html(doc: &Document, html: &str) -> Option<SearchIndex> {
69 let title = doc.title.as_ref().map(|s| html_to_search_text(s));
70 let body = html_to_search_text(html);
71 Some(SearchIndex::Document {
72 uri: doc.uri.clone(),
73 title,
74 body,
75 })
76}
77
78pub fn index_paragraph(para: &LogicalParagraph, html: &str) -> Option<SearchIndex> {
79 crate::SPAN.in_scope(move || {
80 let title = para.title.as_ref().map(|s| html_to_search_text(s));
81 let Some(body) = html.get(para.range.start..para.range.end) else {
82 tracing::error!("Failed to plain textify body of {}", para.uri);
83 return None;
84 };
85 let body = html_to_search_text(body);
86 let fors = para.fors.iter().map(|(f, _)| f.clone()).collect();
87
88 let Ok(kind) = para.kind.try_into() else {
89 return None;
90 };
91 let definition_like = para.kind.is_definition_like(¶.styles);
92
93 Some(SearchIndex::Paragraph {
94 uri: para.uri.clone(),
95 kind,
96 definition_like,
97 title,
98 fors,
99 body,
100 })
101 })
102}
103
104#[must_use]
105pub fn html_to_search_text(html: &str) -> String {
106 fn replacer(s: &mut String) {
107 let mut i = 0;
108 loop {
109 match s.as_bytes().get(i..i + 2) {
110 None => return,
111 Some(b".\n" | b"!\n" | b":\n" | b";\n") => i += 2,
112 Some(b) if b[0] == b'\n' => {
113 s.remove(i);
114 }
115 _ => i += 1,
116 }
117 }
118 }
119 crate::SPAN.in_scope(move || {
120 match html2text::from_read(html.as_bytes(), usize::MAX / 3) {
121 Ok(mut s) => {
122 replacer(&mut s);
123 s
124 }
125 Err(e) => {
126 tracing::error!("Error: {e}");
127 html.to_string()
128 }
129 }
130 })
131}