flams_ontology/
search.rs

1#![allow(clippy::wildcard_imports)]
2
3use crate::{
4    narration::paragraphs::ParagraphKind,
5    uris::{DocumentElementURI, DocumentURI, SymbolURI},
6};
7
/// Returns `true`; used as the `serde(default = "get_true")` provider for the
/// `allow_*` flags of [`QueryFilter`], so that omitted flags deserialize as enabled.
// Only referenced from `cfg_attr(feature = "serde", ...)` attributes, hence
// unused (and otherwise warned about) when the `serde` feature is disabled.
#[allow(dead_code)]
const fn get_true() -> bool {
    true
}
12
/// Restricts which kinds of indexed entries a search may return.
///
/// Each `allow_*` flag enables one [`SearchResultKind`]; when deserialized with
/// serde, omitted `allow_*` flags default to `true` and `definition_like_only`
/// defaults to `false`, matching [`QueryFilter::default`].
// One independent switch per result kind; an enum would not model this.
#[allow(clippy::struct_excessive_bools)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "wasm", derive(tsify_next::Tsify))]
#[cfg_attr(feature = "wasm", tsify(into_wasm_abi, from_wasm_abi))]
pub struct QueryFilter {
    /// Include whole documents (`kind` 0).
    #[cfg_attr(feature = "serde", serde(default = "get_true"))]
    pub allow_documents: bool,
    /// Include plain paragraphs (`kind` 1).
    #[cfg_attr(feature = "serde", serde(default = "get_true"))]
    pub allow_paragraphs: bool,
    /// Include definitions (`kind` 2).
    #[cfg_attr(feature = "serde", serde(default = "get_true"))]
    pub allow_definitions: bool,
    /// Include examples (`kind` 3).
    #[cfg_attr(feature = "serde", serde(default = "get_true"))]
    pub allow_examples: bool,
    /// Include assertions (`kind` 4).
    #[cfg_attr(feature = "serde", serde(default = "get_true"))]
    pub allow_assertions: bool,
    /// Include problems (`kind` 5).
    #[cfg_attr(feature = "serde", serde(default = "get_true"))]
    pub allow_problems: bool,
    /// Additionally require the entry to be flagged as definition-like.
    #[cfg_attr(feature = "serde", serde(default))]
    pub definition_like_only: bool,
}

impl Default for QueryFilter {
    /// The unrestricted filter: every kind is allowed and entries need not be
    /// definition-like.
    fn default() -> Self {
        Self {
            definition_like_only: false,
            allow_documents: true,
            allow_paragraphs: true,
            allow_definitions: true,
            allow_examples: true,
            allow_assertions: true,
            allow_problems: true,
        }
    }
}
48
49#[derive(Debug, Clone)]
50#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
51#[cfg_attr(feature = "wasm", derive(tsify_next::Tsify))]
52#[cfg_attr(feature = "wasm", tsify(into_wasm_abi, from_wasm_abi))]
53pub enum SearchResult {
54    Document(DocumentURI),
55    Paragraph {
56        uri: DocumentElementURI,
57        fors: Vec<SymbolURI>,
58        def_like: bool,
59        kind: SearchResultKind,
60    },
61}
62
/// The kind of an indexed entry.
///
/// The explicit discriminants are the numeric values stored in the index's
/// `kind` field and must therefore stay stable.
#[derive(Copy, Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "wasm", derive(tsify_next::Tsify))]
#[cfg_attr(feature = "wasm", tsify(into_wasm_abi, from_wasm_abi))]
pub enum SearchResultKind {
    Document = 0,
    Paragraph = 1,
    Definition = 2,
    Example = 3,
    Assertion = 4,
    Problem = 5,
}

impl SearchResultKind {
    /// Every variant; consulted when converting a raw number back into a kind.
    const ALL: [Self; 6] = [
        Self::Document,
        Self::Paragraph,
        Self::Definition,
        Self::Example,
        Self::Assertion,
        Self::Problem,
    ];

    /// The variant's name as a static string (e.g. `"Definition"`).
    #[must_use]
    pub const fn as_str(&self) -> &'static str {
        match *self {
            Self::Problem => "Problem",
            Self::Assertion => "Assertion",
            Self::Example => "Example",
            Self::Definition => "Definition",
            Self::Paragraph => "Paragraph",
            Self::Document => "Document",
        }
    }
}

impl From<SearchResultKind> for u64 {
    /// Yields the variant's declared discriminant.
    fn from(value: SearchResultKind) -> Self {
        value as Self
    }
}

impl TryFrom<u64> for SearchResultKind {
    type Error = ();

    /// Maps a discriminant back to its variant; any other number is `Err(())`.
    fn try_from(value: u64) -> Result<Self, Self::Error> {
        Self::ALL
            .iter()
            .copied()
            .find(|&kind| u64::from(kind) == value)
            .ok_or(())
    }
}
116impl TryFrom<ParagraphKind> for SearchResultKind {
117    type Error = ();
118    fn try_from(value: ParagraphKind) -> Result<Self, Self::Error> {
119        Ok(match value {
120            ParagraphKind::Assertion => Self::Assertion,
121            ParagraphKind::Definition => Self::Definition,
122            ParagraphKind::Example => Self::Example,
123            ParagraphKind::Paragraph => Self::Paragraph,
124            _ => return Err(()),
125        })
126    }
127}
128
129#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
130#[derive(Debug, Clone)]
131pub enum SearchIndex {
132    Document {
133        uri: DocumentURI,
134        title: Option<String>,
135        body: String,
136    },
137    Paragraph {
138        uri: DocumentElementURI,
139        kind: SearchResultKind,
140        definition_like: bool,
141        title: Option<String>,
142        fors: Vec<SymbolURI>,
143        body: String,
144    },
145}
146
147#[cfg(feature = "tantivy")]
148mod tantivy_i {
149    use super::*;
150    use crate::{
151        narration::{
152            documents::{Document, UncheckedDocument},
153            paragraphs::LogicalParagraph,
154            DocumentElement,
155        },
156        CheckingState,
157    };
158
159    pub struct SearchSchema {
160        #[allow(dead_code)]
161        pub schema: tantivy::schema::Schema,
162        uri: tantivy::schema::Field,
163        kind: tantivy::schema::Field,
164        title: tantivy::schema::Field,
165        body: tantivy::schema::Field,
166        fors: tantivy::schema::Field,
167        def_like: tantivy::schema::Field,
168    }
169    impl SearchSchema {
170        #[inline]
171        #[must_use]
172        pub fn get() -> &'static Self {
173            &SCHEMA
174        }
175    }
176
177    static SCHEMA: std::sync::LazyLock<SearchSchema> = std::sync::LazyLock::new(|| {
178        use tantivy::schema::{Schema, INDEXED, STORED, TEXT};
179        /*
180        let text_field_indexing = tantivy::schema::TextFieldIndexing::default()
181          .set_tokenizer("ngram3")
182          .set_index_option(tantivy::schema::IndexRecordOption::WithFreqsAndPositions);
183        let txt_opts = tantivy::schema::TextOptions::default().set_indexing_options(text_field_indexing);
184         */
185
186        let mut schema = Schema::builder();
187        let kind = schema.add_u64_field("kind", INDEXED | STORED);
188        let uri = schema.add_text_field("uri", STORED);
189        let def_like = schema.add_bool_field("deflike", INDEXED | STORED);
190        let fors = schema.add_text_field("for", STORED);
191        let title = schema.add_text_field("title", TEXT);
192        let body = schema.add_text_field("body", TEXT); //txt_opts);//TEXT);
193
194        let schema = schema.build();
195        SearchSchema {
196            schema,
197            uri,
198            kind,
199            title,
200            body,
201            fors,
202            def_like,
203        }
204    });
205
206    /*impl QueryFilter {
207      #[must_use]
208      pub fn to_query(self,query:&str,index:&tantivy::Index) -> Option<Box<dyn tantivy::query::Query>> {
209        match self {
210          Self::Fragments(f) => f.to_query(query,index),
211          Self::Symbols => QueryFilter{ allow_documents:false, allow_paragraphs:true, allow_definitions:true, allow_examples:false, allow_assertions:true, allow_problems:false, definition_like_only:true}
212            .to_query(query,index)
213        }
214      }
215    }*/
216
217    impl QueryFilter {
218        #[must_use]
219        pub fn to_query(
220            self,
221            query: &str,
222            index: &tantivy::Index,
223        ) -> Option<Box<dyn tantivy::query::Query>> {
224            use std::fmt::Write;
225            let Self {
226                allow_documents,
227                allow_paragraphs,
228                allow_definitions,
229                allow_examples,
230                allow_assertions,
231                allow_problems,
232                definition_like_only,
233            } = self;
234            let mut s = String::new();
235            if !allow_documents
236                || !allow_paragraphs
237                || !allow_definitions
238                || !allow_examples
239                || !allow_assertions
240                || !allow_problems
241            {
242                s.push('(');
243                let mut had_first = false;
244                if allow_documents {
245                    had_first = true;
246                    s.push_str("kind:0");
247                }
248                if allow_paragraphs {
249                    s.push_str(if had_first { " OR kind:1" } else { "kind:1" });
250                    had_first = true;
251                }
252                if allow_definitions {
253                    s.push_str(if had_first { " OR kind:2" } else { "kind:2" });
254                    had_first = true;
255                }
256                if allow_examples {
257                    s.push_str(if had_first { " OR kind:3" } else { "kind:3" });
258                    had_first = true;
259                }
260                if allow_assertions {
261                    s.push_str(if had_first { " OR kind:4" } else { "kind:4" });
262                    had_first = true;
263                }
264                if allow_problems {
265                    s.push_str(if had_first { " OR kind:5" } else { "kind:5" });
266                }
267                s.push_str(") AND ");
268            }
269            if definition_like_only {
270                s.push_str("deflike:true AND ");
271            }
272            write!(s, "({query})").ok()?;
273            let mut parser =
274                tantivy::query::QueryParser::for_index(index, vec![SCHEMA.title, SCHEMA.body]);
275            //parser.set_field_fuzzy(SCHEMA.body, false, 1, true);
276            parser.set_conjunction_by_default();
277            parser.parse_query(&s).ok()
278        }
279    }
280
281    impl tantivy::schema::document::ValueDeserialize for SearchResultKind {
282        fn deserialize<'de, D>(
283            deserializer: D,
284        ) -> Result<Self, tantivy::schema::document::DeserializeError>
285        where
286            D: tantivy::schema::document::ValueDeserializer<'de>,
287        {
288            deserializer
289                .deserialize_u64()?
290                .try_into()
291                .map_err(|()| tantivy::schema::document::DeserializeError::custom(""))
292        }
293    }
294
295    impl tantivy::schema::document::DocumentDeserialize for SearchResult {
296        fn deserialize<'de, D>(
297            mut deserializer: D,
298        ) -> Result<Self, tantivy::schema::document::DeserializeError>
299        where
300            D: tantivy::schema::document::DocumentDeserializer<'de>,
301        {
302            macro_rules! next {
303                () => {{
304                    let Some((_, r)) = deserializer.next_field()? else {
305                        return Err(tantivy::schema::document::DeserializeError::custom(
306                            "Missing value",
307                        ));
308                    };
309                    r
310                }};
311                (!) => {{
312                    let Some((_, Wrapper(r))) = deserializer.next_field()? else {
313                        return Err(tantivy::schema::document::DeserializeError::custom(
314                            "Missing value",
315                        ));
316                    };
317                    r
318                }};
319            }
320            let kind = next!();
321            match kind {
322                SearchResultKind::Document => Ok(Self::Document(next!())),
323                kind => {
324                    let uri = next!();
325                    let def_like = next!(!);
326                    let mut fors = Vec::new();
327                    while let Some((_, s)) = deserializer.next_field()? {
328                        fors.push(s);
329                    }
330                    Ok(Self::Paragraph {
331                        uri,
332                        def_like,
333                        kind,
334                        fors,
335                    })
336                }
337            }
338        }
339    }
340
341    #[derive(Debug)]
342    struct Wrapper<T>(T);
343    impl tantivy::schema::document::ValueDeserialize for Wrapper<bool> {
344        fn deserialize<'de, D>(
345            deserializer: D,
346        ) -> Result<Self, tantivy::schema::document::DeserializeError>
347        where
348            D: tantivy::schema::document::ValueDeserializer<'de>,
349        {
350            Ok(Self(deserializer.deserialize_bool()?))
351        }
352    }
353
354    impl SearchIndex {
355        #[must_use]
356        pub fn html_to_search_text(html: &str) -> Option<String> {
357            fn replacer(s: &mut String) {
358                let mut i = 0;
359                loop {
360                    match s.as_bytes().get(i..i + 2) {
361                        None => return,
362                        Some(b".\n" | b"!\n" | b":\n" | b";\n") => i += 2,
363                        Some(b) if b[0] == b'\n' => {
364                            s.remove(i);
365                        }
366                        _ => i += 1,
367                    }
368                }
369            }
370            let mut s = html2text::from_read(html.as_bytes(), usize::MAX / 3).ok()?;
371            replacer(&mut s);
372            Some(s)
373        }
374    }
375
376    impl From<SearchIndex> for tantivy::TantivyDocument {
377        fn from(value: SearchIndex) -> Self {
378            let mut ret = Self::default();
379            match value {
380                SearchIndex::Document { uri, title, body } => {
381                    ret.add_u64(SCHEMA.kind, SearchResultKind::Document.into());
382                    ret.add_text(SCHEMA.uri, uri.to_string());
383                    if let Some(t) = title {
384                        ret.add_text(SCHEMA.title, t);
385                    }
386                    ret.add_text(SCHEMA.body, body);
387                }
388                SearchIndex::Paragraph {
389                    uri,
390                    kind,
391                    definition_like,
392                    title,
393                    fors,
394                    body,
395                } => {
396                    ret.add_u64(SCHEMA.kind, kind.into());
397                    ret.add_text(SCHEMA.uri, uri.to_string());
398                    ret.add_bool(SCHEMA.def_like, definition_like);
399                    for f in fors {
400                        //write!(trace,"\n   FOR: {}",f);
401                        ret.add_text(SCHEMA.fors, f.to_string());
402                    }
403                    if let Some(t) = title {
404                        ret.add_text(SCHEMA.title, t);
405                    }
406                    ret.add_text(SCHEMA.body, body);
407                }
408            }
409            ret
410        }
411    }
412
413    impl Document {
414        pub fn search_index(&self, html: &str) -> Option<SearchIndex> {
415            let title = self.title().and_then(|s| {
416                SearchIndex::html_to_search_text(s).or_else(|| {
417                    tracing::error!("Failed to plain textify title: {s}");
418                    None
419                })
420            });
421            let Some(body) = SearchIndex::html_to_search_text(html) else {
422                tracing::error!("Failed to plain textify body of {}", self.uri());
423                return None;
424            };
425            Some(SearchIndex::Document {
426                uri: self.uri().clone(),
427                title,
428                body,
429            })
430        }
431
432        #[must_use]
433        pub fn all_searches(&self, html: &str) -> Vec<SearchIndex> {
434            let mut ret = vec![];
435            if let Some(s) = self.search_index(html) {
436                ret.push(s);
437            }
438            for e in self.dfs() {
439                if let DocumentElement::Paragraph(p) = e {
440                    if let Some(s) = p.search_index(html) {
441                        ret.push(s);
442                    }
443                }
444            }
445            ret
446        }
447    }
448
449    impl UncheckedDocument {
450        pub fn search_index(&self, html: &str) -> Option<SearchIndex> {
451            let title = self.title.as_ref().and_then(|s| {
452                SearchIndex::html_to_search_text(s).or_else(|| {
453                    tracing::error!("Failed to plain textify title: {s}");
454                    None
455                })
456            });
457            let Some(body) = SearchIndex::html_to_search_text(html) else {
458                tracing::error!("Failed to plain textify body of {}", self.uri);
459                return None;
460            };
461            Some(SearchIndex::Document {
462                uri: self.uri.clone(),
463                title,
464                body,
465            })
466        }
467
468        #[must_use]
469        pub fn all_searches(&self, html: &str) -> Vec<SearchIndex> {
470            let mut ret = vec![];
471            if let Some(s) = self.search_index(html) {
472                ret.push(s);
473            }
474            for e in self.dfs() {
475                if let DocumentElement::Paragraph(p) = e {
476                    if let Some(s) = p.search_index(html) {
477                        ret.push(s);
478                    }
479                }
480            }
481            ret
482        }
483    }
484
485    impl<S: CheckingState> LogicalParagraph<S> {
486        pub fn search_index(&self, html: &str) -> Option<SearchIndex> {
487            let title = self.title.and_then(|range| {
488                html.get(range.start..range.end).map_or_else(
489                    || {
490                        tracing::error!("Failed to plain textify title: Range {range:?}");
491                        None
492                    },
493                    |s| {
494                        SearchIndex::html_to_search_text(s).or_else(|| {
495                            tracing::error!("Failed to plain textify title: {s}");
496                            None
497                        })
498                    },
499                )
500            });
501            let Some(body) = html.get(self.range.start..self.range.end) else {
502                tracing::error!("Failed to plain textify body of {}", self.uri);
503                return None;
504            };
505            let Some(body) = SearchIndex::html_to_search_text(body) else {
506                tracing::error!("Failed to plain textify body of {}", self.uri);
507                return None;
508            };
509            let fors = self.fors.iter().map(|(f, _)| f.clone()).collect();
510
511            let Ok(kind) = self.kind.try_into() else {
512                return None;
513            };
514            let definition_like = self.kind.is_definition_like(&self.styles);
515
516            Some(SearchIndex::Paragraph {
517                uri: self.uri.clone(),
518                kind,
519                definition_like,
520                title,
521                fors,
522                body,
523            })
524        }
525    }
526}
527#[cfg(feature = "tantivy")]
528pub use tantivy_i::*;