flams_system/
search.rs

1use flams_ontology::{
2    search::{QueryFilter, SearchIndex, SearchResult, SearchSchema},
3    uris::SymbolURI,
4};
5use flams_utils::vecmap::VecMap;
6
7use crate::backend::{archives::Archive, GlobalBackend};
8
9const MEMORY_SIZE: usize = 50_000_000;
10
11pub struct Searcher {
12    index: parking_lot::RwLock<tantivy::index::Index>,
13    reader: parking_lot::RwLock<tantivy::IndexReader>,
14    writer: parking_lot::Mutex<()>,
15}
16impl Searcher {
17    fn new() -> Self {
18        let index = tantivy::index::Index::create_in_ram(SearchSchema::get().schema.clone());
19        Self {
20            reader: parking_lot::RwLock::new(index.reader().expect("Failed to build reader")),
21            index: parking_lot::RwLock::new(index),
22            writer: parking_lot::Mutex::new(()),
23        }
24    }
25}
26
27lazy_static::lazy_static! {
28  static ref SEARCHER : Searcher = Searcher::new();
29  static ref SPAN: tracing::Span = tracing::info_span!(target:"tantivy",parent:None,"search");
30}
31
32struct WriterWrapper(tantivy::IndexWriter);
33impl Drop for WriterWrapper {
34    fn drop(&mut self) {
35        match self.0.commit() {
36            Ok(i) => tracing::info!("Loaded {i} entries"),
37            Err(e) => tracing::error!("Error: {e}"),
38        }
39    }
40}
41
42impl Searcher {
43    #[inline]
44    #[must_use]
45    pub fn get() -> &'static Self {
46        &SEARCHER
47    }
48
49    /// #### Panics
50    pub fn reload(&self) {
51        use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
52        SPAN.in_scope(move || {
53            let index = tantivy::index::Index::create_in_ram(SearchSchema::get().schema.clone());
54            let writer = WriterWrapper(
55                index
56                    .writer(MEMORY_SIZE)
57                    .expect("Failed to instantiate search writer"),
58            );
59            tracing::info_span!("Loading search indices").in_scope(move || {
60                GlobalBackend::get()
61                    .all_archives()
62                    .par_iter()
63                    .filter_map(|a| match a {
64                        Archive::Local(a) => Some(a),
65                        #[allow(unreachable_patterns)]
66                        _ => None,
67                    })
68                    .for_each(|a| {
69                        let out = a.out_dir();
70                        if out.exists() && out.is_dir() {
71                            for e in walkdir::WalkDir::new(out)
72                                .into_iter()
73                                .filter_map(Result::ok)
74                                .filter(|entry| entry.file_name() == "tantivy")
75                            {
76                                let Ok(f) = std::fs::File::open(e.path()) else {
77                                    tracing::error!("error reading file {}", e.path().display());
78                                    return;
79                                };
80                                let file = std::io::BufReader::new(f);
81
82                                let Ok(v): Result<Vec<SearchIndex>, _> =
83                                    bincode::serde::decode_from_reader(
84                                        file,
85                                        bincode::config::standard(),
86                                    )
87                                else {
88                                    tracing::error!(
89                                        "error deserializing file {}",
90                                        e.path().display()
91                                    );
92                                    return;
93                                };
94                                for d in v {
95                                    let d: tantivy::TantivyDocument = d.into();
96                                    if let Err(e) = writer.0.add_document(d) {
97                                        tracing::error!("{e}");
98                                    }
99                                }
100                            }
101                        }
102                    });
103            });
104            let writer = self.writer.lock();
105            let mut old_index = self.index.write();
106            let mut reader = self.reader.write();
107            let Ok(r) = index.reader() else {
108                tracing::error!("Failed to instantiate search reader");
109                return;
110            };
111            *reader = r;
112            *old_index = index;
113            drop(reader);
114            drop(old_index);
115            drop(writer);
116        });
117    }
118
119    /// #### Errors
120    #[allow(clippy::result_unit_err)]
121    pub fn with_writer<R>(
122        &self,
123        f: impl FnOnce(&mut tantivy::IndexWriter) -> Result<R, ()>,
124    ) -> Result<R, ()> {
125        SPAN.in_scope(move || {
126            let lock = self.writer.lock();
127            let mut write = self.index.read().writer(MEMORY_SIZE).map_err(|_| ())?;
128            let r = f(&mut write)?;
129            let i = write.commit().map_err(|_| ())?;
130            tracing::info!("Added {i} documents to search index");
131            *self.reader.write() = self.index.read().reader().map_err(|_| ())?;
132            drop(lock);
133            Ok(r)
134        })
135    }
136
137    pub fn query(
138        &self,
139        s: &str,
140        opts: QueryFilter,
141        num_results: usize,
142    ) -> Option<Vec<(f32, SearchResult)>> {
143        SPAN.in_scope(move || {
144            let searcher = self.reader.read().searcher();
145            let query = opts.to_query(s, &self.index.read())?;
146            let top_num = if num_results == 0 {
147                usize::MAX / 2
148            } else {
149                num_results
150            };
151            let mut ret = Vec::new();
152            for (s, a) in searcher
153                .search(&*query, &tantivy::collector::TopDocs::with_limit(top_num))
154                .ok()?
155            {
156                let r = searcher.doc(a).ok()?;
157                ret.push((s, r));
158            }
159            Some(ret)
160        })
161    }
162    pub fn query_symbols(
163        &self,
164        s: &str,
165        num_results: usize,
166    ) -> Option<VecMap<SymbolURI, Vec<(f32, SearchResult)>>> {
167        SPAN.in_scope(move || {
168            const FILTER: QueryFilter = QueryFilter {
169                allow_documents: false,
170                allow_paragraphs: true,
171                allow_definitions: true,
172                allow_examples: false,
173                allow_assertions: true,
174                allow_problems: false,
175                definition_like_only: true,
176            };
177            let searcher = self.reader.read().searcher();
178            let query = FILTER.to_query(s, &self.index.read())?;
179            let top_num = if num_results == 0 {
180                usize::MAX / 2
181            } else {
182                num_results
183            };
184            let mut ret = VecMap::new();
185            for (s, a) in searcher
186                .search(
187                    &*query,
188                    &tantivy::collector::TopDocs::with_limit(top_num * 2),
189                )
190                .ok()?
191            {
192                let r: SearchResult = searcher.doc(a).ok()?;
193                if let SearchResult::Paragraph { fors, .. } = &r {
194                    for sym in fors {
195                        ret.get_or_insert_mut(sym.clone(), Vec::new)
196                            .push((s, r.clone()));
197                    }
198                }
199            }
200            if ret.0.len() > num_results {
201                let _ = ret.0.split_off(num_results);
202            }
203            Some(ret)
204        })
205    }
206}