1use flams_ontology::{
2 search::{QueryFilter, SearchIndex, SearchResult, SearchSchema},
3 uris::SymbolURI,
4};
5use flams_utils::vecmap::VecMap;
6
7use crate::backend::{archives::Archive, GlobalBackend};
8
/// Memory budget handed to `Index::writer` every time this module creates an
/// index writer (see `Searcher::reload` and `Searcher::with_writer`).
/// tantivy interprets the value as a number of bytes, i.e. 50 MB.
const MEMORY_SIZE: usize = 50_000_000;
10
/// Search state built around an in-RAM tantivy index.
///
/// Every field sits behind its own lock so that a shared `&Searcher` can be
/// used for both querying and mutation.
pub struct Searcher {
    /// The currently active index; `reload` replaces it wholesale.
    index: parking_lot::RwLock<tantivy::index::Index>,
    /// Reader over `index`; replaced by `reload` and after every successful
    /// `with_writer` call.
    reader: parking_lot::RwLock<tantivy::IndexReader>,
    /// Carries no data: it is locked by `with_writer` and by `reload` (while
    /// swapping the index) to serialize mutations.
    writer: parking_lot::Mutex<()>,
}
16impl Searcher {
17 fn new() -> Self {
18 let index = tantivy::index::Index::create_in_ram(SearchSchema::get().schema.clone());
19 Self {
20 reader: parking_lot::RwLock::new(index.reader().expect("Failed to build reader")),
21 index: parking_lot::RwLock::new(index),
22 writer: parking_lot::Mutex::new(()),
23 }
24 }
25}
26
lazy_static::lazy_static! {
    // The searcher instance handed out by `Searcher::get`; constructed via
    // `Searcher::new` the first time it is accessed.
    static ref SEARCHER : Searcher = Searcher::new();
    // Info-level span named "search" with target "tantivy" and no parent;
    // `reload`, `with_writer`, `query` and `query_symbols` run inside it.
    static ref SPAN: tracing::Span = tracing::info_span!(target:"tantivy",parent:None,"search");
}
31
32struct WriterWrapper(tantivy::IndexWriter);
33impl Drop for WriterWrapper {
34 fn drop(&mut self) {
35 match self.0.commit() {
36 Ok(i) => tracing::info!("Loaded {i} entries"),
37 Err(e) => tracing::error!("Error: {e}"),
38 }
39 }
40}
41
42impl Searcher {
43 #[inline]
44 #[must_use]
45 pub fn get() -> &'static Self {
46 &SEARCHER
47 }
48
49 pub fn reload(&self) {
51 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
52 SPAN.in_scope(move || {
53 let index = tantivy::index::Index::create_in_ram(SearchSchema::get().schema.clone());
54 let writer = WriterWrapper(
55 index
56 .writer(MEMORY_SIZE)
57 .expect("Failed to instantiate search writer"),
58 );
59 tracing::info_span!("Loading search indices").in_scope(move || {
60 GlobalBackend::get()
61 .all_archives()
62 .par_iter()
63 .filter_map(|a| match a {
64 Archive::Local(a) => Some(a),
65 #[allow(unreachable_patterns)]
66 _ => None,
67 })
68 .for_each(|a| {
69 let out = a.out_dir();
70 if out.exists() && out.is_dir() {
71 for e in walkdir::WalkDir::new(out)
72 .into_iter()
73 .filter_map(Result::ok)
74 .filter(|entry| entry.file_name() == "tantivy")
75 {
76 let Ok(f) = std::fs::File::open(e.path()) else {
77 tracing::error!("error reading file {}", e.path().display());
78 return;
79 };
80 let file = std::io::BufReader::new(f);
81
82 let Ok(v): Result<Vec<SearchIndex>, _> =
83 bincode::serde::decode_from_reader(
84 file,
85 bincode::config::standard(),
86 )
87 else {
88 tracing::error!(
89 "error deserializing file {}",
90 e.path().display()
91 );
92 return;
93 };
94 for d in v {
95 let d: tantivy::TantivyDocument = d.into();
96 if let Err(e) = writer.0.add_document(d) {
97 tracing::error!("{e}");
98 }
99 }
100 }
101 }
102 });
103 });
104 let writer = self.writer.lock();
105 let mut old_index = self.index.write();
106 let mut reader = self.reader.write();
107 let Ok(r) = index.reader() else {
108 tracing::error!("Failed to instantiate search reader");
109 return;
110 };
111 *reader = r;
112 *old_index = index;
113 drop(reader);
114 drop(old_index);
115 drop(writer);
116 });
117 }
118
119 #[allow(clippy::result_unit_err)]
121 pub fn with_writer<R>(
122 &self,
123 f: impl FnOnce(&mut tantivy::IndexWriter) -> Result<R, ()>,
124 ) -> Result<R, ()> {
125 SPAN.in_scope(move || {
126 let lock = self.writer.lock();
127 let mut write = self.index.read().writer(MEMORY_SIZE).map_err(|_| ())?;
128 let r = f(&mut write)?;
129 let i = write.commit().map_err(|_| ())?;
130 tracing::info!("Added {i} documents to search index");
131 *self.reader.write() = self.index.read().reader().map_err(|_| ())?;
132 drop(lock);
133 Ok(r)
134 })
135 }
136
137 pub fn query(
138 &self,
139 s: &str,
140 opts: QueryFilter,
141 num_results: usize,
142 ) -> Option<Vec<(f32, SearchResult)>> {
143 SPAN.in_scope(move || {
144 let searcher = self.reader.read().searcher();
145 let query = opts.to_query(s, &self.index.read())?;
146 let top_num = if num_results == 0 {
147 usize::MAX / 2
148 } else {
149 num_results
150 };
151 let mut ret = Vec::new();
152 for (s, a) in searcher
153 .search(&*query, &tantivy::collector::TopDocs::with_limit(top_num))
154 .ok()?
155 {
156 let r = searcher.doc(a).ok()?;
157 ret.push((s, r));
158 }
159 Some(ret)
160 })
161 }
162 pub fn query_symbols(
163 &self,
164 s: &str,
165 num_results: usize,
166 ) -> Option<VecMap<SymbolURI, Vec<(f32, SearchResult)>>> {
167 SPAN.in_scope(move || {
168 const FILTER: QueryFilter = QueryFilter {
169 allow_documents: false,
170 allow_paragraphs: true,
171 allow_definitions: true,
172 allow_examples: false,
173 allow_assertions: true,
174 allow_problems: false,
175 definition_like_only: true,
176 };
177 let searcher = self.reader.read().searcher();
178 let query = FILTER.to_query(s, &self.index.read())?;
179 let top_num = if num_results == 0 {
180 usize::MAX / 2
181 } else {
182 num_results
183 };
184 let mut ret = VecMap::new();
185 for (s, a) in searcher
186 .search(
187 &*query,
188 &tantivy::collector::TopDocs::with_limit(top_num * 2),
189 )
190 .ok()?
191 {
192 let r: SearchResult = searcher.doc(a).ok()?;
193 if let SearchResult::Paragraph { fors, .. } = &r {
194 for sym in fors {
195 ret.get_or_insert_mut(sym.clone(), Vec::new)
196 .push((s, r.clone()));
197 }
198 }
199 }
200 if ret.0.len() > num_results {
201 let _ = ret.0.split_off(num_results);
202 }
203 Some(ret)
204 })
205 }
206}