1#![allow(unexpected_cfgs)]
2#![cfg_attr(all(doc, CHANNEL_NIGHTLY), feature(doc_cfg))]
3#![doc = include_str!("../README.md")]
4#![cfg_attr(doc,doc = document_features::document_features!())]
8
9use flams_backend_types::search::{QueryFilter, SearchIndex, SearchResult};
10use flams_math_archives::{
11 Archive, LocallyBuilt,
12 artifacts::{Artifact, ContentResult, FileOrString},
13 backend::{AnyBackend, GlobalBackend, LocalBackend},
14 build_target,
15 formats::BuildResult,
16 utils::errors::{ArtifactSaveError, FileError},
17};
18use flams_system::FlamsExtension;
19use ftml_uris::{DocumentUri, SymbolUri, UriPath, UriWithArchive};
20
21use crate::{index::SearchIndexExt, schema::SearchSchema};
22
23pub mod index;
24pub mod query;
25pub mod schema;
26
27flams_system::register_exension!(FlamsExtension {
28 name: "tantivy_search",
29 on_start: initialize,
30 on_build_result: |b, uri, rel_path, a| if let Some(content) =
31 a.as_any().downcast_ref::<ContentResult>()
32 {
33 index(b, uri, rel_path, content);
34 }
35});
36
// Declares the build target referred to as `TANTIVY` in this file (name "tantivy_search").
// Its `run` step ignores its argument and just returns a default `BuildResult`; the actual
// index data is produced by `index`, which saves an artifact under `TANTIVY.id()`.
build_target!(TANTIVY {
    name: "tantivy_search",
    description: "search index",
    run: |_| BuildResult::default()
});
42
/// Value passed to `Index::writer` when `initialize` builds the index writer
/// (tantivy treats it as the writer's memory budget in bytes).
const MEMORY_SIZE: usize = 50_000_000;
/// The single global [`Searcher`], constructed lazily via `Searcher::new` on first access.
static SEARCHER: std::sync::LazyLock<Searcher> = std::sync::LazyLock::new(Searcher::new);
/// Parent-less tracing span (target `"search"`) in whose scope queries and index loading run.
static SPAN: std::sync::LazyLock<tracing::Span> =
    std::sync::LazyLock::new(|| tracing::info_span!(target:"search",parent:None,"search"));
47
/// Global search state: a tantivy index plus a reader over it, both behind locks so that
/// `initialize` can replace them while queries are being served.
pub struct Searcher {
    /// The current index; starts out empty and is replaced wholesale by `initialize`.
    index: parking_lot::RwLock<tantivy::index::Index>,
    /// Reader over the current index; replaced together with `index`.
    reader: parking_lot::RwLock<tantivy::IndexReader>,
    /// Protects no data of its own; `initialize` holds it while swapping `index` and `reader`.
    writer: parking_lot::Mutex<()>,
}
53impl Searcher {
54 #[inline]
55 #[must_use]
56 pub fn get() -> &'static Self {
57 &SEARCHER
58 }
59
60 fn new() -> Self {
61 let index =
62 tantivy::index::Index::create_in_ram(schema::SearchSchema::get().schema.clone());
63 Self {
64 reader: parking_lot::RwLock::new(index.reader().expect("Failed to build reader")),
65 index: parking_lot::RwLock::new(index),
66 writer: parking_lot::Mutex::new(()),
67 }
68 }
69
70 pub fn query(
71 &self,
72 s: &str,
73 opts: QueryFilter,
74 num_results: usize,
75 ) -> Option<Vec<(f32, SearchResult)>> {
76 SPAN.in_scope(move || {
77 let searcher = self.reader.read().searcher();
78 let query = query::build_query(s, &self.index.read(), opts)?;
79 let top_num = if num_results == 0 {
80 usize::MAX / 2
81 } else {
82 num_results
83 };
84 let mut ret = Vec::new();
85 for (s, a) in searcher
86 .search(&*query, &tantivy::collector::TopDocs::with_limit(top_num))
87 .ok()?
88 {
89 let query::Wrapper(r) = searcher.doc(a).ok()?;
90 ret.push((s, r));
91 }
92 Some(ret)
93 })
94 }
95
96 #[allow(clippy::type_complexity)]
97 pub fn query_symbols(
98 &self,
99 s: &str,
100 num_results: usize,
101 ) -> Option<Vec<(SymbolUri, Vec<(f32, SearchResult)>)>> {
102 SPAN.in_scope(move || {
103 const FILTER: QueryFilter = QueryFilter {
104 allow_documents: false,
105 allow_paragraphs: true,
106 allow_definitions: true,
107 allow_examples: false,
108 allow_assertions: true,
109 allow_problems: false,
110 definition_like_only: true,
111 };
112 let searcher = self.reader.read().searcher();
113
114 let query = query::build_query(s, &self.index.read(), FILTER)?;
115 let top_num = if num_results == 0 {
116 usize::MAX / 2
117 } else {
118 num_results
119 };
120 let mut ret: Vec<(SymbolUri, Vec<(f32, SearchResult)>)> = Vec::new();
121 for (s, a) in searcher
122 .search(
123 &*query,
124 &tantivy::collector::TopDocs::with_limit(top_num * 2),
125 )
126 .ok()?
127 {
128 let query::Wrapper(r): query::Wrapper<SearchResult> = searcher.doc(a).ok()?;
129 if let SearchResult::Paragraph { fors, .. } = &r {
130 for sym in fors {
131 if let Some(v) = ret
132 .iter_mut()
133 .find_map(|(k, v)| if *k == *sym { Some(v) } else { None })
134 {
135 v.push((s, r.clone()));
136 } else {
137 ret.push((sym.clone(), vec![(s, r.clone())]));
138 }
139 }
140 }
141 }
142 if ret.len() > num_results {
143 let _ = ret.split_off(num_results);
144 }
145 Some(ret)
146 })
147 }
148}
149
150fn index(backend: &AnyBackend, uri: &DocumentUri, rel_path: &UriPath, result: &ContentResult) {
151 backend.with_buildable_archive(uri.archive_id(), |a| {
152 if let Some(a) = a {
153 let it: Vec<_> = index::index_document(&result.document, &result.ftml).collect();
154 let _ = a.save(
155 uri,
156 Some(rel_path),
157 FileOrString::Str(String::new().into_boxed_str()),
158 TANTIVY.id(),
159 Some(Box::new(IndexFile(it)) as _),
160 GlobalBackend.triple_store(),
161 false,
162 );
163 }
164 });
165}
166
167struct IndexFile(Vec<SearchIndex>);
168impl Artifact for IndexFile {
169 fn as_any(&self) -> &dyn std::any::Any {
170 self as _
171 }
172 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
173 self as _
174 }
175 fn kind(&self) -> &'static str {
176 "tantivy"
177 }
178 fn write(&self, into: &std::path::Path) -> Result<(), ArtifactSaveError> {
179 let file = std::fs::File::create(into)
180 .map_err(|e| ArtifactSaveError::Fs(FileError::Creation(into.to_path_buf(), e)))?;
181 bincode::serde::encode_into_std_write(
182 &self.0,
183 &mut std::io::BufWriter::new(file),
184 bincode::config::standard(),
185 )?;
186 Ok(())
187 }
188}
189
190fn initialize() {
191 SPAN.in_scope(|| {
192 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
193 let index = tantivy::index::Index::create_in_ram(SearchSchema::get().schema.clone());
194 let mut writer = index
195 .writer(MEMORY_SIZE)
196 .expect("Failed to instantiate search writer");
197 let wr = &writer;
198 tracing::info_span!("Loading search indices").in_scope(move || {
199 GlobalBackend
200 .all_archives()
201 .par_iter()
202 .filter_map(|a| match a {
203 Archive::Local(a) => Some(a),
204 Archive::Ext(_, _) => None,
205 })
206 .for_each(|a| {
207 let out = a.out_dir();
208 if out.exists() && out.is_dir() {
209 for e in walkdir::WalkDir::new(out)
210 .into_iter()
211 .filter_map(Result::ok)
212 .filter(|entry| entry.file_name() == "tantivy")
213 {
214 let Ok(f) = std::fs::File::open(e.path()) else {
215 tracing::error!("error reading file {}", e.path().display());
216 return;
217 };
218 let file = std::io::BufReader::new(f);
219
220 let Ok(v): Result<Vec<SearchIndex>, _> =
221 bincode::serde::decode_from_reader(
222 file,
223 bincode::config::standard(),
224 )
225 else {
226 tracing::error!("error deserializing file {}", e.path().display());
227 return;
228 };
229 for d in v {
230 let d: tantivy::TantivyDocument = d.to_document();
231 if let Err(e) = wr.add_document(d) {
232 tracing::error!("{e}");
233 }
234 }
235 }
236 }
237 });
238 });
239 match writer.commit() {
240 Ok(i) => tracing::info!("Loaded {i} entries"),
241 Err(e) => tracing::error!("Error: {e}"),
242 }
243 let slf = Searcher::get();
244 let writer = slf.writer.lock();
245 let mut old_index = slf.index.write();
246 let mut reader = slf.reader.write();
247 let Ok(r) = index.reader() else {
248 tracing::error!("Failed to instantiate search reader");
249 return;
250 };
251 *reader = r;
252 *old_index = index;
253 drop(reader);
254 drop(old_index);
255 drop(writer);
256 });
257}