1#![allow(clippy::wildcard_imports)]
2
3use crate::{
4 narration::paragraphs::ParagraphKind,
5 uris::{DocumentElementURI, DocumentURI, SymbolURI},
6};
7
/// Always yields `true`.
///
/// Referenced by name from the `serde(default = "get_true")` field attributes of
/// [`QueryFilter`], so that flags missing from serialized input default to `true`.
// Those attributes only exist when the `serde` feature is enabled; without it
// nothing calls this function, hence the `dead_code` allowance.
#[allow(dead_code)]
const fn get_true() -> bool {
    true
}
12
/// Selects which kinds of search results a query is allowed to return.
///
/// With the `serde` feature, every `allow_*` flag that is missing from the input
/// deserializes to `true`, and a missing `definition_like_only` deserializes to
/// `false`.
// One independent switch per result kind is the purpose of this type, so the
// bool-heavy layout is deliberate.
#[allow(clippy::struct_excessive_bools)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "wasm",
    derive(tsify_next::Tsify),
    tsify(into_wasm_abi, from_wasm_abi)
)]
pub struct QueryFilter {
    /// Permit results of kind `Document` (numeric kind `0`).
    #[cfg_attr(feature = "serde", serde(default = "get_true"))]
    pub allow_documents: bool,
    /// Permit results of kind `Paragraph` (numeric kind `1`).
    #[cfg_attr(feature = "serde", serde(default = "get_true"))]
    pub allow_paragraphs: bool,
    /// Permit results of kind `Definition` (numeric kind `2`).
    #[cfg_attr(feature = "serde", serde(default = "get_true"))]
    pub allow_definitions: bool,
    /// Permit results of kind `Example` (numeric kind `3`).
    #[cfg_attr(feature = "serde", serde(default = "get_true"))]
    pub allow_examples: bool,
    /// Permit results of kind `Assertion` (numeric kind `4`).
    #[cfg_attr(feature = "serde", serde(default = "get_true"))]
    pub allow_assertions: bool,
    /// Permit results of kind `Problem` (numeric kind `5`).
    #[cfg_attr(feature = "serde", serde(default = "get_true"))]
    pub allow_problems: bool,
    /// When set, only entries flagged as definition-like are returned.
    #[cfg_attr(feature = "serde", serde(default))]
    pub definition_like_only: bool,
}
34
35impl Default for QueryFilter {
36 fn default() -> Self {
37 Self {
38 allow_documents: true,
39 allow_paragraphs: true,
40 allow_definitions: true,
41 allow_examples: true,
42 allow_assertions: true,
43 allow_problems: true,
44 definition_like_only: false,
45 }
46 }
47}
48
49#[derive(Debug, Clone)]
50#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
51#[cfg_attr(feature = "wasm", derive(tsify_next::Tsify))]
52#[cfg_attr(feature = "wasm", tsify(into_wasm_abi, from_wasm_abi))]
53pub enum SearchResult {
54 Document(DocumentURI),
55 Paragraph {
56 uri: DocumentElementURI,
57 fors: Vec<SymbolURI>,
58 def_like: bool,
59 kind: SearchResultKind,
60 },
61}
62
/// The kind of an indexed entry or search hit.
///
/// Each variant has a fixed numeric discriminant; keep these values in sync with the
/// `u64` conversions implemented for this type.
#[derive(Copy, Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "wasm",
    derive(tsify_next::Tsify),
    tsify(into_wasm_abi, from_wasm_abi)
)]
pub enum SearchResultKind {
    /// A whole document.
    Document = 0,
    /// A plain paragraph.
    Paragraph = 1,
    /// A definition.
    Definition = 2,
    /// An example.
    Example = 3,
    /// An assertion.
    Assertion = 4,
    /// A problem.
    Problem = 5,
}
75impl SearchResultKind {
76 #[must_use]
77 pub const fn as_str(&self) -> &'static str {
78 match self {
79 Self::Document => "Document",
80 Self::Paragraph => "Paragraph",
81 Self::Definition => "Definition",
82 Self::Example => "Example",
83 Self::Assertion => "Assertion",
84 Self::Problem => "Problem",
85 }
86 }
87}
88
89impl From<SearchResultKind> for u64 {
90 fn from(value: SearchResultKind) -> Self {
91 match value {
92 SearchResultKind::Document => 0,
93 SearchResultKind::Paragraph => 1,
94 SearchResultKind::Definition => 2,
95 SearchResultKind::Example => 3,
96 SearchResultKind::Assertion => 4,
97 SearchResultKind::Problem => 5,
98 }
99 }
100}
101
102impl TryFrom<u64> for SearchResultKind {
103 type Error = ();
104 fn try_from(value: u64) -> Result<Self, Self::Error> {
105 Ok(match value {
106 0 => Self::Document,
107 1 => Self::Paragraph,
108 2 => Self::Definition,
109 3 => Self::Example,
110 4 => Self::Assertion,
111 5 => Self::Problem,
112 _ => return Err(()),
113 })
114 }
115}
116impl TryFrom<ParagraphKind> for SearchResultKind {
117 type Error = ();
118 fn try_from(value: ParagraphKind) -> Result<Self, Self::Error> {
119 Ok(match value {
120 ParagraphKind::Assertion => Self::Assertion,
121 ParagraphKind::Definition => Self::Definition,
122 ParagraphKind::Example => Self::Example,
123 ParagraphKind::Paragraph => Self::Paragraph,
124 _ => return Err(()),
125 })
126 }
127}
128
129#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
130#[derive(Debug, Clone)]
131pub enum SearchIndex {
132 Document {
133 uri: DocumentURI,
134 title: Option<String>,
135 body: String,
136 },
137 Paragraph {
138 uri: DocumentElementURI,
139 kind: SearchResultKind,
140 definition_like: bool,
141 title: Option<String>,
142 fors: Vec<SymbolURI>,
143 body: String,
144 },
145}
146
147#[cfg(feature = "tantivy")]
148mod tantivy_i {
149 use super::*;
150 use crate::{
151 narration::{
152 documents::{Document, UncheckedDocument},
153 paragraphs::LogicalParagraph,
154 DocumentElement,
155 },
156 CheckingState,
157 };
158
159 pub struct SearchSchema {
160 #[allow(dead_code)]
161 pub schema: tantivy::schema::Schema,
162 uri: tantivy::schema::Field,
163 kind: tantivy::schema::Field,
164 title: tantivy::schema::Field,
165 body: tantivy::schema::Field,
166 fors: tantivy::schema::Field,
167 def_like: tantivy::schema::Field,
168 }
169 impl SearchSchema {
170 #[inline]
171 #[must_use]
172 pub fn get() -> &'static Self {
173 &SCHEMA
174 }
175 }
176
177 static SCHEMA: std::sync::LazyLock<SearchSchema> = std::sync::LazyLock::new(|| {
178 use tantivy::schema::{Schema, INDEXED, STORED, TEXT};
179 let mut schema = Schema::builder();
187 let kind = schema.add_u64_field("kind", INDEXED | STORED);
188 let uri = schema.add_text_field("uri", STORED);
189 let def_like = schema.add_bool_field("deflike", INDEXED | STORED);
190 let fors = schema.add_text_field("for", STORED);
191 let title = schema.add_text_field("title", TEXT);
192 let body = schema.add_text_field("body", TEXT); let schema = schema.build();
195 SearchSchema {
196 schema,
197 uri,
198 kind,
199 title,
200 body,
201 fors,
202 def_like,
203 }
204 });
205
206 impl QueryFilter {
218 #[must_use]
219 pub fn to_query(
220 self,
221 query: &str,
222 index: &tantivy::Index,
223 ) -> Option<Box<dyn tantivy::query::Query>> {
224 use std::fmt::Write;
225 let Self {
226 allow_documents,
227 allow_paragraphs,
228 allow_definitions,
229 allow_examples,
230 allow_assertions,
231 allow_problems,
232 definition_like_only,
233 } = self;
234 let mut s = String::new();
235 if !allow_documents
236 || !allow_paragraphs
237 || !allow_definitions
238 || !allow_examples
239 || !allow_assertions
240 || !allow_problems
241 {
242 s.push('(');
243 let mut had_first = false;
244 if allow_documents {
245 had_first = true;
246 s.push_str("kind:0");
247 }
248 if allow_paragraphs {
249 s.push_str(if had_first { " OR kind:1" } else { "kind:1" });
250 had_first = true;
251 }
252 if allow_definitions {
253 s.push_str(if had_first { " OR kind:2" } else { "kind:2" });
254 had_first = true;
255 }
256 if allow_examples {
257 s.push_str(if had_first { " OR kind:3" } else { "kind:3" });
258 had_first = true;
259 }
260 if allow_assertions {
261 s.push_str(if had_first { " OR kind:4" } else { "kind:4" });
262 had_first = true;
263 }
264 if allow_problems {
265 s.push_str(if had_first { " OR kind:5" } else { "kind:5" });
266 }
267 s.push_str(") AND ");
268 }
269 if definition_like_only {
270 s.push_str("deflike:true AND ");
271 }
272 write!(s, "({query})").ok()?;
273 let mut parser =
274 tantivy::query::QueryParser::for_index(index, vec![SCHEMA.title, SCHEMA.body]);
275 parser.set_conjunction_by_default();
277 parser.parse_query(&s).ok()
278 }
279 }
280
281 impl tantivy::schema::document::ValueDeserialize for SearchResultKind {
282 fn deserialize<'de, D>(
283 deserializer: D,
284 ) -> Result<Self, tantivy::schema::document::DeserializeError>
285 where
286 D: tantivy::schema::document::ValueDeserializer<'de>,
287 {
288 deserializer
289 .deserialize_u64()?
290 .try_into()
291 .map_err(|()| tantivy::schema::document::DeserializeError::custom(""))
292 }
293 }
294
295 impl tantivy::schema::document::DocumentDeserialize for SearchResult {
296 fn deserialize<'de, D>(
297 mut deserializer: D,
298 ) -> Result<Self, tantivy::schema::document::DeserializeError>
299 where
300 D: tantivy::schema::document::DocumentDeserializer<'de>,
301 {
302 macro_rules! next {
303 () => {{
304 let Some((_, r)) = deserializer.next_field()? else {
305 return Err(tantivy::schema::document::DeserializeError::custom(
306 "Missing value",
307 ));
308 };
309 r
310 }};
311 (!) => {{
312 let Some((_, Wrapper(r))) = deserializer.next_field()? else {
313 return Err(tantivy::schema::document::DeserializeError::custom(
314 "Missing value",
315 ));
316 };
317 r
318 }};
319 }
320 let kind = next!();
321 match kind {
322 SearchResultKind::Document => Ok(Self::Document(next!())),
323 kind => {
324 let uri = next!();
325 let def_like = next!(!);
326 let mut fors = Vec::new();
327 while let Some((_, s)) = deserializer.next_field()? {
328 fors.push(s);
329 }
330 Ok(Self::Paragraph {
331 uri,
332 def_like,
333 kind,
334 fors,
335 })
336 }
337 }
338 }
339 }
340
341 #[derive(Debug)]
342 struct Wrapper<T>(T);
343 impl tantivy::schema::document::ValueDeserialize for Wrapper<bool> {
344 fn deserialize<'de, D>(
345 deserializer: D,
346 ) -> Result<Self, tantivy::schema::document::DeserializeError>
347 where
348 D: tantivy::schema::document::ValueDeserializer<'de>,
349 {
350 Ok(Self(deserializer.deserialize_bool()?))
351 }
352 }
353
354 impl SearchIndex {
355 #[must_use]
356 pub fn html_to_search_text(html: &str) -> Option<String> {
357 fn replacer(s: &mut String) {
358 let mut i = 0;
359 loop {
360 match s.as_bytes().get(i..i + 2) {
361 None => return,
362 Some(b".\n" | b"!\n" | b":\n" | b";\n") => i += 2,
363 Some(b) if b[0] == b'\n' => {
364 s.remove(i);
365 }
366 _ => i += 1,
367 }
368 }
369 }
370 let mut s = html2text::from_read(html.as_bytes(), usize::MAX / 3).ok()?;
371 replacer(&mut s);
372 Some(s)
373 }
374 }
375
376 impl From<SearchIndex> for tantivy::TantivyDocument {
377 fn from(value: SearchIndex) -> Self {
378 let mut ret = Self::default();
379 match value {
380 SearchIndex::Document { uri, title, body } => {
381 ret.add_u64(SCHEMA.kind, SearchResultKind::Document.into());
382 ret.add_text(SCHEMA.uri, uri.to_string());
383 if let Some(t) = title {
384 ret.add_text(SCHEMA.title, t);
385 }
386 ret.add_text(SCHEMA.body, body);
387 }
388 SearchIndex::Paragraph {
389 uri,
390 kind,
391 definition_like,
392 title,
393 fors,
394 body,
395 } => {
396 ret.add_u64(SCHEMA.kind, kind.into());
397 ret.add_text(SCHEMA.uri, uri.to_string());
398 ret.add_bool(SCHEMA.def_like, definition_like);
399 for f in fors {
400 ret.add_text(SCHEMA.fors, f.to_string());
402 }
403 if let Some(t) = title {
404 ret.add_text(SCHEMA.title, t);
405 }
406 ret.add_text(SCHEMA.body, body);
407 }
408 }
409 ret
410 }
411 }
412
413 impl Document {
414 pub fn search_index(&self, html: &str) -> Option<SearchIndex> {
415 let title = self.title().and_then(|s| {
416 SearchIndex::html_to_search_text(s).or_else(|| {
417 tracing::error!("Failed to plain textify title: {s}");
418 None
419 })
420 });
421 let Some(body) = SearchIndex::html_to_search_text(html) else {
422 tracing::error!("Failed to plain textify body of {}", self.uri());
423 return None;
424 };
425 Some(SearchIndex::Document {
426 uri: self.uri().clone(),
427 title,
428 body,
429 })
430 }
431
432 #[must_use]
433 pub fn all_searches(&self, html: &str) -> Vec<SearchIndex> {
434 let mut ret = vec![];
435 if let Some(s) = self.search_index(html) {
436 ret.push(s);
437 }
438 for e in self.dfs() {
439 if let DocumentElement::Paragraph(p) = e {
440 if let Some(s) = p.search_index(html) {
441 ret.push(s);
442 }
443 }
444 }
445 ret
446 }
447 }
448
449 impl UncheckedDocument {
450 pub fn search_index(&self, html: &str) -> Option<SearchIndex> {
451 let title = self.title.as_ref().and_then(|s| {
452 SearchIndex::html_to_search_text(s).or_else(|| {
453 tracing::error!("Failed to plain textify title: {s}");
454 None
455 })
456 });
457 let Some(body) = SearchIndex::html_to_search_text(html) else {
458 tracing::error!("Failed to plain textify body of {}", self.uri);
459 return None;
460 };
461 Some(SearchIndex::Document {
462 uri: self.uri.clone(),
463 title,
464 body,
465 })
466 }
467
468 #[must_use]
469 pub fn all_searches(&self, html: &str) -> Vec<SearchIndex> {
470 let mut ret = vec![];
471 if let Some(s) = self.search_index(html) {
472 ret.push(s);
473 }
474 for e in self.dfs() {
475 if let DocumentElement::Paragraph(p) = e {
476 if let Some(s) = p.search_index(html) {
477 ret.push(s);
478 }
479 }
480 }
481 ret
482 }
483 }
484
485 impl<S: CheckingState> LogicalParagraph<S> {
486 pub fn search_index(&self, html: &str) -> Option<SearchIndex> {
487 let title = self.title.and_then(|range| {
488 html.get(range.start..range.end).map_or_else(
489 || {
490 tracing::error!("Failed to plain textify title: Range {range:?}");
491 None
492 },
493 |s| {
494 SearchIndex::html_to_search_text(s).or_else(|| {
495 tracing::error!("Failed to plain textify title: {s}");
496 None
497 })
498 },
499 )
500 });
501 let Some(body) = html.get(self.range.start..self.range.end) else {
502 tracing::error!("Failed to plain textify body of {}", self.uri);
503 return None;
504 };
505 let Some(body) = SearchIndex::html_to_search_text(body) else {
506 tracing::error!("Failed to plain textify body of {}", self.uri);
507 return None;
508 };
509 let fors = self.fors.iter().map(|(f, _)| f.clone()).collect();
510
511 let Ok(kind) = self.kind.try_into() else {
512 return None;
513 };
514 let definition_like = self.kind.is_definition_like(&self.styles);
515
516 Some(SearchIndex::Paragraph {
517 uri: self.uri.clone(),
518 kind,
519 definition_like,
520 title,
521 fors,
522 body,
523 })
524 }
525 }
526}
527#[cfg(feature = "tantivy")]
528pub use tantivy_i::*;