1use crate::{
2 Archive, MathArchive,
3 backend::LocalBackend,
4 formats::SourceFormatId,
5 mathhub::mathhubs,
6 source_files::FileStates,
7 utils::{
8 AsyncEngine,
9 errors::{ManifestParseError, NewArchiveError},
10 path_ext::{PathExt, RelPath},
11 },
12};
13#[cfg(feature = "cached")]
14use crate::{document_file::DocumentFile, utils::errors::BackendError};
15#[cfg(feature = "deepsize")]
16use flams_backend_types::ManagerCacheSize;
17use flams_backend_types::archive_json::{ArchiveIndex, Institution};
18#[cfg(feature = "cached")]
19use ftml_backend::utils::async_cache::AsyncCache; use ftml_ontology::{
21 domain::modules::Module,
22 utils::{RefTree, TreeChild},
23};
24use ftml_uris::{ArchiveId, ArchiveUri, BaseUri, UriName, UriPath, UriWithArchive};
25#[cfg(feature = "cached")]
26use ftml_uris::{DocumentUri, ModuleUri};
27use std::path::{Path, PathBuf};
28
/// Owner of the archive tree together with the feature-gated caches and
/// triple store that accompany it.
#[derive(Debug)]
pub struct ArchiveManager {
    /// The archive tree, behind a lock so it can be read and rebuilt through `&self`.
    pub(crate) tree: parking_lot::RwLock<ArchiveTree>,
    /// Cache of modules keyed by module URI (`cached` feature only).
    #[cfg(feature = "cached")]
    pub(crate) modules: AsyncCache<ModuleUri, Module, BackendError>,
    /// Cache of document files keyed by document URI (`cached` feature only).
    #[cfg(feature = "cached")]
    pub(crate) documents: AsyncCache<DocumentUri, triomphe::Arc<DocumentFile>, BackendError>,
    /// RDF store that receives the triples produced when archives are inserted
    /// into the tree (`rdf` feature only).
    #[cfg(feature = "rdf")]
    triple_store: crate::triple_store::RDFStore,
}
39
#[cfg(feature = "rocksdb")]
impl ArchiveManager {
    /// Creates a manager with an empty archive tree and fresh caches.
    ///
    /// `rdf_path` is forwarded to the RDF store's constructor.
    // NOTE(review): `rdf_path` is only read when the `rdf` feature is on; this
    // presumably relies on `rocksdb` implying `rdf` — confirm in Cargo.toml.
    pub fn new(rdf_path: &Path) -> Self {
        Self {
            tree: parking_lot::RwLock::new(ArchiveTree::default()),
            // The numeric arguments mirror those used in `Default`; presumably
            // cache capacities — verify against `AsyncCache::new`.
            #[cfg(feature = "cached")]
            modules: AsyncCache::new(2048),
            #[cfg(feature = "cached")]
            documents: AsyncCache::new(4096),
            #[cfg(feature = "rdf")]
            triple_store: crate::triple_store::RDFStore::new(rdf_path),
        }
    }
}
54
impl Default for ArchiveManager {
    /// Builds a manager with an empty archive tree, fresh caches and a
    /// default-constructed RDF store (each only when its feature is enabled).
    fn default() -> Self {
        Self {
            tree: parking_lot::RwLock::new(ArchiveTree::default()),
            // Same constructor arguments as in `ArchiveManager::new`.
            #[cfg(feature = "cached")]
            modules: AsyncCache::new(2048),
            #[cfg(feature = "cached")]
            documents: AsyncCache::new(4096),
            #[cfg(feature = "rdf")]
            triple_store: crate::triple_store::RDFStore::default(),
        }
    }
}
68
69impl ArchiveManager {
70 #[cfg(feature = "deepsize")]
71 pub fn memory(&self) -> ManagerCacheSize {
72 use deepsize::DeepSizeOf;
73 let relations = self.triple_store.num_relations();
74 #[cfg(feature = "cached")]
75 {
76 let mut num_modules = 0;
77 let mut modules_bytes = 0;
78 self.modules.all(|_, v| {
79 num_modules += 1;
80 if let Some(Ok(m)) = v {
81 modules_bytes += m.deep_size_of();
82 }
83 });
84 let mut num_documents = 0;
85 let mut documents_bytes = 0;
86
87 #[cfg(feature = "cached")]
88 self.documents.all(|_, v| {
89 num_documents += 1;
90 if let Some(Ok(d)) = v {
91 documents_bytes += d.deep_size_of();
92 }
93 });
94 ManagerCacheSize {
95 num_modules,
96 modules_bytes,
97 num_documents,
98 documents_bytes,
99 relations,
100 }
101 }
102 #[cfg(not(feature = "cached"))]
103 {
104 ManagerCacheSize {
105 num_modules: 0,
106 modules_bytes: 0,
107 num_documents: 0,
108 documents_bytes: 0,
109 relations,
110 }
111 }
112 }
113
114 #[inline]
115 #[must_use]
116 pub fn all_archives(&self) -> impl std::ops::Deref<Target = [Archive]> + '_ {
117 parking_lot::RwLockReadGuard::map(self.tree.read(), |s| s.archives.as_slice())
118 }
119
    /// Returns a reference to the manager's RDF store (`rdf` feature only).
    #[cfg(feature = "rdf")]
    #[inline]
    #[must_use]
    pub const fn triple_store(&self) -> &crate::triple_store::RDFStore {
        &self.triple_store
    }
126
127 #[inline]
128 pub fn with_tree<R>(&self, f: impl FnOnce(&ArchiveTree) -> R) -> R {
129 f(&self.tree.read())
130 }
131
132 pub fn reinit<R>(&self, f: impl FnOnce(&mut ArchiveTree) -> R, paths: &[&Path]) -> R {
133 let ls = self.tree.read().load(paths, false);
134 let mut tree = self.tree.write();
135 let r = f(&mut tree);
136 tree.archives.clear();
137 tree.top.clear();
138 *tree.index.write() = None;
139 #[cfg(feature = "cached")]
140 {
141 self.modules.clear();
142 self.documents.clear();
143 }
144 #[cfg(feature = "rdf")]
145 self.triple_store.clear();
146 for a in ls.into_iter().flatten() {
147 tree.insert(
148 a,
149 #[cfg(feature = "rdf")]
150 &self.triple_store,
151 );
152 }
153 r
154 }
155
156 pub(crate) fn load_module(
174 &self,
175 archive: &ArchiveUri,
176 path: Option<&UriPath>,
177 name: &UriName,
178 ) -> Result<Module, crate::BackendError> {
179 self.with_archive(archive.archive_id(), |a| {
180 let Some(a) = a else {
181 return Err(crate::BackendError::ArchiveNotFound(archive.clone()));
182 };
183 a.load_module(path, name)
184 })
185 }
186 pub(crate) fn load_module_async<A: AsyncEngine>(
187 &self,
188 archive: &ArchiveUri,
189 path: Option<&UriPath>,
190 name: &UriName,
191 ) -> impl Future<Output = Result<Module, crate::BackendError>> + 'static + use<A> {
192 self.with_archive(archive.archive_id(), |a| {
193 let Some(a) = a else {
194 return either::Left(std::future::ready(Err(
195 crate::BackendError::ArchiveNotFound(archive.clone()),
196 )));
197 };
198 either::Right(a.load_module_async::<A>(path, name))
199 })
200 }
201
202 pub fn load_one(&self, manifest: &Path, rel_path: RelPath) -> Result<(), ManifestParseError> {
204 let a = crate::manifest::parse_manifest(manifest, rel_path)?;
205 if let Archive::Local(a) = &a {
206 a.update_sources();
207 }
208 let mut tree = self.tree.write();
209 *tree.index.write() = None;
210 tree.insert(
211 a,
212 #[cfg(feature = "rdf")]
213 &self.triple_store,
214 );
215 drop(tree);
216 Ok(())
217 }
218
219 pub fn load(&self, paths: &[&Path]) {
220 let ls = self.tree.read().load(paths, true);
221 let mut lock = self.tree.write();
222 for a in ls.into_iter().flatten() {
223 lock.insert(
224 a,
225 #[cfg(feature = "rdf")]
226 &self.triple_store,
227 );
228 }
229 }
230
231 pub fn new_archive(
234 &self,
235 id: &ArchiveId,
236 base_uri: &BaseUri,
237 format: SourceFormatId,
238 default_file: &str,
239 content: &str,
240 ) -> Result<PathBuf, NewArchiveError> {
241 use std::io::Write;
242 let mh = *mathhubs().first().ok_or(NewArchiveError::NoMathHub)?;
243 let meta_inf = id
244 .steps()
245 .fold(mh.to_path_buf(), |p, s| p.join(s))
246 .join("META-INF");
247 let root = unsafe { meta_inf.parent().unwrap_unchecked() };
249 macro_rules! err {
250 ($p:pat = $expr:expr;$id:ident) => {
251 #[allow(clippy::let_unit_value)]
252 let $p = match $expr {
253 Ok(v) => v,
254 Err(e) => return Err(NewArchiveError::$id(root.to_path_buf(), e)),
255 };
256 };
257 }
258 macro_rules! dump {
259 ($f:expr; $($t:tt)*) => {
260 err!(f = std::fs::File::create(&$f);Write);
261 if let Err(e) = write!(std::io::BufWriter::new(f),$($t)*) {
262 return Err(NewArchiveError::Write($f, e));
263 }
264 };
265 }
266 err!(() = std::fs::create_dir_all(&meta_inf);CreateDir);
267 let manifest = meta_inf.join("MANIFEST.MF");
268 err!(mf = std::fs::File::create_new(&manifest);Write);
269 if let Err(e) = write!(
270 std::io::BufWriter::new(mf),
271 "id: {id}\nurl-base: {base_uri}\nformat: {}",
272 format.name
273 ) {
274 return Err(NewArchiveError::Write(manifest, e));
275 }
276 dump!(root.join(".gitignore");"{}",include_str!("gitignore_template.txt"));
277
278 let lib = root.join("lib");
279 err!(() = std::fs::create_dir_all(&lib);CreateDir);
280 let preamble = lib.join("preamble.tex");
281 dump!(preamble;"% preamble code for stex");
282
283 let source = root.join("source");
284 err!(() = std::fs::create_dir_all(&source);CreateDir);
285 let default = source.join(default_file);
286 dump!(default;"{}",content);
287 self.load_one(&manifest, RelPath::from_id(id))
288 .expect("this is a bug");
289 Ok(root.to_path_buf())
290 }
291
292 pub fn index(&self, external_url: &str) -> (Vec<Institution>, Vec<ArchiveIndex>) {
293 let tree = self.tree.read();
294 if let Some(idx) = (*tree.index.read()).clone() {
295 return idx;
296 }
297 let (is, ars) = tree.load_index(external_url);
298 *tree.index.write() = Some((is.clone(), ars.clone()));
299 drop(tree);
300 (is, ars)
301 }
302
303 pub fn index_async<A: AsyncEngine>(
304 external_url: impl Fn() -> &'static str + Send + Sync + 'static,
305 ) -> impl Future<Output = (Vec<Institution>, Vec<ArchiveIndex>)> + Send {
306 let tree = crate::backend::GlobalBackend.tree.read();
307 let idx = (*tree.index.read()).clone();
308 if let Some(idx) = idx {
309 return either::Left(std::future::ready(idx));
310 }
311 drop(tree);
312 either::Right(async move {
313 let (is, ars) = A::block_on(move || {
314 let tree = crate::backend::GlobalBackend.tree.read();
315 let (is, ars) = tree.load_index(external_url());
316 *tree.index.write() = Some((is.clone(), ars.clone())); drop(tree);
318 (is, ars)
319 })
320 .await;
321 (is, ars)
322 })
323 }
324}
325
/// The set of known archives: a flat sorted list plus a hierarchy of groups.
#[derive(Debug, Default)]
pub struct ArchiveTree {
    /// All archives; looked up by binary search on the archive id, so the
    /// vector is kept sorted by id.
    pub archives: Vec<Archive>,
    /// Top level of the archive/group hierarchy; searched by the last step of
    /// each entry's id.
    pub top: Vec<ArchiveOrGroup>,
    /// Memoised result of `load_index`; `None` means it has not been computed
    /// (or has been invalidated).
    index: parking_lot::RwLock<
        Option<
            (Vec<Institution>, Vec<ArchiveIndex>),
        >,
    >,
}
339impl ArchiveTree {
340 pub fn with_index<R>(
341 &self,
342 external_url: &str,
343 f: impl FnOnce(&[ArchiveIndex], &[Institution]) -> R,
344 ) -> R {
345 let lock = self.index.read();
346 if let Some((inst, idx)) = &*lock {
347 return f(idx, inst);
348 }
349 drop(lock);
350 let (is, ars) = self.load_index(external_url);
351 let r = f(&ars, &is);
352 *self.index.write() = Some((is, ars));
353 r
354 }
355}
356
/// A node of the archive hierarchy: either a single archive (referenced by
/// id) or a group containing further nodes.
#[derive(Debug)]
pub enum ArchiveOrGroup {
    /// A leaf; the archive itself is stored in `ArchiveTree::archives`.
    Archive(ArchiveId),
    /// An inner node with children.
    Group(ArchiveGroup),
}
362
/// A group of archives (and nested groups) sharing a common id prefix.
#[derive(Debug)]
pub struct ArchiveGroup {
    /// Id of the group itself.
    pub id: ArchiveId,
    /// Child nodes; searched by the last step of each child's id.
    pub children: Vec<ArchiveOrGroup>,
    /// File states merged in from the local archives inserted below this group.
    pub state: FileStates,
}
369
/// A sink that may or may not record RDF triples.
///
/// Without the `rdf` feature the trait has no methods.
pub trait MaybeTriple: Send {
    /// Offers a triple to the sink. The triple is passed as a closure so that
    /// implementations which discard triples never have to build it.
    #[cfg(feature = "rdf")]
    fn add_triple(&mut self, quad: impl FnOnce() -> ulo::rdf_types::Triple);
}
/// The unit sink: discards every triple without evaluating the closure.
impl MaybeTriple for () {
    #[cfg(feature = "rdf")]
    #[inline]
    fn add_triple(&mut self, _: impl FnOnce() -> ulo::rdf_types::Triple) {}
}
/// Any `FnMut(Triple) + Send` callback is a triple sink: the triple is built
/// and handed straight to the callback.
#[cfg(feature = "rdf")]
impl<F> MaybeTriple for F
where
    F: FnMut(ulo::rdf_types::Triple) + Send,
{
    #[inline]
    fn add_triple(&mut self, quad: impl FnOnce() -> ulo::rdf_types::Triple) {
        let triple = quad();
        self(triple);
    }
}
389
390impl ArchiveTree {
391 #[must_use]
392 pub fn state(&self) -> FileStates {
393 let mut r = FileStates::default();
394 for aog in &self.top {
395 match aog {
396 ArchiveOrGroup::Archive(a) => {
397 if let Some(Archive::Local(a)) = self.get(a) {
398 r.merge_all(&a.file_state.read().state);
399 }
400 }
401 ArchiveOrGroup::Group(g) => r.merge_all(&g.state),
402 }
403 }
404 r
405 }
406
407 #[must_use]
408 pub fn get_group_or_archive(&self, id: &ArchiveId) -> Option<&ArchiveOrGroup> {
409 let mut steps = id.steps().peekable();
410 let mut curr = &self.top;
411 while let Some(step) = steps.next() {
412 let e = curr
413 .binary_search_by_key(&step, |e| e.id().last())
414 .ok()
415 .map(|i| &curr[i])?;
416 if steps.peek().is_none() {
417 return Some(e);
418 }
419 if let ArchiveOrGroup::Group(g) = e {
420 curr = &g.children;
421 } else {
422 return None;
423 }
424 }
425 None
426 }
427
428 #[must_use]
429 pub fn get(&self, id: &ArchiveId) -> Option<&Archive> {
430 self.archives
431 .binary_search_by_key(&id, |a: &Archive| a.id())
432 .ok()
433 .map(|i| &self.archives[i])
434 }
435
436 #[allow(clippy::linkedlist)]
437 fn load(
438 &self,
439 paths: &[&Path],
440 skip_existent: bool,
441 ) -> std::collections::LinkedList<Vec<Archive>> {
442 use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator};
443 use spliter::ParallelSpliterator;
444 paths
445 .par_iter()
446 .flat_map(|p| {
447 crate::archive_iter::ManifestIterator::new(p)
448 .par_split()
449 .into_par_iter()
450 .map(move |r| (p, r))
451 .filter_map(|(mh, p)| {
452 let top_dir =
454 unsafe { p.parent().unwrap_unchecked().parent().unwrap_unchecked() };
455 let rel_path = top_dir.relative_to(mh)?;
456 let Some(id): Option<ArchiveId> = rel_path.parse().ok() else {
457 tracing::warn!("invalid archive id: {rel_path}");
458 return None;
459 };
460 if skip_existent && self.get(&id).is_some() {
461 return None;
462 }
463 match crate::manifest::parse_manifest(&p, rel_path) {
464 Ok(r) => Some(r),
465 Err(e) => {
466 tracing::warn!("{e} in {rel_path}");
467 None
468 }
469 }
470 })
471 .map(|a| {
472 if let Archive::Local(a) = &a {
473 a.update_sources();
474 }
475 a
476 })
477 })
478 .collect_vec_list()
479 }
483
    /// Inserts `archive` into both the flat `archives` list and the group
    /// hierarchy, creating intermediate groups as needed.
    ///
    /// An existing archive with the same id is replaced. For local archives,
    /// the archive's file state is merged into every group on the path to it.
    /// With the `rdf` feature, triples describing the archive and any newly
    /// created groups are added to `triple_store` in the `flams://archives`
    /// graph.
    ///
    /// # Panics
    /// Hits `unreachable!()` if a step of the group path resolves to an
    /// existing archive node rather than a group.
    fn insert(
        &mut self,
        archive: Archive,
        #[cfg(feature = "rdf")] triple_store: &crate::triple_store::RDFStore,
    ) {
        // Triples are collected here and written to the store in one go below.
        #[cfg(feature = "rdf")]
        let mut triples = vec![{
            use ftml_uris::FtmlUri;
            ulo::triple!(<(archive.uri().to_iri())>: ulo:library)
        }];

        let id = archive.id().clone();
        let rel_path = RelPath::from_id(&id);
        let steps = if let Some((group, _)) = rel_path.split_last() {
            group.steps()
        } else {
            // No group prefix: the archive lives directly at the top level.
            match self
                .archives
                .binary_search_by_key(&&id, |a: &Archive| a.id())
            {
                Ok(i) => self.archives[i] = archive,
                Err(i) => self.archives.insert(i, archive),
            }
            match self
                .top
                .binary_search_by_key(&id.as_ref(), |v| v.id().last())
            {
                Ok(i) => self.top[i] = ArchiveOrGroup::Archive(id),
                Err(i) => self.top.insert(i, ArchiveOrGroup::Archive(id)),
            }
            // NOTE(review): this early return skips the `add_quads` call
            // below, so with `rdf` enabled the `ulo:library` triple of a
            // top-level archive is never stored — confirm this is intended.
            return;
        };
        let mut curr = &mut self.top;
        // Byte length of the id prefix that names the group currently visited.
        let mut curr_name_len = 0;
        // Innermost group seen so far; starts out as `id` itself, which the
        // rdf code below treats as "no parent group yet".
        let mut group = &id;
        for step in steps {
            if curr_name_len == 0 {
                curr_name_len += step.len();
            } else {
                // +1 for the separator between steps.
                curr_name_len += step.len() + 1;
            }
            let curr_name = &id.as_ref()[..curr_name_len];
            match curr.binary_search_by_key(&step, |v| v.id().last()) {
                Ok(i) => {
                    // Existing node: it has to be a group to descend into it.
                    let ArchiveOrGroup::Group(g) = &mut curr[i]
                    else {
                        unreachable!()
                    };
                    if let Archive::Local(a) = &archive {
                        g.state.merge_all(a.file_state.read().state());
                    }
                    group = &g.id;
                    curr = &mut g.children;
                }
                Err(i) => {
                    // No such group yet: create it at the sorted position.
                    let mut state = FileStates::default();
                    if let Archive::Local(a) = &archive {
                        state.merge_all(a.file_state.read().state());
                    }
                    let g = ArchiveGroup {
                        // SAFETY (assumed): `curr_name` is a prefix of a valid
                        // archive id ending on a step boundary and is presumed
                        // to always parse — TODO confirm.
                        id: unsafe { curr_name.parse().unwrap_unchecked() },
                        children: Vec::new(),
                        state,
                    };
                    curr.insert(i, ArchiveOrGroup::Group(g));
                    let ArchiveOrGroup::Group(g) = &mut curr[i] else {
                        unreachable!()
                    };
                    #[cfg(feature = "rdf")]
                    {
                        use ftml_uris::FtmlUri;
                        let iri = (archive.uri().base.clone() & g.id.clone()).to_iri();
                        if *group != id {
                            let parent = (archive.uri().base.clone() & group.clone()).to_iri();
                            triples.push(ulo::triple!(<(parent)> ulo:contains <(iri.clone())>));
                        }
                        triples.push(ulo::triple!(<(iri)>: ulo:library_group));
                    }
                    // NOTE(review): unlike the `Ok` branch, `group` is not
                    // advanced to the new group here. Nested newly created
                    // groups therefore get no `ulo:contains` triple, and the
                    // final `parent` below may still be `id` itself — verify.
                    curr = &mut g.children;
                }
            }
        }

        #[cfg(feature = "rdf")]
        {
            use ftml_uris::FtmlUri;
            let parent = (archive.uri().base.clone() & group.clone()).to_iri();
            triples.push(ulo::triple!(<(parent)> ulo:contains <(archive.uri().to_iri())>));
            let global = ulo::rdf_types::NamedNodeRef::new_unchecked("flams://archives");
            triple_store.add_quads(triples.into_iter().map(|t| t.in_graph(global)));
        }

        // Insert or replace in the flat, id-sorted archive list ...
        match self
            .archives
            .binary_search_by_key(&&id, |a: &Archive| a.id())
        {
            Ok(i) => self.archives[i] = archive,
            Err(i) => self.archives.insert(i, archive),
        }
        // ... and in the children of the innermost group.
        match curr.binary_search_by_key(&id.last(), |v| v.id().last()) {
            Ok(i) => curr[i] = ArchiveOrGroup::Archive(id),
            Err(i) => curr.insert(i, ArchiveOrGroup::Archive(id)),
        }
    }
590
591 fn load_index(&self, external_url: &str) -> (Vec<Institution>, Vec<ArchiveIndex>) {
592 let mut is = Vec::new();
593 let mut ai = Vec::new();
594 for a in &self.archives {
595 let Some(p) = crate::LocalArchive::manifest_of(a.path()) else {
596 continue;
597 };
598 let Some(p) = p.parent().map(|p| p.join("archive.json")) else {
599 continue;
600 };
601 let (isi, ars) = crate::manifest::read_archive_json(a.uri(), &p, external_url);
602 for i in isi {
603 if !is.contains(&i) {
604 is.push(i);
605 }
606 }
607 for a in ars {
608 if !ai.contains(&a) {
609 ai.push(a);
610 }
611 }
612 }
613 (is, ai)
614 }
615}
616
617impl ArchiveOrGroup {
618 #[inline]
619 #[must_use]
620 pub const fn id(&self) -> &ArchiveId {
621 match self {
622 Self::Archive(id) => id,
623 Self::Group(g) => &g.id,
624 }
625 }
626}
627
628impl<'a> TreeChild<'a> for &'a ArchiveOrGroup {
629 fn tree_children(self) -> impl Iterator<Item = Self> {
630 match self {
631 ArchiveOrGroup::Archive(_) => either::Either::Left(std::iter::empty()),
632 ArchiveOrGroup::Group(g) => either::Either::Right(g.children.iter()),
633 }
634 }
635}
636
/// Exposes the top-level nodes of the tree as its children.
impl RefTree for ArchiveTree {
    type Child<'a>
        = &'a ArchiveOrGroup
    where
        Self: 'a;
    /// Iterates over the top-level archives and groups.
    #[inline]
    fn tree_children(&self) -> impl Iterator<Item = Self::Child<'_>> {
        self.top.iter()
    }
}
/// Exposes a group's direct children for tree traversal.
impl RefTree for ArchiveGroup {
    type Child<'a>
        = &'a ArchiveOrGroup
    where
        Self: 'a;
    /// Iterates over the direct children of this group.
    #[inline]
    fn tree_children(&self) -> impl Iterator<Item = Self::Child<'_>> {
        self.children.iter()
    }
}