Skip to main content

flams_math_archives/
manager.rs

1use crate::{
2    Archive, MathArchive,
3    backend::LocalBackend,
4    formats::SourceFormatId,
5    mathhub::mathhubs,
6    source_files::FileStates,
7    utils::{
8        AsyncEngine,
9        errors::{ManifestParseError, NewArchiveError},
10        path_ext::{PathExt, RelPath},
11    },
12};
13#[cfg(feature = "cached")]
14use crate::{document_file::DocumentFile, utils::errors::BackendError};
15#[cfg(feature = "deepsize")]
16use flams_backend_types::ManagerCacheSize;
17use flams_backend_types::archive_json::{ArchiveIndex, Institution};
18#[cfg(feature = "cached")]
19use ftml_backend::utils::async_cache::AsyncCache; //, MaybeValue};
20use ftml_ontology::{
21    domain::modules::Module,
22    utils::{RefTree, TreeChild},
23};
24use ftml_uris::{ArchiveId, ArchiveUri, BaseUri, UriName, UriPath, UriWithArchive};
25#[cfg(feature = "cached")]
26use ftml_uris::{DocumentUri, ModuleUri};
27use std::path::{Path, PathBuf};
28
/// Central owner of the archive tree and the feature-dependent side stores.
///
/// The tree is always present; the module/document caches exist only with the
/// `cached` feature and the triple store only with the `rdf` feature.
#[derive(Debug)]
pub struct ArchiveManager {
    /// The archive tree, guarded by a `parking_lot` read/write lock.
    pub(crate) tree: parking_lot::RwLock<ArchiveTree>,
    /// Cache of modules keyed by their module URI (`cached` feature only).
    #[cfg(feature = "cached")]
    pub(crate) modules: AsyncCache<ModuleUri, Module, BackendError>,
    /// Cache of document files keyed by their document URI (`cached` feature only).
    #[cfg(feature = "cached")]
    pub(crate) documents: AsyncCache<DocumentUri, triomphe::Arc<DocumentFile>, BackendError>,
    /// RDF store that `ArchiveTree::insert` feeds with archive triples (`rdf` feature only).
    #[cfg(feature = "rdf")]
    triple_store: crate::triple_store::RDFStore,
}
39
#[cfg(feature = "rocksdb")]
impl ArchiveManager {
    /// Builds a manager with an empty archive tree.
    ///
    /// With the `cached` feature the module and document caches are created via
    /// `AsyncCache::new(2048)` and `AsyncCache::new(4096)`; with the `rdf`
    /// feature the triple store is created via `RDFStore::new(rdf_path)`.
    pub fn new(rdf_path: &Path) -> Self {
        let tree = parking_lot::RwLock::new(ArchiveTree::default());
        #[cfg(feature = "cached")]
        let modules = AsyncCache::new(2048);
        #[cfg(feature = "cached")]
        let documents = AsyncCache::new(4096);
        #[cfg(feature = "rdf")]
        let triple_store = crate::triple_store::RDFStore::new(rdf_path);
        Self {
            tree,
            #[cfg(feature = "cached")]
            modules,
            #[cfg(feature = "cached")]
            documents,
            #[cfg(feature = "rdf")]
            triple_store,
        }
    }
}
54
55impl Default for ArchiveManager {
56    fn default() -> Self {
57        Self {
58            tree: parking_lot::RwLock::new(ArchiveTree::default()),
59            #[cfg(feature = "cached")]
60            modules: AsyncCache::new(2048),
61            #[cfg(feature = "cached")]
62            documents: AsyncCache::new(4096),
63            #[cfg(feature = "rdf")]
64            triple_store: crate::triple_store::RDFStore::default(),
65        }
66    }
67}
68
69impl ArchiveManager {
70    #[cfg(feature = "deepsize")]
71    pub fn memory(&self) -> ManagerCacheSize {
72        use deepsize::DeepSizeOf;
73        let relations = self.triple_store.num_relations();
74        #[cfg(feature = "cached")]
75        {
76            let mut num_modules = 0;
77            let mut modules_bytes = 0;
78            self.modules.all(|_, v| {
79                num_modules += 1;
80                if let Some(Ok(m)) = v {
81                    modules_bytes += m.deep_size_of();
82                }
83            });
84            let mut num_documents = 0;
85            let mut documents_bytes = 0;
86
87            #[cfg(feature = "cached")]
88            self.documents.all(|_, v| {
89                num_documents += 1;
90                if let Some(Ok(d)) = v {
91                    documents_bytes += d.deep_size_of();
92                }
93            });
94            ManagerCacheSize {
95                num_modules,
96                modules_bytes,
97                num_documents,
98                documents_bytes,
99                relations,
100            }
101        }
102        #[cfg(not(feature = "cached"))]
103        {
104            ManagerCacheSize {
105                num_modules: 0,
106                modules_bytes: 0,
107                num_documents: 0,
108                documents_bytes: 0,
109                relations,
110            }
111        }
112    }
113
114    #[inline]
115    #[must_use]
116    pub fn all_archives(&self) -> impl std::ops::Deref<Target = [Archive]> + '_ {
117        parking_lot::RwLockReadGuard::map(self.tree.read(), |s| s.archives.as_slice())
118    }
119
    /// Returns a reference to the manager's RDF triple store (`rdf` feature only).
    #[cfg(feature = "rdf")]
    #[inline]
    #[must_use]
    pub const fn triple_store(&self) -> &crate::triple_store::RDFStore {
        &self.triple_store
    }
126
127    #[inline]
128    pub fn with_tree<R>(&self, f: impl FnOnce(&ArchiveTree) -> R) -> R {
129        f(&self.tree.read())
130    }
131
132    pub fn reinit<R>(&self, f: impl FnOnce(&mut ArchiveTree) -> R, paths: &[&Path]) -> R {
133        let ls = self.tree.read().load(paths, false);
134        let mut tree = self.tree.write();
135        let r = f(&mut tree);
136        tree.archives.clear();
137        tree.top.clear();
138        *tree.index.write() = None;
139        #[cfg(feature = "cached")]
140        {
141            self.modules.clear();
142            self.documents.clear();
143        }
144        #[cfg(feature = "rdf")]
145        self.triple_store.clear();
146        for a in ls.into_iter().flatten() {
147            tree.insert(
148                a,
149                #[cfg(feature = "rdf")]
150                &self.triple_store,
151            );
152        }
153        r
154    }
155
156    /*
157    pub(crate) fn load_document(
158        &self,
159        archive: &ArchiveUri,
160        path: Option<&UriPath>,
161        language: Language,
162        name: &SimpleUriName,
163    ) -> Option<UncheckedDocument> {
164        self.with_archive(archive.archive_id(), |a| {
165            let Some(a) = a else {
166                return Err(crate::BackendError::ArchiveNotFound);
167            };
168            a.load_document(path, name, language)
169        })
170    }
171     */
172
173    pub(crate) fn load_module(
174        &self,
175        archive: &ArchiveUri,
176        path: Option<&UriPath>,
177        name: &UriName,
178    ) -> Result<Module, crate::BackendError> {
179        self.with_archive(archive.archive_id(), |a| {
180            let Some(a) = a else {
181                return Err(crate::BackendError::ArchiveNotFound(archive.clone()));
182            };
183            a.load_module(path, name)
184        })
185    }
186    pub(crate) fn load_module_async<A: AsyncEngine>(
187        &self,
188        archive: &ArchiveUri,
189        path: Option<&UriPath>,
190        name: &UriName,
191    ) -> impl Future<Output = Result<Module, crate::BackendError>> + 'static + use<A> {
192        self.with_archive(archive.archive_id(), |a| {
193            let Some(a) = a else {
194                return either::Left(std::future::ready(Err(
195                    crate::BackendError::ArchiveNotFound(archive.clone()),
196                )));
197            };
198            either::Right(a.load_module_async::<A>(path, name))
199        })
200    }
201
202    /// # Errors
203    pub fn load_one(&self, manifest: &Path, rel_path: RelPath) -> Result<(), ManifestParseError> {
204        let a = crate::manifest::parse_manifest(manifest, rel_path)?;
205        if let Archive::Local(a) = &a {
206            a.update_sources();
207        }
208        let mut tree = self.tree.write();
209        *tree.index.write() = None;
210        tree.insert(
211            a,
212            #[cfg(feature = "rdf")]
213            &self.triple_store,
214        );
215        drop(tree);
216        Ok(())
217    }
218
219    pub fn load(&self, paths: &[&Path]) {
220        let ls = self.tree.read().load(paths, true);
221        let mut lock = self.tree.write();
222        for a in ls.into_iter().flatten() {
223            lock.insert(
224                a,
225                #[cfg(feature = "rdf")]
226                &self.triple_store,
227            );
228        }
229    }
230
231    /// # Errors
232    /// # Panics
233    pub fn new_archive(
234        &self,
235        id: &ArchiveId,
236        base_uri: &BaseUri,
237        format: SourceFormatId,
238        default_file: &str,
239        content: &str,
240    ) -> Result<PathBuf, NewArchiveError> {
241        use std::io::Write;
242        let mh = *mathhubs().first().ok_or(NewArchiveError::NoMathHub)?;
243        let meta_inf = id
244            .steps()
245            .fold(mh.to_path_buf(), |p, s| p.join(s))
246            .join("META-INF");
247        // SAFETY: we constructed the path as a descendant of mh
248        let root = unsafe { meta_inf.parent().unwrap_unchecked() };
249        macro_rules! err {
250            ($p:pat = $expr:expr;$id:ident) => {
251                #[allow(clippy::let_unit_value)]
252                let $p = match $expr {
253                    Ok(v) => v,
254                    Err(e) => return Err(NewArchiveError::$id(root.to_path_buf(), e)),
255                };
256            };
257        }
258        macro_rules! dump {
259            ($f:expr; $($t:tt)*) => {
260                err!(f = std::fs::File::create(&$f);Write);
261                if let Err(e) = write!(std::io::BufWriter::new(f),$($t)*) {
262                    return Err(NewArchiveError::Write($f, e));
263                }
264            };
265        }
266        err!(() = std::fs::create_dir_all(&meta_inf);CreateDir);
267        let manifest = meta_inf.join("MANIFEST.MF");
268        err!(mf = std::fs::File::create_new(&manifest);Write);
269        if let Err(e) = write!(
270            std::io::BufWriter::new(mf),
271            "id: {id}\nurl-base: {base_uri}\nformat: {}",
272            format.name
273        ) {
274            return Err(NewArchiveError::Write(manifest, e));
275        }
276        dump!(root.join(".gitignore");"{}",include_str!("gitignore_template.txt"));
277
278        let lib = root.join("lib");
279        err!(() = std::fs::create_dir_all(&lib);CreateDir);
280        let preamble = lib.join("preamble.tex");
281        dump!(preamble;"% preamble code for stex");
282
283        let source = root.join("source");
284        err!(() = std::fs::create_dir_all(&source);CreateDir);
285        let default = source.join(default_file);
286        dump!(default;"{}",content);
287        self.load_one(&manifest, RelPath::from_id(id))
288            .expect("this is a bug");
289        Ok(root.to_path_buf())
290    }
291
292    pub fn index(&self, external_url: &str) -> (Vec<Institution>, Vec<ArchiveIndex>) {
293        let tree = self.tree.read();
294        if let Some(idx) = (*tree.index.read()).clone() {
295            return idx;
296        }
297        let (is, ars) = tree.load_index(external_url);
298        *tree.index.write() = Some((is.clone(), ars.clone()));
299        drop(tree);
300        (is, ars)
301    }
302
303    pub fn index_async<A: AsyncEngine>(
304        external_url: impl Fn() -> &'static str + Send + Sync + 'static,
305    ) -> impl Future<Output = (Vec<Institution>, Vec<ArchiveIndex>)> + Send {
306        let tree = crate::backend::GlobalBackend.tree.read();
307        let idx = (*tree.index.read()).clone();
308        if let Some(idx) = idx {
309            return either::Left(std::future::ready(idx));
310        }
311        drop(tree);
312        either::Right(async move {
313            let (is, ars) = A::block_on(move || {
314                let tree = crate::backend::GlobalBackend.tree.read();
315                let (is, ars) = tree.load_index(external_url());
316                *tree.index.write() = Some((is.clone(), ars.clone())); //either::Left((is.clone(), ars.clone())));
317                drop(tree);
318                (is, ars)
319            })
320            .await;
321            (is, ars)
322        })
323    }
324}
325
/// The loaded archives together with their group hierarchy and a lazily
/// computed index.
#[derive(Debug, Default)]
pub struct ArchiveTree {
    /// All archives; lookups and inserts use binary search keyed on the
    /// archive id, so this is kept sorted by id.
    pub archives: Vec<Archive>,
    /// Top-level entries of the group hierarchy; searched by binary search on
    /// the last step of each entry's id.
    pub top: Vec<ArchiveOrGroup>,
    /// Cached index (institutions, archive entries); `None` until computed and
    /// reset to `None` when archives are (re)loaded.
    index: parking_lot::RwLock<
        Option<
            //either::Either<
            (Vec<Institution>, Vec<ArchiveIndex>),
            //    flume::Receiver<(Vec<Institution>, Vec<ArchiveIndex>)>,
            //>,
        >,
    >, //pub index: (Vec<Institution>, Vec<ArchiveIndex>),
}
339impl ArchiveTree {
340    pub fn with_index<R>(
341        &self,
342        external_url: &str,
343        f: impl FnOnce(&[ArchiveIndex], &[Institution]) -> R,
344    ) -> R {
345        let lock = self.index.read();
346        if let Some((inst, idx)) = &*lock {
347            return f(idx, inst);
348        }
349        drop(lock);
350        let (is, ars) = self.load_index(external_url);
351        let r = f(&ars, &is);
352        *self.index.write() = Some((is, ars));
353        r
354    }
355}
356
/// A node of the archive hierarchy: either a leaf referring to an archive by
/// id, or a group containing further nodes.
#[derive(Debug)]
pub enum ArchiveOrGroup {
    /// A leaf holding the id of an archive.
    Archive(ArchiveId),
    /// An inner node grouping further archives and groups.
    Group(ArchiveGroup),
}
362
/// A group of archives (and nested groups) in the archive hierarchy.
#[derive(Debug)]
pub struct ArchiveGroup {
    /// Id of the group itself.
    pub id: ArchiveId,
    /// Child nodes; searched by binary search on the last step of each id.
    pub children: Vec<ArchiveOrGroup>,
    /// File states merged from the local archives inserted below this group.
    pub state: FileStates,
}
369
/// A sink that may accept RDF triples.
///
/// With the `rdf` feature the trait has a single method, `add_triple`;
/// without it the trait has no methods at all.
pub trait MaybeTriple: Send {
    /// Offers a triple to the sink. The triple is passed as a closure so that
    /// implementations which ignore triples never have to construct it.
    /// (The parameter is named `quad`, but it produces a `Triple`.)
    #[cfg(feature = "rdf")]
    fn add_triple(&mut self, quad: impl FnOnce() -> ulo::rdf_types::Triple);
}
/// The unit type is the sink that discards everything.
impl MaybeTriple for () {
    /// Ignores the triple; the closure is never called.
    #[cfg(feature = "rdf")]
    #[inline]
    fn add_triple(&mut self, _: impl FnOnce() -> ulo::rdf_types::Triple) {}
}
/// Any sendable `FnMut(Triple)` closure acts as a triple sink.
#[cfg(feature = "rdf")]
impl<F> MaybeTriple for F
where
    F: FnMut(ulo::rdf_types::Triple) + Send,
{
    /// Builds the triple and passes it to the wrapped closure.
    #[inline]
    fn add_triple(&mut self, quad: impl FnOnce() -> ulo::rdf_types::Triple) {
        let triple = quad();
        self(triple);
    }
}
389
390impl ArchiveTree {
391    #[must_use]
392    pub fn state(&self) -> FileStates {
393        let mut r = FileStates::default();
394        for aog in &self.top {
395            match aog {
396                ArchiveOrGroup::Archive(a) => {
397                    if let Some(Archive::Local(a)) = self.get(a) {
398                        r.merge_all(&a.file_state.read().state);
399                    }
400                }
401                ArchiveOrGroup::Group(g) => r.merge_all(&g.state),
402            }
403        }
404        r
405    }
406
407    #[must_use]
408    pub fn get_group_or_archive(&self, id: &ArchiveId) -> Option<&ArchiveOrGroup> {
409        let mut steps = id.steps().peekable();
410        let mut curr = &self.top;
411        while let Some(step) = steps.next() {
412            let e = curr
413                .binary_search_by_key(&step, |e| e.id().last())
414                .ok()
415                .map(|i| &curr[i])?;
416            if steps.peek().is_none() {
417                return Some(e);
418            }
419            if let ArchiveOrGroup::Group(g) = e {
420                curr = &g.children;
421            } else {
422                return None;
423            }
424        }
425        None
426    }
427
428    #[must_use]
429    pub fn get(&self, id: &ArchiveId) -> Option<&Archive> {
430        self.archives
431            .binary_search_by_key(&id, |a: &Archive| a.id())
432            .ok()
433            .map(|i| &self.archives[i])
434    }
435
    /// Scans `paths` in parallel for archive manifests and parses them.
    ///
    /// This does not modify the tree; the parsed archives are returned in the
    /// list-of-vectors shape produced by `collect_vec_list`, and the caller is
    /// expected to insert them.
    ///
    /// A manifest is skipped when
    /// - its archive directory is not relative to the path being scanned,
    /// - the relative path does not parse as an `ArchiveId` (logged as a warning),
    /// - `skip_existent` is set and an archive with that id is already known, or
    /// - parsing the manifest fails (logged as a warning).
    ///
    /// For every local archive that is found, `update_sources` is called.
    #[allow(clippy::linkedlist)]
    fn load(
        &self,
        paths: &[&Path],
        skip_existent: bool,
    ) -> std::collections::LinkedList<Vec<Archive>> {
        use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator};
        use spliter::ParallelSpliterator;
        paths
            .par_iter()
            .flat_map(|p| {
                crate::archive_iter::ManifestIterator::new(p)
                    .par_split()
                    .into_par_iter()
                    // Pair every manifest path with the root it was found under.
                    .map(move |r| (p, r))
                    .filter_map(|(mh, p)| {
                        // SAFETY: manifest file is grandchild of root directory of archive
                        let top_dir =
                            unsafe { p.parent().unwrap_unchecked().parent().unwrap_unchecked() };
                        let rel_path = top_dir.relative_to(mh)?;
                        let Some(id): Option<ArchiveId> = rel_path.parse().ok() else {
                            tracing::warn!("invalid archive id: {rel_path}");
                            return None;
                        };
                        if skip_existent && self.get(&id).is_some() {
                            return None;
                        }
                        match crate::manifest::parse_manifest(&p, rel_path) {
                            Ok(r) => Some(r),
                            Err(e) => {
                                tracing::warn!("{e} in {rel_path}");
                                None
                            }
                        }
                    })
                    .map(|a| {
                        if let Archive::Local(a) = &a {
                            a.update_sources();
                        }
                        a
                    })
            })
            .collect_vec_list()
        /*for n in news.into_iter().flatten() {
            self.insert(n, &mut f);
        }*/
    }
483
    /// Inserts `archive` into the sorted archive list and into the group
    /// hierarchy, replacing an existing entry with the same id.
    ///
    /// Groups along the archive's id are created as needed. For a local
    /// archive, its file state is merged into every group on the way (both
    /// existing and newly created ones).
    ///
    /// With the `rdf` feature, triples describing the archive, any newly
    /// created groups and the containment relations are added to
    /// `triple_store` in the graph `flams://archives`.
    ///
    /// # Panics
    /// Panics (via `unreachable!`) if an intermediate step of the id resolves
    /// to an existing *archive* entry instead of a group.
    fn insert(
        &mut self,
        archive: Archive,
        #[cfg(feature = "rdf")] triple_store: &crate::triple_store::RDFStore,
    ) {
        #[cfg(feature = "rdf")]
        let mut triples = vec![{
            use ftml_uris::FtmlUri;
            ulo::triple!(<(archive.uri().to_iri())>: ulo:library)
        }];

        let id = archive.id().clone();
        let rel_path = RelPath::from_id(&id);
        let steps = if let Some((group, _)) = rel_path.split_last() {
            group.steps()
        } else {
            // The id has no group part: the archive lives directly at the top level.
            // NOTE(review): this path returns before `triples` is handed to the
            // triple store, so the `ulo:library` triple of a top-level archive is
            // never stored — confirm whether that is intended.
            match self
                .archives
                .binary_search_by_key(&&id, |a: &Archive| a.id())
            {
                Ok(i) => self.archives[i] = archive,
                Err(i) => self.archives.insert(i, archive),
            }
            match self
                .top
                .binary_search_by_key(&id.as_ref(), |v| v.id().last())
            {
                Ok(i) => self.top[i] = ArchiveOrGroup::Archive(id),
                Err(i) => self.top.insert(i, ArchiveOrGroup::Archive(id)),
            }
            return;
        };
        // Level of the hierarchy currently being descended into.
        let mut curr = &mut self.top;
        // Length of the id prefix that names the group reached so far; the `+ 1`
        // below accounts for the separator between steps (assumed to be one byte).
        let mut curr_name_len = 0;
        // Id of the innermost enclosing group; `&id` itself serves as the
        // "no enclosing group yet" marker (see the `*group != id` check below).
        let mut group = &id;
        for step in steps {
            if curr_name_len == 0 {
                curr_name_len += step.len();
            } else {
                curr_name_len += step.len() + 1;
            }
            let curr_name = &id.as_ref()[..curr_name_len];
            match curr.binary_search_by_key(&step, |v| v.id().last()) {
                Ok(i) => {
                    // An entry for this step already exists; it has to be a group.
                    let ArchiveOrGroup::Group(g) = &mut curr[i]
                    // TODO maybe reachable?
                    else {
                        unreachable!()
                    };
                    if let Archive::Local(a) = &archive {
                        g.state.merge_all(a.file_state.read().state());
                    }
                    group = &g.id;
                    curr = &mut g.children;
                }
                Err(i) => {
                    // No entry yet: create the group, seeded with this archive's state.
                    let mut state = FileStates::default();
                    if let Archive::Local(a) = &archive {
                        state.merge_all(a.file_state.read().state());
                    }
                    let g = ArchiveGroup {
                        // SAFETY: known to be valid
                        id: unsafe { curr_name.parse().unwrap_unchecked() },
                        children: Vec::new(),
                        state,
                    };
                    curr.insert(i, ArchiveOrGroup::Group(g));
                    let ArchiveOrGroup::Group(g) = &mut curr[i] else {
                        unreachable!()
                    };
                    #[cfg(feature = "rdf")]
                    {
                        use ftml_uris::FtmlUri;
                        let iri = (archive.uri().base.clone() & g.id.clone()).to_iri();
                        if *group != id {
                            let parent = (archive.uri().base.clone() & group.clone()).to_iri();
                            triples.push(ulo::triple!(<(parent)> ulo:contains <(iri.clone())>));
                        }
                        triples.push(ulo::triple!(<(iri)>: ulo:library_group));
                    }
                    // NOTE(review): unlike the `Ok` branch, `group` is not updated
                    // here, so a group nested below a freshly created one (and the
                    // archive itself) is linked to the last *pre-existing* group —
                    // confirm this is intended.
                    curr = &mut g.children;
                }
            }
        }

        #[cfg(feature = "rdf")]
        {
            use ftml_uris::FtmlUri;
            let parent = (archive.uri().base.clone() & group.clone()).to_iri();
            triples.push(ulo::triple!(<(parent)> ulo:contains <(archive.uri().to_iri())>));
            let global = ulo::rdf_types::NamedNodeRef::new_unchecked("flams://archives");
            triple_store.add_quads(triples.into_iter().map(|t| t.in_graph(global)));
        }

        // Insert or replace the archive in the sorted list ...
        match self
            .archives
            .binary_search_by_key(&&id, |a: &Archive| a.id())
        {
            Ok(i) => self.archives[i] = archive,
            Err(i) => self.archives.insert(i, archive),
        }
        // ... and its leaf entry in the innermost group.
        match curr.binary_search_by_key(&id.last(), |v| v.id().last()) {
            Ok(i) => curr[i] = ArchiveOrGroup::Archive(id),
            Err(i) => curr.insert(i, ArchiveOrGroup::Archive(id)),
        }
    }
590
591    fn load_index(&self, external_url: &str) -> (Vec<Institution>, Vec<ArchiveIndex>) {
592        let mut is = Vec::new();
593        let mut ai = Vec::new();
594        for a in &self.archives {
595            let Some(p) = crate::LocalArchive::manifest_of(a.path()) else {
596                continue;
597            };
598            let Some(p) = p.parent().map(|p| p.join("archive.json")) else {
599                continue;
600            };
601            let (isi, ars) = crate::manifest::read_archive_json(a.uri(), &p, external_url);
602            for i in isi {
603                if !is.contains(&i) {
604                    is.push(i);
605                }
606            }
607            for a in ars {
608                if !ai.contains(&a) {
609                    ai.push(a);
610                }
611            }
612        }
613        (is, ai)
614    }
615}
616
617impl ArchiveOrGroup {
618    #[inline]
619    #[must_use]
620    pub const fn id(&self) -> &ArchiveId {
621        match self {
622            Self::Archive(id) => id,
623            Self::Group(g) => &g.id,
624        }
625    }
626}
627
628impl<'a> TreeChild<'a> for &'a ArchiveOrGroup {
629    fn tree_children(self) -> impl Iterator<Item = Self> {
630        match self {
631            ArchiveOrGroup::Archive(_) => either::Either::Left(std::iter::empty()),
632            ArchiveOrGroup::Group(g) => either::Either::Right(g.children.iter()),
633        }
634    }
635}
636
/// Tree view of the whole hierarchy: the children are the top-level entries.
impl RefTree for ArchiveTree {
    type Child<'a>
        = &'a ArchiveOrGroup
    where
        Self: 'a;
    /// Iterates over the top-level entries in stored order.
    #[inline]
    fn tree_children(&self) -> impl Iterator<Item = Self::Child<'_>> {
        self.top.iter()
    }
}
/// Tree view of a single group: the children are the group's direct entries.
impl RefTree for ArchiveGroup {
    type Child<'a>
        = &'a ArchiveOrGroup
    where
        Self: 'a;
    /// Iterates over the group's direct children in stored order.
    #[inline]
    fn tree_children(&self) -> impl Iterator<Item = Self::Child<'_>> {
        self.children.iter()
    }
}