flams_system/backend/archives/
iter.rs

1use flams_ontology::{archive_json::{ArchiveDatum, Institution}, uris::{ArchiveId, ArchiveURI, ArchiveURITrait, BaseURI}};
2use flams_utils::vecmap::{VecMap, VecSet};
3use parking_lot::RwLock;
4use std::{
5    fs::ReadDir,
6    path::{Path, PathBuf},
7};
8
9use crate::{backend::archives::{
10    ignore_regex::IgnoreSource, source_files::SourceDir, RepositoryData,
11}, formats::SourceFormat};
12
13use super::{ArchiveIndex, LocalArchive};
14
15pub(super) struct ArchiveIterator<'a> {
16    path: &'a Path,
17    stack: Vec<Vec<(PathBuf, String)>>,
18    curr: Option<std::fs::ReadDir>,
19    currp: String,
20    in_span: tracing::span::Span,
21}
22
23impl<'a> ArchiveIterator<'a> {
24    pub fn new(path: &'a Path) -> Self {
25        Self {
26            stack: vec![vec![]],
27            curr: std::fs::read_dir(path)
28                .map_err(|_| {
29                    tracing::warn!(target:"archives","Could not read directory {}", path.display());
30                })
31                .ok(),
32            path,
33            currp: String::new(),
34            in_span: tracing::Span::current(),
35        }
36    }
37
38    fn next(
39        curr: &mut Option<ReadDir>,
40        stack: &mut Vec<Vec<(PathBuf, String)>>,
41        currp: &mut String,
42    ) -> Option<LocalArchive> {
43        loop {
44            let d = match curr.as_mut().and_then(ReadDir::next) {
45                None => {
46                    if Self::next_dir(stack, curr, currp) {
47                        continue;
48                    }
49                    return None;
50                }
51                Some(Ok(d)) => d,
52                _ => continue,
53            };
54            let Ok(md) = d.metadata() else { continue };
55            let path = d.path();
56
57            //let _span = tracing::debug_span!(target:"archives","checking","{}",path.display()).entered();
58            if md.is_dir() {
59                if d.file_name().to_str().is_none_or(|s| s.starts_with('.')) {
60                    continue;
61                } else if d.file_name().eq_ignore_ascii_case("meta-inf") {
62                    if let Some(path) = Self::find_manifest(&path) {
63                        stack.pop();
64                        return if let Some(m) = Self::do_manifest(&path, currp) {
65                            if !Self::next_dir(stack, curr, currp) {
66                                *curr = None;
67                            }
68                            Some(m)
69                        } else {
70                            if Self::next_dir(stack, curr, currp) {
71                                continue;
72                            }
73                            None
74                        };
75                    }
76                }
77                let mut ins = currp.clone();
78                if !ins.is_empty() {
79                    ins.push('/');
80                }
81                ins.push_str(d.file_name().to_str().unwrap_or_else(|| unreachable!()));
82                stack
83                    .last_mut()
84                    .unwrap_or_else(|| unreachable!())
85                    .push((path, ins));
86            }
87        }
88    }
89
90    fn next_dir(
91        stack: &mut Vec<Vec<(PathBuf, String)>>,
92        curr: &mut Option<std::fs::ReadDir>,
93        currp: &mut String,
94    ) -> bool {
95        loop {
96            match stack.last_mut() {
97                None => return false,
98                Some(s) => match s.pop() {
99                    Some((e, s)) => {
100                        *curr = if let Ok(rd) = e.read_dir() {
101                            Some(rd)
102                        } else {
103                            tracing::warn!(target:"archives","Could not read directory {}", e.display());
104                            return false;
105                        };
106                        *currp = s;
107                        stack.push(Vec::new());
108                        return true;
109                    }
110                    None => {
111                        stack.pop();
112                    }
113                },
114            }
115        }
116    }
117
118    #[allow(clippy::cognitive_complexity)]
119    fn find_manifest(metainf: &Path) -> Option<PathBuf> {
120        tracing::trace!("Checking for manifest");
121        if let Ok(rd) = metainf.read_dir() {
122            for d in rd {
123                let d = match d {
124                    Err(_) => {
125                        tracing::warn!(target:"archives","Could not read directory {}", metainf.display());
126                        continue;
127                    }
128                    Ok(d) => d,
129                };
130                if !d.file_name().eq_ignore_ascii_case("manifest.mf") {
131                    continue;
132                }
133                let path = d.path();
134                if !path.is_file() {
135                    continue;
136                }
137                return Some(path);
138            }
139            tracing::trace!("not found");
140        } else {
141            tracing::warn!(target:"archives","Could not read directory {}", metainf.display());
142        }
143        None
144    }
145
146    #[allow(clippy::cognitive_complexity)]
147    #[allow(clippy::too_many_lines)]
148    fn do_manifest(path: &Path, id: &str) -> Option<LocalArchive> {
149        use std::io::BufRead;
150        let Some(top_dir) = path.parent().and_then(Path::parent) else {
151            tracing::warn!(target:"archives","Could not find parent directory of {}", path.display());
152            return None;
153        };
154        let out_path = top_dir.join(".flams");
155        let Ok(reader) = std::fs::File::open(path) else {
156            tracing::warn!(target:"archives","Could not open manifest {}", path.display());
157            return None;
158        };
159        let reader = std::io::BufReader::new(reader);
160        let mut lines = reader.lines();
161
162        let mut formats = VecSet::default();
163        let mut dom_uri: String = String::new();
164        let mut dependencies = Vec::new();
165        let mut ignore = IgnoreSource::default();
166        let mut attributes: VecMap<Box<str>, Box<str>> = VecMap::default();
167        let mut had_id: bool = false;
168        loop {
169            let line = match lines.next() {
170                Some(Err(_)) => continue,
171                Some(Ok(l)) => l,
172                _ => break,
173            };
174            let (k, v) = match line.split_once(':') {
175                Some((k, v)) => (k.trim(), v.trim()),
176                _ => continue,
177            };
178            match k {
179                "id" => {
180                    if v != id {
181                        tracing::warn!(target:"archives","Archive {v}'s id does not match its location ({id})");
182                        return None;
183                    } else if v.is_empty() {
184                        tracing::warn!(target:"archives","Archive {v} has an empty id");
185                        return None;
186                    }
187                    had_id = true;
188                }
189                "format" => {
190                    formats = v
191                        .split(',')
192                        .filter_map(SourceFormat::get_from_str)
193                        .collect();
194                }
195                "url-base" => dom_uri = v.into(),
196                //"ns" => dom_uri = v.into(),
197                "dependencies" => {
198                    for d in v
199                        .split(',')
200                        .map(str::trim)
201                        .filter(|s| !s.is_empty() && *s != id)
202                    {
203                        dependencies.push(ArchiveId::new(d));
204                    }
205                }
206                "ignore" => {
207                    ignore = IgnoreSource::new(v, &top_dir.join("source")); //Some(v.into());
208                }
209                _ => {
210                    attributes.insert(k.into(), v.into());
211                }
212            }
213        }
214        if !had_id {
215            tracing::warn!(target:"archives","Archive {id} has no id");
216            return None;
217        }
218        /*if dom_uri.ends_with(id) {
219            dom_uri.split_off(id.len() + 1);
220        }*/
221        let id = ArchiveId::new(id);
222        if formats.is_empty() && !id.is_meta() {
223            tracing::warn!(target:"archives","No formats found for archive {}",id);
224            return None;
225        }
226        if dom_uri.is_empty() {
227            tracing::warn!(target:"archives","Archive {} has no URL base", id);
228            return None;
229        }
230        let dom_uri: BaseURI = match dom_uri.parse() {
231            Ok(b) => b,
232            Err(e) => {
233                tracing::warn!(target:"archives","Archive {} has an invalid URL base: {}", id, e);
234                return None;
235            }
236        };
237        let uri = dom_uri & id;
238        let (institutions,index) = read_index_file(&uri,&path.with_file_name("archive.json"));
239        Some(LocalArchive {
240            out_path: out_path.into(),
241            ignore,
242            file_state: RwLock::new(SourceDir::default()),
243            #[cfg(feature="gitlab")]
244            is_managed: std::sync::OnceLock::new(),
245            //#[cfg(feature="zip")]
246            //zip_file: std::sync::Arc::new(std::sync::OnceLock::new()),
247            data: RepositoryData {
248                uri,
249                attributes,
250                formats,
251                institutions,index,
252                dependencies: dependencies.into(),
253            },
254        })
255    }
256}
257
258fn read_index_file(archive:&ArchiveURI,path:&Path) -> (Box<[Institution]>,Box<[ArchiveIndex]>) {
259    if !path.exists() {
260        return (Vec::new().into(),Vec::new().into())
261    }
262    let reader = match std::fs::File::open(path) {
263        Ok(reader) => reader,
264        Err(e) => {
265            tracing::error!("Could not read index file {}: {e}", path.display());
266            return (Vec::new().into(),Vec::new().into())
267        }
268    };
269    let reader = std::io::BufReader::new(reader);
270    let v = match serde_json::from_reader::<_,Vec<ArchiveDatum>>(reader) {
271        Ok(v) => v,
272        Err(e) => {
273            tracing::error!("Invalid JSON file {}: {e}", path.display());
274            return (Vec::new().into(),Vec::new().into())
275        }
276    };
277    let mut insts = Vec::new();
278    let mut idxs = Vec::new();
279    for d in v {
280        match d {
281            ArchiveDatum::Document(mut d) => {
282                if d.teaser().is_none() {
283                    let desc = path.with_file_name("desc.html");
284                    if desc.exists() {
285                        if let Ok(s) = std::fs::read_to_string(desc) {
286                            d.set_teaser(s.into_boxed_str());
287                        }
288                    }
289                }
290                match ArchiveIndex::from_kind(d,archive,
291                    |i| format!("{}/img?a={}&rp=source/{i}",crate::settings::Settings::get().external_url().unwrap_or(""),archive.archive_id()).into_boxed_str()
292                ) {
293                    Ok(e) => idxs.push(e),
294                    Err(e) => tracing::error!("Error in index file {}: {e:#}",path.display())
295                }
296            },
297            ArchiveDatum::Institution(i) => insts.push(match i {
298                Institution::University { title, place, country, url, acronym, logo }
299                    => Institution::University { title, place, country, url, acronym, 
300                        logo: format!("{}/img?a={}&rp=source/{logo}",crate::settings::Settings::get().external_url().unwrap_or(""),archive.archive_id()).into_boxed_str()
301                    },
302                Institution::School { title, place, country, url, acronym, logo }
303                    => Institution::School { title, place, country, url, acronym, 
304                        logo: format!("{}/img?a={}&rp=source/{logo}",crate::settings::Settings::get().external_url().unwrap_or(""),archive.archive_id()).into_boxed_str()
305                    }
306            }),
307        }
308    }
309    (insts.into(),idxs.into())
310}
311
312impl Iterator for ArchiveIterator<'_> {
313    type Item = LocalArchive;
314    fn next(&mut self) -> Option<Self::Item> {
315        let _span = self.in_span.enter();
316        Self::next(&mut self.curr, &mut self.stack, &mut self.currp)
317    }
318}
319
320impl spliter::Spliterator for ArchiveIterator<'_> {
321    fn split(&mut self) -> Option<Self> {
322        if self.stack.len() < 2 || self.stack[0].len() < 2 {
323            return None;
324        }
325        let stacksplit = self.stack[0].len() / 2;
326        let mut rightstack = self.stack[0].split_off(stacksplit);
327        std::mem::swap(&mut self.stack[0], &mut rightstack);
328        loop {
329            match rightstack.pop() {
330                None => return None,
331                Some((e, s)) => {
332                    if let Ok(rd) = std::fs::read_dir(&e) {
333                        return Some(Self {
334                            path: self.path,
335                            curr: Some(rd),
336                            stack: vec![rightstack, Vec::new()],
337                            currp: s,
338                            in_span: self.in_span.clone(),
339                        });
340                    }
341                }
342            }
343        }
344    }
345}