flams_system/backend/archives/
iter.rs1use flams_ontology::{archive_json::{ArchiveDatum, Institution}, uris::{ArchiveId, ArchiveURI, ArchiveURITrait, BaseURI}};
2use flams_utils::vecmap::{VecMap, VecSet};
3use parking_lot::RwLock;
4use std::{
5 fs::ReadDir,
6 path::{Path, PathBuf},
7};
8
9use crate::{backend::archives::{
10 ignore_regex::IgnoreSource, source_files::SourceDir, RepositoryData,
11}, formats::SourceFormat};
12
13use super::{ArchiveIndex, LocalArchive};
14
15pub(super) struct ArchiveIterator<'a> {
16 path: &'a Path,
17 stack: Vec<Vec<(PathBuf, String)>>,
18 curr: Option<std::fs::ReadDir>,
19 currp: String,
20 in_span: tracing::span::Span,
21}
22
23impl<'a> ArchiveIterator<'a> {
24 pub fn new(path: &'a Path) -> Self {
25 Self {
26 stack: vec![vec![]],
27 curr: std::fs::read_dir(path)
28 .map_err(|_| {
29 tracing::warn!(target:"archives","Could not read directory {}", path.display());
30 })
31 .ok(),
32 path,
33 currp: String::new(),
34 in_span: tracing::Span::current(),
35 }
36 }
37
38 fn next(
39 curr: &mut Option<ReadDir>,
40 stack: &mut Vec<Vec<(PathBuf, String)>>,
41 currp: &mut String,
42 ) -> Option<LocalArchive> {
43 loop {
44 let d = match curr.as_mut().and_then(ReadDir::next) {
45 None => {
46 if Self::next_dir(stack, curr, currp) {
47 continue;
48 }
49 return None;
50 }
51 Some(Ok(d)) => d,
52 _ => continue,
53 };
54 let Ok(md) = d.metadata() else { continue };
55 let path = d.path();
56
57 if md.is_dir() {
59 if d.file_name().to_str().is_none_or(|s| s.starts_with('.')) {
60 continue;
61 } else if d.file_name().eq_ignore_ascii_case("meta-inf") {
62 if let Some(path) = Self::find_manifest(&path) {
63 stack.pop();
64 return if let Some(m) = Self::do_manifest(&path, currp) {
65 if !Self::next_dir(stack, curr, currp) {
66 *curr = None;
67 }
68 Some(m)
69 } else {
70 if Self::next_dir(stack, curr, currp) {
71 continue;
72 }
73 None
74 };
75 }
76 }
77 let mut ins = currp.clone();
78 if !ins.is_empty() {
79 ins.push('/');
80 }
81 ins.push_str(d.file_name().to_str().unwrap_or_else(|| unreachable!()));
82 stack
83 .last_mut()
84 .unwrap_or_else(|| unreachable!())
85 .push((path, ins));
86 }
87 }
88 }
89
90 fn next_dir(
91 stack: &mut Vec<Vec<(PathBuf, String)>>,
92 curr: &mut Option<std::fs::ReadDir>,
93 currp: &mut String,
94 ) -> bool {
95 loop {
96 match stack.last_mut() {
97 None => return false,
98 Some(s) => match s.pop() {
99 Some((e, s)) => {
100 *curr = if let Ok(rd) = e.read_dir() {
101 Some(rd)
102 } else {
103 tracing::warn!(target:"archives","Could not read directory {}", e.display());
104 return false;
105 };
106 *currp = s;
107 stack.push(Vec::new());
108 return true;
109 }
110 None => {
111 stack.pop();
112 }
113 },
114 }
115 }
116 }
117
118 #[allow(clippy::cognitive_complexity)]
119 fn find_manifest(metainf: &Path) -> Option<PathBuf> {
120 tracing::trace!("Checking for manifest");
121 if let Ok(rd) = metainf.read_dir() {
122 for d in rd {
123 let d = match d {
124 Err(_) => {
125 tracing::warn!(target:"archives","Could not read directory {}", metainf.display());
126 continue;
127 }
128 Ok(d) => d,
129 };
130 if !d.file_name().eq_ignore_ascii_case("manifest.mf") {
131 continue;
132 }
133 let path = d.path();
134 if !path.is_file() {
135 continue;
136 }
137 return Some(path);
138 }
139 tracing::trace!("not found");
140 } else {
141 tracing::warn!(target:"archives","Could not read directory {}", metainf.display());
142 }
143 None
144 }
145
146 #[allow(clippy::cognitive_complexity)]
147 #[allow(clippy::too_many_lines)]
148 fn do_manifest(path: &Path, id: &str) -> Option<LocalArchive> {
149 use std::io::BufRead;
150 let Some(top_dir) = path.parent().and_then(Path::parent) else {
151 tracing::warn!(target:"archives","Could not find parent directory of {}", path.display());
152 return None;
153 };
154 let out_path = top_dir.join(".flams");
155 let Ok(reader) = std::fs::File::open(path) else {
156 tracing::warn!(target:"archives","Could not open manifest {}", path.display());
157 return None;
158 };
159 let reader = std::io::BufReader::new(reader);
160 let mut lines = reader.lines();
161
162 let mut formats = VecSet::default();
163 let mut dom_uri: String = String::new();
164 let mut dependencies = Vec::new();
165 let mut ignore = IgnoreSource::default();
166 let mut attributes: VecMap<Box<str>, Box<str>> = VecMap::default();
167 let mut had_id: bool = false;
168 loop {
169 let line = match lines.next() {
170 Some(Err(_)) => continue,
171 Some(Ok(l)) => l,
172 _ => break,
173 };
174 let (k, v) = match line.split_once(':') {
175 Some((k, v)) => (k.trim(), v.trim()),
176 _ => continue,
177 };
178 match k {
179 "id" => {
180 if v != id {
181 tracing::warn!(target:"archives","Archive {v}'s id does not match its location ({id})");
182 return None;
183 } else if v.is_empty() {
184 tracing::warn!(target:"archives","Archive {v} has an empty id");
185 return None;
186 }
187 had_id = true;
188 }
189 "format" => {
190 formats = v
191 .split(',')
192 .filter_map(SourceFormat::get_from_str)
193 .collect();
194 }
195 "url-base" => dom_uri = v.into(),
196 "dependencies" => {
198 for d in v
199 .split(',')
200 .map(str::trim)
201 .filter(|s| !s.is_empty() && *s != id)
202 {
203 dependencies.push(ArchiveId::new(d));
204 }
205 }
206 "ignore" => {
207 ignore = IgnoreSource::new(v, &top_dir.join("source")); }
209 _ => {
210 attributes.insert(k.into(), v.into());
211 }
212 }
213 }
214 if !had_id {
215 tracing::warn!(target:"archives","Archive {id} has no id");
216 return None;
217 }
218 let id = ArchiveId::new(id);
222 if formats.is_empty() && !id.is_meta() {
223 tracing::warn!(target:"archives","No formats found for archive {}",id);
224 return None;
225 }
226 if dom_uri.is_empty() {
227 tracing::warn!(target:"archives","Archive {} has no URL base", id);
228 return None;
229 }
230 let dom_uri: BaseURI = match dom_uri.parse() {
231 Ok(b) => b,
232 Err(e) => {
233 tracing::warn!(target:"archives","Archive {} has an invalid URL base: {}", id, e);
234 return None;
235 }
236 };
237 let uri = dom_uri & id;
238 let (institutions,index) = read_index_file(&uri,&path.with_file_name("archive.json"));
239 Some(LocalArchive {
240 out_path: out_path.into(),
241 ignore,
242 file_state: RwLock::new(SourceDir::default()),
243 #[cfg(feature="gitlab")]
244 is_managed: std::sync::OnceLock::new(),
245 data: RepositoryData {
248 uri,
249 attributes,
250 formats,
251 institutions,index,
252 dependencies: dependencies.into(),
253 },
254 })
255 }
256}
257
258fn read_index_file(archive:&ArchiveURI,path:&Path) -> (Box<[Institution]>,Box<[ArchiveIndex]>) {
259 if !path.exists() {
260 return (Vec::new().into(),Vec::new().into())
261 }
262 let reader = match std::fs::File::open(path) {
263 Ok(reader) => reader,
264 Err(e) => {
265 tracing::error!("Could not read index file {}: {e}", path.display());
266 return (Vec::new().into(),Vec::new().into())
267 }
268 };
269 let reader = std::io::BufReader::new(reader);
270 let v = match serde_json::from_reader::<_,Vec<ArchiveDatum>>(reader) {
271 Ok(v) => v,
272 Err(e) => {
273 tracing::error!("Invalid JSON file {}: {e}", path.display());
274 return (Vec::new().into(),Vec::new().into())
275 }
276 };
277 let mut insts = Vec::new();
278 let mut idxs = Vec::new();
279 for d in v {
280 match d {
281 ArchiveDatum::Document(mut d) => {
282 if d.teaser().is_none() {
283 let desc = path.with_file_name("desc.html");
284 if desc.exists() {
285 if let Ok(s) = std::fs::read_to_string(desc) {
286 d.set_teaser(s.into_boxed_str());
287 }
288 }
289 }
290 match ArchiveIndex::from_kind(d,archive,
291 |i| format!("{}/img?a={}&rp=source/{i}",crate::settings::Settings::get().external_url().unwrap_or(""),archive.archive_id()).into_boxed_str()
292 ) {
293 Ok(e) => idxs.push(e),
294 Err(e) => tracing::error!("Error in index file {}: {e:#}",path.display())
295 }
296 },
297 ArchiveDatum::Institution(i) => insts.push(match i {
298 Institution::University { title, place, country, url, acronym, logo }
299 => Institution::University { title, place, country, url, acronym,
300 logo: format!("{}/img?a={}&rp=source/{logo}",crate::settings::Settings::get().external_url().unwrap_or(""),archive.archive_id()).into_boxed_str()
301 },
302 Institution::School { title, place, country, url, acronym, logo }
303 => Institution::School { title, place, country, url, acronym,
304 logo: format!("{}/img?a={}&rp=source/{logo}",crate::settings::Settings::get().external_url().unwrap_or(""),archive.archive_id()).into_boxed_str()
305 }
306 }),
307 }
308 }
309 (insts.into(),idxs.into())
310}
311
312impl Iterator for ArchiveIterator<'_> {
313 type Item = LocalArchive;
314 fn next(&mut self) -> Option<Self::Item> {
315 let _span = self.in_span.enter();
316 Self::next(&mut self.curr, &mut self.stack, &mut self.currp)
317 }
318}
319
320impl spliter::Spliterator for ArchiveIterator<'_> {
321 fn split(&mut self) -> Option<Self> {
322 if self.stack.len() < 2 || self.stack[0].len() < 2 {
323 return None;
324 }
325 let stacksplit = self.stack[0].len() / 2;
326 let mut rightstack = self.stack[0].split_off(stacksplit);
327 std::mem::swap(&mut self.stack[0], &mut rightstack);
328 loop {
329 match rightstack.pop() {
330 None => return None,
331 Some((e, s)) => {
332 if let Ok(rd) = std::fs::read_dir(&e) {
333 return Some(Self {
334 path: self.path,
335 curr: Some(rd),
336 stack: vec![rightstack, Vec::new()],
337 currp: s,
338 in_span: self.in_span.clone(),
339 });
340 }
341 }
342 }
343 }
344 }
345}