flams_math_archives/utils/
lazy_file.rs

1use std::{
2    io::{Read, Seek, SeekFrom, Write},
3    path::{Path, PathBuf},
4};
5
6use either::Either;
7
8use crate::utils::{
9    AsyncEngine,
10    errors::{ReadError, WriteError},
11};
12
/// Cheap, cloneable handle to a file holding `NUM_FIELDS` fields behind an offset header.
///
/// Only the path and the decoded header are kept; the file itself is reopened whenever a
/// reader is requested.
#[derive(Debug, Clone)]
pub struct LazyFile<const NUM_FIELDS: usize> {
    /// Location of the backing file.
    path: PathBuf,
    /// Header entries decoded as big-endian `u32`s. Entry `i` is where field `i` ends,
    /// counted from the end of the header; the last slot is not stored in the file.
    offsets: [u32; NUM_FIELDS],
}
19
/// An open file together with the offset table needed to locate each of its fields.
pub struct LazyFileReader<const NUM_FIELDS: usize> {
    /// The open backing file; every read seeks to an absolute position first.
    file: std::fs::File,
    /// Copy of the decoded offset header (see `do_read` for how it is interpreted).
    offsets: [u32; NUM_FIELDS],
}
24
/// Sequential writer producing a file with `NUM_FIELDS` fields behind an offset header.
pub struct LazyFileWriter<const NUM_FIELDS: usize> {
    /// The file being written.
    file: std::fs::File,
    /// Number of fields written so far.
    written: u64,
    /// Total number of payload bytes written so far, i.e. where the latest field ends.
    current_offset: u32,
}
30
31impl<const NUM_FIELDS: usize> LazyFile<NUM_FIELDS> {
32    /// # Errors
33    #[inline]
34    pub fn new(path: PathBuf) -> Result<Self, std::io::Error> {
35        Ok(Self::new_i(path)?.0)
36    }
37
38    /// blocks?
39    /// # Errors
40    pub fn read(&self) -> Result<LazyFileReader<NUM_FIELDS>, std::io::Error> {
41        Ok(LazyFileReader {
42            file: std::fs::File::open(&self.path)?,
43            offsets: self.offsets,
44        })
45    }
46
47    /// # Errors
48    pub fn new_and_then<R>(
49        path: PathBuf,
50        then: impl FnOnce(LazyFileReader<NUM_FIELDS>) -> Result<R, ReadError>,
51    ) -> Result<(Self, R), ReadError> {
52        let (s, file) = Self::new_i(path)?;
53        let reader = LazyFileReader {
54            offsets: s.offsets,
55            file,
56        };
57        then(reader).map(|r| (s, r))
58    }
59
60    fn new_i(path: PathBuf) -> Result<(Self, std::fs::File), std::io::Error> {
61        if NUM_FIELDS == 0 {
62            return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "Nope"));
63        }
64        let mut file = std::fs::File::open(&path)?;
65        let mut offsets = [[0u8; 4]; NUM_FIELDS];
66        // no const generics:
67        let buf_ref = unsafe {
68            std::slice::from_raw_parts_mut::<u8>(
69                std::ptr::from_mut(&mut offsets as &mut [[u8; 4]]).cast(),
70                4 * NUM_FIELDS,
71            )
72        };
73        let buf_ref = &mut buf_ref[0..4 * (NUM_FIELDS - 1)];
74        file.read_exact(buf_ref)?;
75        let offsets = offsets.map(u32::from_be_bytes);
76        Ok((
77            Self {
78                path,
79                //file: Some(file),
80                offsets,
81            },
82            file,
83        ))
84    }
85}
86impl<const NUM_FIELDS: usize> LazyFileReader<NUM_FIELDS> {
87    fn do_read<R>(
88        &mut self,
89        index: usize,
90        offset: u64,
91        then: impl FnOnce(&mut std::fs::File, Option<usize>) -> Result<R, ReadError>,
92    ) -> Result<R, ReadError> {
93        if NUM_FIELDS <= index {
94            return Err(ReadError::NumberOfFields {
95                max: NUM_FIELDS - 1,
96                index,
97            });
98        }
99        let offset = if index == 0 {
100            offset
101        } else {
102            let i: u64 = self.offsets[index - 1].into();
103            offset + i
104        };
105        let len = if index == NUM_FIELDS - 1 {
106            None
107        } else {
108            let i = self.offsets[index] as usize;
109            #[allow(clippy::cast_possible_truncation)]
110            Some(i - offset as usize)
111        };
112        let file = &mut self.file;
113        file.seek(SeekFrom::Start(offset + ((NUM_FIELDS - 1) as u64 * 4)))?;
114        then(file, len)
115    }
116    /// # Errors
117    pub fn read<T: bincode::de::Decode<()>>(&mut self, index: usize) -> Result<T, ReadError> {
118        self.do_read(index, 0, |file, _| {
119            Ok(bincode::decode_from_reader(
120                std::io::BufReader::new(file),
121                bincode::config::standard(),
122            )?)
123        })
124    }
125    /// # Errors
126    pub fn read_range<T: bincode::de::Decode<()>>(
127        &mut self,
128        index: usize,
129        offset: usize,
130    ) -> Result<T, ReadError> {
131        self.do_read(index, offset as u64, |file, _| {
132            Ok(bincode::decode_from_reader(
133                std::io::BufReader::new(file),
134                bincode::config::standard(),
135            )?)
136        })
137    }
138    /// # Errors
139    pub fn read_bytes(&mut self, index: usize) -> Result<Box<[u8]>, ReadError> {
140        self.do_read(index, 0, |file, len| {
141            if let Some(len) = len {
142                let mut ret = vec![0; len];
143                file.read_exact(&mut ret)?;
144                Ok(ret.into_boxed_slice())
145            } else {
146                let mut ret = Vec::new();
147                file.read_to_end(&mut ret)?;
148                Ok(ret.into_boxed_slice())
149            }
150        })
151    }
152
153    /// # Errors
154    pub fn read_string(&mut self, index: usize) -> Result<Box<str>, ReadError> {
155        self.do_read(index, 0, |file, len| {
156            if let Some(len) = len {
157                let mut ret = vec![0; len];
158                file.read_exact(&mut ret)?;
159                String::from_utf8(ret)
160                    .map_err(|e| {
161                        ReadError::Decode(bincode::error::DecodeError::OtherString(e.to_string()))
162                    })
163                    .map(|s| s.into_boxed_str())
164            } else {
165                let mut ret = String::new();
166                file.read_to_string(&mut ret)?;
167                Ok(ret.into_boxed_str())
168            }
169        })
170    }
171
172    /// # Errors
173    pub fn read_field_range(
174        &mut self,
175        index: usize,
176        start: usize,
177        length: usize,
178    ) -> Result<Vec<u8>, ReadError> {
179        self.do_read(index, start as u64, |file, _| {
180            let mut ret = vec![0; length];
181            file.read_exact(&mut ret)?;
182            Ok(ret)
183        })
184    }
185}
186
187impl<const NUM_FIELDS: usize> LazyFileWriter<NUM_FIELDS> {
188    /// # Errors
189    pub fn new(path: &Path) -> Result<Self, std::io::Error> {
190        if NUM_FIELDS == 0 {
191            return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "Nope"));
192        }
193        let mut file = std::fs::File::create(path)?;
194        // no const generics forces us to do this:
195        let buf = [[0u8; 4]; NUM_FIELDS];
196        let buf_ref = unsafe {
197            std::slice::from_raw_parts::<u8>(
198                std::ptr::from_ref(&buf as &[_]).cast(),
199                4 * NUM_FIELDS,
200            )
201        };
202        let buf_ref = &buf_ref[0..4 * (NUM_FIELDS - 1)];
203        file.write_all(buf_ref)?;
204        Ok(Self {
205            file,
206            written: 0,
207            current_offset: 0,
208        })
209    }
210
211    fn write_offset(&mut self) -> Result<(), WriteError> {
212        if self.written == NUM_FIELDS as u64 {
213            return Ok(());
214        }
215        self.file.seek(SeekFrom::Start((self.written - 1) * 4))?;
216        self.file.write_all(&self.current_offset.to_be_bytes())?;
217        self.file.seek(SeekFrom::End(0)).map(|_| ())?;
218        Ok(())
219    }
220
221    /// # Errors
222    #[allow(clippy::cast_possible_truncation)]
223    pub fn write_bytes(&mut self, value: &[u8]) -> Result<(), WriteError> {
224        if self.written == NUM_FIELDS as u64 {
225            return Err(WriteError::NumberOfFields {
226                max: NUM_FIELDS - 1,
227                index: self.written as usize,
228            });
229        }
230        self.file.write_all(value)?;
231        self.written += 1;
232        self.current_offset += value.len() as u32;
233        self.write_offset()
234    }
235
236    /// # Errors
237    #[inline]
238    pub fn write_string(&mut self, value: &str) -> Result<(), WriteError> {
239        self.write_bytes(value.as_bytes())
240    }
241
242    /// # Errors
243    #[allow(clippy::cast_possible_truncation)]
244    pub fn write<T: bincode::Encode + std::fmt::Debug>(
245        &mut self,
246        value: &T,
247    ) -> Result<(), WriteError> {
248        if self.written == NUM_FIELDS as u64 {
249            return Err(WriteError::NumberOfFields {
250                max: NUM_FIELDS - 1,
251                index: self.written as usize,
252            });
253        }
254        let mut buf = std::io::BufWriter::new(&mut self.file);
255        let length = bincode::encode_into_std_write(value, &mut buf, bincode::config::standard())?;
256        buf.flush()?;
257        drop(buf);
258        self.written += 1;
259        self.current_offset += length as u32;
260        self.write_offset()
261    }
262}
263
264mod __private {
265    use crate::utils::errors::ReadError;
266
267    pub trait LazyField: Send + Sync + Clone {
268        fn get<const I: usize>(
269            index: usize,
270            reader: &mut super::LazyFileReader<I>,
271        ) -> Result<Self, ReadError>
272        where
273            Self: Sized;
274    }
275}
276pub trait LazyFieldValue: __private::LazyField {}
277impl<P: __private::LazyField> LazyFieldValue for P {}
278
279#[derive(Debug)]
280pub struct LazyField<V: LazyFieldValue, const INDEX: usize> {
281    #[allow(clippy::type_complexity)]
282    inner: std::sync::Arc<
283        parking_lot::RwLock<Either<Option<Result<V, ReadError>>, flume::Receiver<()>>>,
284    >,
285}
286impl<V: LazyFieldValue, const INDEX: usize> Default for LazyField<V, INDEX> {
287    #[inline]
288    fn default() -> Self {
289        Self {
290            inner: std::sync::Arc::new(parking_lot::RwLock::new(Either::Left(None))),
291        }
292    }
293}
294impl<V: LazyFieldValue + 'static, const INDEX: usize> LazyField<V, INDEX> {
295    #[inline]
296    pub fn maybe_get(&self) -> Option<Result<V, ReadError>> {
297        match &*self.inner.read() {
298            Either::Left(v) => v.clone(),
299            Either::Right(_) => None,
300        }
301    }
302
303    /// # Errors
304    pub fn get<const TOTAL: usize>(&self, reader: &LazyFile<TOTAL>) -> Result<V, ReadError> {
305        let inner = self.inner.read().clone();
306        match inner {
307            Either::Left(Some(v)) => v,
308            Either::Right(c) => {
309                let _ = c.recv();
310                self.get(reader)
311            }
312            Either::Left(None) => {
313                let mut reader = reader.read()?;
314                let (s, r) = flume::bounded(1);
315                *self.inner.write() = Either::Right(r);
316                let v = V::get(INDEX, &mut reader);
317                *self.inner.write() = Either::Left(Some(v.clone()));
318                while s.receiver_count() > 0 {
319                    let _ = s.send(());
320                }
321                v
322            }
323        }
324    }
325
326    /// # Errors
327    pub fn get_async<A: AsyncEngine, const TOTAL: usize>(
328        &self,
329        reader: &LazyFile<TOTAL>,
330    ) -> impl Future<Output = Result<V, ReadError>> + Send + use<V, INDEX, A, TOTAL>
331    where
332        V: 'static,
333    {
334        let inner = self.inner.read().clone();
335        match inner {
336            Either::Left(Some(v)) => either::Left(std::future::ready(v)),
337            Either::Right(c) => {
338                let inner = self.inner.clone();
339                let reader = reader.clone();
340                either::Right(either::Left(
341                    Box::pin(Self::fut_1::<A, TOTAL>(inner, reader, c))
342                        as std::pin::Pin<Box<dyn Future<Output = _> + Send>>,
343                ))
344            }
345            Either::Left(None) => {
346                let reader = match reader.read() {
347                    Ok(r) => r,
348                    Err(e) => return either::Left(std::future::ready(Err(e.into()))),
349                };
350                let (s, r) = flume::bounded(1);
351                *self.inner.write() = Either::Right(r);
352                let inner = self.inner.clone();
353                either::Right(either::Right(Self::fut_2::<A, TOTAL>(inner, reader, s)))
354            }
355        }
356    }
357
358    async fn fut_1<A: AsyncEngine, const TOTAL: usize>(
359        inner: std::sync::Arc<
360            parking_lot::RwLock<Either<Option<Result<V, ReadError>>, flume::Receiver<()>>>,
361        >,
362        reader: LazyFile<TOTAL>,
363        c: flume::Receiver<()>,
364    ) -> Result<V, ReadError> {
365        let _ = c.recv_async().await;
366        Self { inner }.get_async::<A, _>(&reader).await
367    }
368
369    async fn fut_2<A: AsyncEngine, const TOTAL: usize>(
370        inner: std::sync::Arc<
371            parking_lot::RwLock<Either<Option<Result<V, ReadError>>, flume::Receiver<()>>>,
372        >,
373        mut reader: LazyFileReader<TOTAL>,
374        s: flume::Sender<()>,
375    ) -> Result<V, ReadError> {
376        let v = A::block_on(move || V::get(INDEX, &mut reader)).await;
377        *inner.write() = Either::Left(Some(v.clone()));
378        while s.receiver_count() > 0 {
379            let _ = s.send_async(()).await;
380        }
381        v
382    }
383
384    /*
385    /// # Errors
386    pub fn load<const TOTAL: usize>(
387        &mut self,
388        reader: &mut LazyFileReader<'_, TOTAL>,
389    ) -> Result<(), ReadError> {
390        if self.inner.is_none() {
391            self.inner = Some(Ok(V::get(INDEX, reader)?));
392        }
393        Ok(())
394    }
395     */
396}
397
#[cfg(feature = "deepsize")]
impl<V: LazyFieldValue + deepsize::DeepSizeOf, const INDEX: usize> deepsize::DeepSizeOf
    for LazyField<V, INDEX>
{
    /// Counts the children of a successfully loaded value; an empty, loading or failed
    /// cache contributes nothing.
    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
        match &*self.inner.read() {
            either::Left(Some(Ok(value))) => value.deep_size_of_children(context),
            _ => 0,
        }
    }
}
410
411#[derive(Debug)]
412pub struct EagerField<V: LazyFieldValue, const INDEX: usize> {
413    inner: V,
414}
415impl<V: LazyFieldValue, const INDEX: usize> EagerField<V, INDEX> {
416    #[inline]
417    pub const fn get(&self) -> &V {
418        &self.inner
419    }
420
421    /// # Errors
422    pub fn new<const TOTAL: usize>(reader: &mut LazyFileReader<TOTAL>) -> Result<Self, ReadError> {
423        Ok(Self {
424            inner: V::get(INDEX, reader)?,
425        })
426    }
427}
#[cfg(feature = "deepsize")]
impl<V: LazyFieldValue + deepsize::DeepSizeOf, const INDEX: usize> deepsize::DeepSizeOf
    for EagerField<V, INDEX>
{
    /// Delegates to the wrapped value.
    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
        let value = &self.inner;
        value.deep_size_of_children(context)
    }
}
436
437impl<T: bincode::de::Decode<()> + Clone + Send + Sync> __private::LazyField for T {
438    fn get<const I: usize>(index: usize, reader: &mut LazyFileReader<I>) -> Result<Self, ReadError>
439    where
440        Self: Sized,
441    {
442        reader.read(index)
443    }
444}
445
/// Stateless marker for field `INDEX`, whose contents are read as UTF-8 text on every
/// access rather than cached.
#[derive(Debug)]
pub struct StreamField<const INDEX: usize>;
448impl<const INDEX: usize> StreamField<INDEX> {
449    /// # Errors
450    pub fn get<const TOTAL: usize>(&self, reader: &LazyFile<TOTAL>) -> Result<Box<str>, ReadError> {
451        reader.read()?.read_string(INDEX)
452    }
453
454    /// # Errors
455    pub fn get_range<const TOTAL: usize>(
456        &self,
457        reader: &LazyFile<TOTAL>,
458        offset: usize,
459        end: usize,
460    ) -> Result<Box<str>, ReadError> {
461        let bytes = reader
462            .read()?
463            .read_field_range(INDEX, offset, end - offset)?;
464        String::from_utf8(bytes)
465            .map_err(|e| ReadError::Decode(bincode::error::DecodeError::OtherString(e.to_string())))
466            .map(|s| s.into_boxed_str())
467    }
468}
469
/// Stateless marker for field `INDEX`, whose contents are read as raw bytes on every
/// access rather than cached.
#[derive(Debug)]
pub struct BytesField<const INDEX: usize>;
/* Unused draft of a caching variant:
{
    inner: BytesFieldI,
}

#[derive(Default, Debug)]
enum BytesFieldI {
    #[default]
    None,
    Full(Box<[u8]>),
    Range(Box<[(usize, Box<[u8]>)]>),
}
 */
483impl<const INDEX: usize> BytesField<INDEX> {
484    /// # Errors
485    pub fn get<const TOTAL: usize>(
486        &self,
487        reader: &LazyFile<TOTAL>,
488    ) -> Result<Box<[u8]>, ReadError> {
489        reader.read()?.read_bytes(INDEX)
490    }
491
492    /// # Errors
493    pub fn get_range<const TOTAL: usize>(
494        &self,
495        reader: &LazyFile<TOTAL>,
496        offset: usize,
497        end: usize,
498    ) -> Result<Box<[u8]>, ReadError> {
499        let bytes = reader
500            .read()?
501            .read_field_range(INDEX, offset, end - offset)?;
502        Ok(bytes.into_boxed_slice())
503    }
504
505    /// # Errors
506    pub fn deserialize_range<const TOTAL: usize, T: bincode::de::Decode<()>>(
507        &self,
508        reader: &LazyFile<TOTAL>,
509        offset: usize,
510        _end: usize,
511    ) -> Result<T, ReadError> {
512        reader.read()?.read_range(INDEX, offset)
513    }
514}
515
516/*
517#[derive(Default)]
518pub struct StringField<const INDEX: usize> {
519    inner: StringFieldI,
520}
521#[derive(Default)]
522enum StringFieldI {
523    #[default]
524    None,
525    Full(Box<str>),
526    Range(Box<[(usize, Box<str>)]>),
527}
528 */