Skip to main content

flams_math_archives/utils/
lazy_file.rs

1use std::{
2    io::{Read, Seek, SeekFrom, Write},
3    path::{Path, PathBuf},
4};
5
6use either::Either;
7
8use crate::utils::{
9    AsyncEngine,
10    errors::{ReadError, WriteError},
11};
12
/// Cheap, cloneable handle to an on-disk file made of `NUM_FIELDS` consecutive fields.
///
/// The file starts with a header of `NUM_FIELDS - 1` big-endian `u32` values, followed by
/// the field data. The handle keeps only the path and the decoded header; the file itself
/// is reopened on every [`LazyFile::read`].
#[derive(Debug, Clone)]
pub struct LazyFile<const NUM_FIELDS: usize> {
    /// Location of the file on disk.
    path: PathBuf,
    /// `offsets[i]` is the cumulative byte length of fields `0..=i`, measured from the end
    /// of the header. The last entry is never stored in the file and stays `0`.
    offsets: [u32; NUM_FIELDS],
}
19
/// An open file plus a copy of its offset header, used to read individual fields.
#[derive(Debug)]
pub struct LazyFileReader<const NUM_FIELDS: usize> {
    /// The open file; every read seeks to an absolute position first.
    file: std::fs::File,
    /// Cumulative end offsets of the fields, relative to the end of the header.
    offsets: [u32; NUM_FIELDS],
}
24
/// Sequential writer producing a file with up to `NUM_FIELDS` fields.
#[derive(Debug)]
pub struct LazyFileWriter<const NUM_FIELDS: usize> {
    /// The file being written.
    file: std::fs::File,
    /// Number of fields written so far.
    written: u64,
    /// Cumulative byte length of the fields written so far (the header is not counted).
    current_offset: u32,
}
30
31impl<const NUM_FIELDS: usize> LazyFile<NUM_FIELDS> {
32    /// # Errors
33    #[inline]
34    pub fn new(path: PathBuf) -> Result<Self, std::io::Error> {
35        Ok(Self::new_i(path)?.0)
36    }
37
38    /// blocks?
39    /// # Errors
40    pub fn read(&self) -> Result<LazyFileReader<NUM_FIELDS>, std::io::Error> {
41        Ok(LazyFileReader {
42            file: std::fs::File::open(&self.path)?,
43            offsets: self.offsets,
44        })
45    }
46
47    /// # Errors
48    pub fn new_and_then<R>(
49        path: PathBuf,
50        then: impl FnOnce(LazyFileReader<NUM_FIELDS>) -> Result<R, ReadError>,
51    ) -> Result<(Self, R), ReadError> {
52        let (s, file) = Self::new_i(path)?;
53        let reader = LazyFileReader {
54            offsets: s.offsets,
55            file,
56        };
57        then(reader).map(|r| (s, r))
58    }
59
60    fn new_i(path: PathBuf) -> Result<(Self, std::fs::File), std::io::Error> {
61        if NUM_FIELDS == 0 {
62            return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "Nope"));
63        }
64        let mut file = std::fs::File::open(&path)?;
65        let mut offsets = [[0u8; 4]; NUM_FIELDS];
66        // no const generics:
67        let buf_ref = unsafe {
68            std::slice::from_raw_parts_mut::<u8>(
69                std::ptr::from_mut(&mut offsets as &mut [[u8; 4]]).cast(),
70                4 * NUM_FIELDS,
71            )
72        };
73        let buf_ref = &mut buf_ref[0..4 * (NUM_FIELDS - 1)];
74        file.read_exact(buf_ref)?;
75        let offsets = offsets.map(u32::from_be_bytes);
76        Ok((
77            Self {
78                path,
79                //file: Some(file),
80                offsets,
81            },
82            file,
83        ))
84    }
85}
86impl<const NUM_FIELDS: usize> LazyFileReader<NUM_FIELDS> {
87    fn do_read<R>(
88        &mut self,
89        index: usize,
90        offset: u64,
91        then: impl FnOnce(&mut std::fs::File, Option<usize>) -> Result<R, ReadError>,
92    ) -> Result<R, ReadError> {
93        if NUM_FIELDS <= index {
94            return Err(ReadError::NumberOfFields {
95                max: NUM_FIELDS - 1,
96                index,
97            });
98        }
99        let offset = if index == 0 {
100            offset
101        } else {
102            let i: u64 = self.offsets[index - 1].into();
103            offset + i
104        };
105        let len = if index == NUM_FIELDS - 1 {
106            None
107        } else {
108            let i = self.offsets[index] as usize;
109            #[allow(clippy::cast_possible_truncation)]
110            Some(i - offset as usize)
111        };
112        let file = &mut self.file;
113        file.seek(SeekFrom::Start(offset + ((NUM_FIELDS - 1) as u64 * 4)))?;
114        then(file, len)
115    }
116    /// # Errors
117    pub fn read<T: bincode::de::Decode<()>>(&mut self, index: usize) -> Result<T, ReadError> {
118        self.do_read(index, 0, |file, _| {
119            Ok(bincode::decode_from_reader(
120                std::io::BufReader::new(file),
121                bincode::config::standard(),
122            )?)
123        })
124    }
125    /// # Errors
126    pub fn read_range<T: bincode::de::Decode<()>>(
127        &mut self,
128        index: usize,
129        offset: usize,
130    ) -> Result<T, ReadError> {
131        self.do_read(index, offset as u64, |file, _| {
132            Ok(bincode::decode_from_reader(
133                std::io::BufReader::new(file),
134                bincode::config::standard(),
135            )?)
136        })
137    }
138    /// # Errors
139    pub fn read_bytes(&mut self, index: usize) -> Result<Box<[u8]>, ReadError> {
140        self.do_read(index, 0, |file, len| {
141            if let Some(len) = len {
142                let mut ret = vec![0; len];
143                file.read_exact(&mut ret)?;
144                Ok(ret.into_boxed_slice())
145            } else {
146                let mut ret = Vec::new();
147                file.read_to_end(&mut ret)?;
148                Ok(ret.into_boxed_slice())
149            }
150        })
151    }
152
153    /// # Errors
154    pub fn read_string(&mut self, index: usize) -> Result<Box<str>, ReadError> {
155        self.do_read(index, 0, |file, len| {
156            if let Some(len) = len {
157                let mut ret = vec![0; len];
158                file.read_exact(&mut ret)?;
159                String::from_utf8(ret)
160                    .map_err(|e| {
161                        ReadError::Decode(bincode::error::DecodeError::OtherString(e.to_string()))
162                    })
163                    .map(String::into_boxed_str)
164            } else {
165                let mut ret = String::new();
166                file.read_to_string(&mut ret)?;
167                Ok(ret.into_boxed_str())
168            }
169        })
170    }
171
172    /// # Errors
173    pub fn read_field_range(
174        &mut self,
175        index: usize,
176        start: usize,
177        length: usize,
178    ) -> Result<Vec<u8>, ReadError> {
179        self.do_read(index, start as u64, |file, _| {
180            let mut ret = vec![0; length];
181            file.read_exact(&mut ret)?;
182            Ok(ret)
183        })
184    }
185}
186
187impl<const NUM_FIELDS: usize> LazyFileWriter<NUM_FIELDS> {
188    /// # Errors
189    pub fn new(path: &Path) -> Result<Self, std::io::Error> {
190        if NUM_FIELDS == 0 {
191            return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "Nope"));
192        }
193        let mut file = std::fs::File::create(path)?;
194        // no const generics forces us to do this:
195        let buf = [[0u8; 4]; NUM_FIELDS];
196        let buf_ref = unsafe {
197            std::slice::from_raw_parts::<u8>(
198                std::ptr::from_ref(&buf as &[_]).cast(),
199                4 * NUM_FIELDS,
200            )
201        };
202        let buf_ref = &buf_ref[0..4 * (NUM_FIELDS - 1)];
203        file.write_all(buf_ref)?;
204        Ok(Self {
205            file,
206            written: 0,
207            current_offset: 0,
208        })
209    }
210
211    fn write_offset(&mut self) -> Result<(), WriteError> {
212        if self.written == NUM_FIELDS as u64 {
213            return Ok(());
214        }
215        self.file.seek(SeekFrom::Start((self.written - 1) * 4))?;
216        self.file.write_all(&self.current_offset.to_be_bytes())?;
217        self.file.seek(SeekFrom::End(0)).map(|_| ())?;
218        Ok(())
219    }
220
221    /// # Errors
222    #[allow(clippy::cast_possible_truncation)]
223    pub fn write_bytes(&mut self, value: &[u8]) -> Result<(), WriteError> {
224        if self.written == NUM_FIELDS as u64 {
225            return Err(WriteError::NumberOfFields {
226                max: NUM_FIELDS - 1,
227                index: self.written as usize,
228            });
229        }
230        self.file.write_all(value)?;
231        self.written += 1;
232        self.current_offset += value.len() as u32;
233        self.write_offset()
234    }
235
236    /// # Errors
237    #[inline]
238    pub fn write_string(&mut self, value: &str) -> Result<(), WriteError> {
239        self.write_bytes(value.as_bytes())
240    }
241
242    /// # Errors
243    #[allow(clippy::cast_possible_truncation)]
244    pub fn write<T: bincode::Encode + std::fmt::Debug>(
245        &mut self,
246        value: &T,
247    ) -> Result<(), WriteError> {
248        if self.written == NUM_FIELDS as u64 {
249            return Err(WriteError::NumberOfFields {
250                max: NUM_FIELDS - 1,
251                index: self.written as usize,
252            });
253        }
254        let mut buf = std::io::BufWriter::new(&mut self.file);
255        let length = bincode::encode_into_std_write(value, &mut buf, bincode::config::standard())?;
256        buf.flush()?;
257        drop(buf);
258        self.written += 1;
259        self.current_offset += length as u32;
260        self.write_offset()
261    }
262}
263
264mod __private {
265    use crate::utils::errors::ReadError;
266
267    pub trait LazyField: Send + Sync + Clone {
268        fn get<const I: usize>(
269            index: usize,
270            reader: &mut super::LazyFileReader<I>,
271        ) -> Result<Self, ReadError>
272        where
273            Self: Sized;
274    }
275}
276pub trait LazyFieldValue: __private::LazyField {}
277impl<P: __private::LazyField> LazyFieldValue for P {}
278
279#[derive(Debug)]
280pub struct LazyField<V: LazyFieldValue, const INDEX: usize> {
281    inner: parking_lot::Mutex<Option<ftml_backend::utils::async_cache::Awaitable<V, ReadError>>>, /*#[allow(clippy::type_complexity)]
282                                                                                                  inner: std::sync::Arc<
283                                                                                                      parking_lot::RwLock<Either<Option<Result<V, ReadError>>, flume::Receiver<()>>>,
284                                                                                                  >,*/
285}
286impl<V: LazyFieldValue, const INDEX: usize> Default for LazyField<V, INDEX> {
287    #[inline]
288    fn default() -> Self {
289        Self {
290            inner: parking_lot::Mutex::new(None), //std::sync::Arc::new(parking_lot::RwLock::new(Either::Left(None))),
291        }
292    }
293}
294impl<V: LazyFieldValue + 'static, const INDEX: usize> LazyField<V, INDEX> {
295    #[inline]
296    /// blocks!
297    pub fn maybe_get(&self) -> Option<Result<V, ReadError>> {
298        let lock = self.inner.lock();
299        let v = lock.as_ref()?.clone();
300        drop(lock);
301        Some(v.get_sync())
302    }
303
304    /// # Errors
305    pub fn get<const TOTAL: usize>(&self, reader: &LazyFile<TOTAL>) -> Result<V, ReadError> {
306        let mut lock = self.inner.lock();
307        if let Some(inner) = &*lock {
308            let a = inner.clone();
309            drop(lock);
310            a.get_sync()
311        } else {
312            let (a, r, sender) = ftml_backend::utils::async_cache::Awaitable::new_sync();
313            *lock = Some(a);
314            drop(lock);
315            let mut reader = reader.read()?;
316            let v = V::get(INDEX, &mut reader);
317            let mut lock = r.0.lock();
318            if let Ok(r) = &mut lock {
319                **r = Some(v.clone());
320            };
321            drop(lock);
322            r.1.notify_all();
323            if sender.receiver_count() > 0 {
324                let _ = sender.broadcast_blocking(true);
325            }
326            v
327        }
328        /*
329        let inner = self.inner.read().clone();
330        match inner {
331            Either::Left(Some(v)) => v,
332            Either::Right(c) => {
333                let _ = c.recv();
334                self.get(reader)
335            }
336            Either::Left(None) => {
337                let mut reader = reader.read()?;
338                let (s, r) = flume::bounded(1);
339                *self.inner.write() = Either::Right(r);
340                let v = V::get(INDEX, &mut reader);
341                *self.inner.write() = Either::Left(Some(v.clone()));
342                while s.receiver_count() > 0 {
343                    let _ = s.send(());
344                }
345                v
346            }
347        } */
348    }
349
350    /// # Errors
351    pub fn get_async<A: AsyncEngine, const TOTAL: usize>(
352        &self,
353        reader: &LazyFile<TOTAL>,
354    ) -> impl Future<Output = Result<V, ReadError>> + Send + use<V, INDEX, A, TOTAL>
355    where
356        V: 'static,
357    {
358        let mut lock = self.inner.lock();
359        if let Some(inner) = &*lock {
360            let a = inner.clone();
361            drop(lock);
362            return either::Left(a.get());
363        }
364        let reader = reader.clone();
365        let fut = A::block_on(move || {
366            let mut reader = reader.read()?;
367            V::get(INDEX, &mut reader)
368        });
369        let (a, r) = ftml_backend::utils::async_cache::Awaitable::new_fut(fut);
370        *lock = Some(a);
371        drop(lock);
372        either::Right(r.get())
373        /*
374        let inner = self.inner.read().clone();
375        match inner {
376            Either::Left(Some(v)) => either::Left(std::future::ready(v)),
377            Either::Right(c) => {
378                let inner = self.inner.clone();
379                let reader = reader.clone();
380                either::Right(either::Left(
381                    Box::pin(Self::fut_1::<A, TOTAL>(inner, reader, c))
382                        as std::pin::Pin<Box<dyn Future<Output = _> + Send>>,
383                ))
384            }
385            Either::Left(None) => {
386                let reader = match reader.read() {
387                    Ok(r) => r,
388                    Err(e) => return either::Left(std::future::ready(Err(e.into()))),
389                };
390                let (s, r) = flume::bounded(1);
391                *self.inner.write() = Either::Right(r);
392                let inner = self.inner.clone();
393                either::Right(either::Right(Self::fut_2::<A, TOTAL>(inner, reader, s)))
394            }
395        }
396         */
397    }
398
399    /*
400    async fn fut_1<A: AsyncEngine, const TOTAL: usize>(
401        inner: std::sync::Arc<
402            parking_lot::RwLock<Either<Option<Result<V, ReadError>>, flume::Receiver<()>>>,
403        >,
404        reader: LazyFile<TOTAL>,
405        c: flume::Receiver<()>,
406    ) -> Result<V, ReadError> {
407        let _ = c.recv_async().await;
408        Self { inner }.get_async::<A, _>(&reader).await
409    }
410
411    async fn fut_2<A: AsyncEngine, const TOTAL: usize>(
412        inner: std::sync::Arc<
413            parking_lot::RwLock<Either<Option<Result<V, ReadError>>, flume::Receiver<()>>>,
414        >,
415        mut reader: LazyFileReader<TOTAL>,
416        s: flume::Sender<()>,
417    ) -> Result<V, ReadError> {
418        let v = A::block_on(move || V::get(INDEX, &mut reader)).await;
419        *inner.write() = Either::Left(Some(v.clone()));
420        while s.receiver_count() > 0 {
421            let _ = s.send_async(()).await;
422        }
423        v
424    }
425     */
426
427    /*
428    /// # Errors
429    pub fn load<const TOTAL: usize>(
430        &mut self,
431        reader: &mut LazyFileReader<'_, TOTAL>,
432    ) -> Result<(), ReadError> {
433        if self.inner.is_none() {
434            self.inner = Some(Ok(V::get(INDEX, reader)?));
435        }
436        Ok(())
437    }
438     */
439}
440
#[cfg(feature = "deepsize")]
impl<V: LazyFieldValue + deepsize::DeepSizeOf, const INDEX: usize> deepsize::DeepSizeOf
    for LazyField<V, INDEX>
{
    /// Reports the children size of the cached value, or `0` when no value has been loaded
    /// successfully (no load started, still pending, load failed, or lock poisoned).
    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
        let outer = self.inner.lock();
        let Some(awaitable) = outer.as_ref() else {
            return 0;
        };
        let slot = awaitable.inner.0.lock();
        match slot.as_deref() {
            Ok(Some(Ok(value))) => value.deep_size_of_children(context),
            _ => 0,
        }
    }
}
455
456#[derive(Debug)]
457pub struct EagerField<V: LazyFieldValue, const INDEX: usize> {
458    inner: V,
459}
460impl<V: LazyFieldValue, const INDEX: usize> EagerField<V, INDEX> {
461    #[inline]
462    pub const fn get(&self) -> &V {
463        &self.inner
464    }
465
466    /// # Errors
467    pub fn new<const TOTAL: usize>(reader: &mut LazyFileReader<TOTAL>) -> Result<Self, ReadError> {
468        Ok(Self {
469            inner: V::get(INDEX, reader)?,
470        })
471    }
472}
#[cfg(feature = "deepsize")]
impl<V: LazyFieldValue + deepsize::DeepSizeOf, const INDEX: usize> deepsize::DeepSizeOf
    for EagerField<V, INDEX>
{
    /// Delegates to the wrapped value.
    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
        let Self { inner } = self;
        deepsize::DeepSizeOf::deep_size_of_children(inner, context)
    }
}
481
482impl<T: bincode::de::Decode<()> + Clone + Send + Sync> __private::LazyField for T {
483    fn get<const I: usize>(index: usize, reader: &mut LazyFileReader<I>) -> Result<Self, ReadError>
484    where
485        Self: Sized,
486    {
487        reader.read(index)
488    }
489}
490
491#[derive(Debug)]
492pub struct StreamField<const INDEX: usize>;
493impl<const INDEX: usize> StreamField<INDEX> {
494    /// # Errors
495    pub fn get<const TOTAL: usize>(&self, reader: &LazyFile<TOTAL>) -> Result<Box<str>, ReadError> {
496        reader.read()?.read_string(INDEX)
497    }
498
499    /// # Errors
500    pub fn get_range<const TOTAL: usize>(
501        &self,
502        reader: &LazyFile<TOTAL>,
503        offset: usize,
504        end: usize,
505    ) -> Result<Box<str>, ReadError> {
506        let bytes = reader
507            .read()?
508            .read_field_range(INDEX, offset, end - offset)?;
509        String::from_utf8(bytes)
510            .map_err(|e| ReadError::Decode(bincode::error::DecodeError::OtherString(e.to_string())))
511            .map(|s| s.into_boxed_str())
512    }
513}
514
515#[derive(Debug)]
516pub struct BytesField<const INDEX: usize>; /* {
517inner: BytesFieldI,
518}
519
520#[derive(Default, Debug)]
521enum BytesFieldI {
522#[default]
523None,
524Full(Box<[u8]>),
525Range(Box<[(usize, Box<[u8]>)]>),
526}
527 */
528impl<const INDEX: usize> BytesField<INDEX> {
529    /// # Errors
530    pub fn get<const TOTAL: usize>(
531        &self,
532        reader: &LazyFile<TOTAL>,
533    ) -> Result<Box<[u8]>, ReadError> {
534        reader.read()?.read_bytes(INDEX)
535    }
536
537    /// # Errors
538    pub fn get_range<const TOTAL: usize>(
539        &self,
540        reader: &LazyFile<TOTAL>,
541        offset: usize,
542        end: usize,
543    ) -> Result<Box<[u8]>, ReadError> {
544        let bytes = reader
545            .read()?
546            .read_field_range(INDEX, offset, end - offset)?;
547        Ok(bytes.into_boxed_slice())
548    }
549
550    /// # Errors
551    pub fn deserialize_range<const TOTAL: usize, T: bincode::de::Decode<()>>(
552        &self,
553        reader: &LazyFile<TOTAL>,
554        offset: usize,
555        _end: usize,
556    ) -> Result<T, ReadError> {
557        reader.read()?.read_range(INDEX, offset)
558    }
559}
560
561/*
562#[derive(Default)]
563pub struct StringField<const INDEX: usize> {
564    inner: StringFieldI,
565}
566#[derive(Default)]
567enum StringFieldI {
568    #[default]
569    None,
570    Full(Box<str>),
571    Range(Box<[(usize, Box<str>)]>),
572}
573 */