Skip to main content

tex_engine/engine/
filesystem.rs

1/*! Accessing files on `\input`, `\open`/`\write` etc. */
2
3use crate::engine::filesystem::kpathsea::Kpathsea;
4use crate::engine::mouth::strings::InputTokenizer;
5use crate::engine::state::State;
6use crate::engine::utils::outputs::Outputs;
7use crate::engine::{EngineAux, EngineTypes};
8use crate::tex::characters::{Character, StringLineSource, TextLine, TextLineSource};
9use crate::tex::tokens::control_sequences::CSName;
10use crate::utils::errors::{TeXError, TeXResult};
11use crate::utils::{HMap, Ptr};
12use std::path::{Path, PathBuf};
13
14pub mod kpathsea;
15
/// A [`FileSystem`] provides access to files.
///
/// Besides looking up files by name ([`get`](FileSystem::get)), it manages the
/// indexed input and output streams behind `\openin`, `\openout`, `\read`,
/// `\readline`, `\write`, `\closein`, `\closeout` and `\ifeof`.
pub trait FileSystem: Clone {
    /// The type of files provided by this [`FileSystem`].
    type File: File;
    /// Creates a new [`FileSystem`] with the given working directory.
    fn new(pwd: PathBuf) -> Self;
    /// Returns the file with the given name in the file database.
    /// May return nonexistent files in the CWD.
    fn get<S: AsRef<str>>(&mut self, path: S) -> Self::File;
    /// Sets the working directory of this [`FileSystem`], returning the old working directory
    /// and updating the file database.
    fn set_pwd(&mut self, pwd: PathBuf) -> PathBuf;

    /// Opens the file with the given index for writing (`\openout`).
    fn open_out(&mut self, idx: u8, file: Self::File);
    /// Opens the file with the given index for reading (`\openin`).
    fn open_in(&mut self, idx: u8, file: Self::File);
    /// Closes the file with the given index (`\closein`).
    fn close_in(&mut self, idx: u8);
    /// Closes the file with the given index (`\closeout`).
    fn close_out(&mut self, idx: u8);
    /// Whether the file with the given index is at its end (`\ifeof`).
    fn eof(&self, idx: u8) -> bool;
    /// Writes the given string to the file with the given index (`\write`).
    ///
    /// `newlinechar` is the character, if any, at which an implementation may
    /// break `string` into several lines; `aux` gives access to the engine's
    /// output channels for indices that do not refer to an open file.
    fn write<ET: EngineTypes, D: std::fmt::Display>(
        &mut self,
        idx: i64,
        string: D,
        newlinechar: Option<ET::Char>,
        aux: &mut EngineAux<ET>,
    );
    /// Reads a line from the file with the given index and current [`CategoryCodeScheme`](crate::tex::catcodes::CategoryCodeScheme) (`\read`),
    /// respecting groups (i.e. will continue reading at the end of a line until all open groups are closed).
    ///
    /// Every token read is passed to `cont`.
    fn read<ET: EngineTypes<Char = <Self::File as File>::Char>, F: FnMut(ET::Token)>(
        &mut self,
        idx: u8,
        handler: &mut <ET::CSName as CSName<ET::Char>>::Handler,
        state: &ET::State,
        cont: F,
    ) -> TeXResult<(), ET>;
    /// Reads a line from the file with the given index using [`CategoryCode::Other`](crate::tex::catcodes::CategoryCode::Other)
    /// except for space characters (`\readline`).
    ///
    /// Every token read is passed to `cont`.
    fn readline<ET: EngineTypes<Char = <Self::File as File>::Char>, F: FnMut(ET::Token)>(
        &mut self,
        idx: u8,
        state: &ET::State,
        cont: F,
    ) -> TeXResult<(), ET>;

    /// Returns a human-readable representation of a [`SourceRefID`](File::SourceRefID); e.g. the file name/path.
    fn ref_str(&self, id: <Self::File as File>::SourceRefID) -> &str;
}
69
/// A (virtual or physical) file.
///
/// A [`File`] is a cheap, cloneable handle; its contents are only accessed
/// once it is turned into a [`LineSource`](File::LineSource).
pub trait File: std::fmt::Display + Clone + std::fmt::Debug + 'static {
    /// The type of characters to be read from the file.
    type Char: Character;
    /// A `Copy`able identifier for this file to be used in [`SourceReference`]s.
    type SourceRefID: Copy + std::fmt::Debug + Default;
    /// The type of line sources to be read from the file.
    type LineSource: FileLineSource<Self::Char>;
    /// Returns the path of this file.
    fn path(&self) -> &Path;
    /// Returns a line source for this file. Used by a [`Mouth`](crate::engine::mouth::Mouth) to read from this file.
    ///
    /// Consumes the file handle. On failure a path is returned as the error
    /// value (for [`VirtualFile`], the path that could not be opened).
    fn line_source(self) -> Result<Self::LineSource,PathBuf>;
    /// Returns whether this file exists.
    ///
    /// The default implementation checks the physical file system for
    /// [`path`](File::path).
    fn exists(&self) -> bool {
        self.path().exists()
    }
    /// Returns the size of this file in bytes.
    fn size(&self) -> usize;
    /// Returns a [`SourceRefID`](File::SourceRefID) for this file.
    fn sourceref(&self) -> Self::SourceRefID;
}
91
/// An abstraction over a [`TextLineSource`] that has a `Path` - i.e. represents the contents of a file.
pub trait FileLineSource<C: Character>: TextLineSource<C> {
    /// Returns the path of the file whose lines this source yields.
    fn path(&self) -> &Path;
}
96
/// A [`FileSystem`] that does not write to the local physical file system.
/// If a file is modified, its contents are kept in memory.
pub struct NoOutputFileSystem<C: Character> {
    /// Resolves file names to paths; also holds the current working directory.
    pub kpse: Kpathsea,
    /// All files handed out so far (and all in-memory files), keyed by their resolved path.
    files: HMap<PathBuf, VirtualFile<C>>,
    /// Environment variables passed to spawned `kpsewhich` processes.
    /// Initialised with `PWD` and `CD` set to the working directory.
    pub envs: HMap<String, String>,
    /// Files currently open for writing, indexed by stream number; `None` marks a closed slot.
    write_files: Vec<Option<WritableVirtualFile<C>>>,
    /// Files currently open for reading, indexed by stream number; `None` marks a closed slot.
    read_files: Vec<Option<InputTokenizer<C, VirtualFileLineSource<C>>>>,
    /// Interns the display names of files; the resulting symbols serve as
    /// [`SourceRefID`](File::SourceRefID)s.
    pub interner: string_interner::StringInterner<
        string_interner::backend::StringBackend<string_interner::symbol::SymbolU32>,
        rustc_hash::FxBuildHasher,
    >,
}
111impl<C: Character> Clone for NoOutputFileSystem<C> {
112    fn clone(&self) -> Self {
113        Self {
114            kpse: self.kpse.clone(),
115            files: self.files.clone(),
116            write_files: self.write_files.clone(),
117            envs: self.envs.clone(),
118            read_files: Vec::new(),
119            interner: self.interner.clone(),
120        }
121    }
122}
123impl<C: Character> NoOutputFileSystem<C> {
124    pub fn add_file(&mut self, path: PathBuf, file_content: &str) {
125        let string = if path.starts_with(&self.kpse.pwd) {
126            format!("./{}", path.strip_prefix(&self.kpse.pwd).unwrap().display())
127        } else {
128            path.display().to_string()
129        };
130        let source = StringLineSource::make_lines(file_content.bytes());
131        let f = VirtualFile {
132            path,
133            source: Some(source.into()),
134            pipe: false,
135            exists: true,
136            id: Some(self.interner.get_or_intern(string)),
137        };
138        self.files.insert(f.path.clone(), f.clone());
139    }
140}
141impl<C: Character> FileSystem for NoOutputFileSystem<C> {
142    type File = VirtualFile<C>;
143
144    fn new(pwd: PathBuf) -> Self {
145        let mut envs = HMap::default();
146        envs.insert("PWD".to_string(), pwd.display().to_string());
147        envs.insert("CD".to_string(), pwd.display().to_string());
148        Self {
149            envs,
150            kpse: Kpathsea::new(pwd),
151            files: HMap::default(),
152            write_files: Vec::new(),
153            read_files: Vec::new(),
154            interner: string_interner::StringInterner::new(),
155        }
156    }
157    fn ref_str(&self, id: <Self::File as File>::SourceRefID) -> &str {
158        match id {
159            Some(id) => self.interner.resolve(id).unwrap(),
160            None => "(NONE)",
161        }
162    }
163    fn get<S: AsRef<str>>(&mut self, path: S) -> Self::File {
164        let path = path.as_ref();
165        if path.is_empty() {
166            return VirtualFile {
167                path: self.kpse.pwd.clone(),
168                source: None,
169                pipe: false,
170                exists: false,
171                id: Some(self.interner.get_or_intern("")),
172            };
173        }
174        let kpath = self.kpse.kpsewhich(path);
175        match self.files.get(&kpath.path) {
176            Some(f) => f.clone(),
177            None => {
178                if path.starts_with("|kpsewhich ") {
179                    let s = &path[1..];
180                    let out = if cfg!(target_os = "windows") {
181                        std::process::Command::new("cmd")
182                            .current_dir(&self.kpse.pwd)
183                            .envs(self.envs.iter())
184                            .args(["/C", s]) //args.collect::<Vec<&str>>())
185                            .output()
186                            .expect("kpsewhich not found!")
187                            .stdout
188                    } else {
189                        let args = s[10..].split(' ');
190                        std::process::Command::new("kpsewhich")
191                            .current_dir(&self.kpse.pwd)
192                            .envs(self.envs.iter())
193                            .args(args.collect::<Vec<&str>>())
194                            .output()
195                            .expect("kpsewhich not found!")
196                            .stdout
197                    };
198                    let source = Some(StringLineSource::make_lines(out.into_iter()).into());
199                    let f = VirtualFile {
200                        path: kpath.path,
201                        source,
202                        pipe: true,
203                        exists: kpath.exists,
204                        id: Some(self.interner.get_or_intern(path)),
205                    };
206                    self.files.insert(f.path.clone(), f.clone());
207                    return f;
208                }
209                let string = if kpath.path.starts_with(&self.kpse.pwd) {
210                    format!(
211                        "./{}",
212                        kpath.path.strip_prefix(&self.kpse.pwd).unwrap().display()
213                    )
214                } else {
215                    kpath.path.display().to_string()
216                };
217                let f = VirtualFile {
218                    path: kpath.path,
219                    source: None,
220                    pipe: false,
221                    exists: kpath.exists,
222                    id: Some(self.interner.get_or_intern(string)),
223                };
224                self.files.insert(f.path.clone(), f.clone());
225                f
226            }
227        }
228    }
229    fn set_pwd(&mut self, pwd: PathBuf) -> PathBuf {
230        self.envs
231            .insert("PWD".to_string(), pwd.display().to_string());
232        self.envs
233            .insert("CD".to_string(), pwd.display().to_string());
234        let old = std::mem::replace(&mut self.kpse, Kpathsea::new(pwd));
235        old.pwd
236    }
237    fn open_in(&mut self, idx: u8, file: Self::File) {
238        while self.read_files.len() <= idx as usize {
239            self.read_files.push(None);
240        }
241        match self.read_files.get_mut(idx as usize) {
242            Some(n) => *n = file.line_source().ok().map(InputTokenizer::new),
243            _ => unreachable!(),
244        }
245    }
246    fn read<ET: EngineTypes<Char = <Self::File as File>::Char>, F: FnMut(ET::Token)>(
247        &mut self,
248        idx: u8,
249        handler: &mut <ET::CSName as CSName<ET::Char>>::Handler,
250        state: &ET::State,
251        cont: F,
252    ) -> TeXResult<(), ET> {
253        match self.read_files.get_mut(idx as usize) {
254            Some(Some(f)) => {
255                match f.read(
256                    handler,
257                    state.get_catcode_scheme(),
258                    state.get_endline_char(),
259                    cont,
260                ) {
261                    Ok(_) => Ok(()),
262                    Err(e) => Err(e.into()),
263                }
264            }
265            _ => Err(TeXError::EmergencyStop),
266        }
267    }
268    fn readline<ET: EngineTypes<Char = <Self::File as File>::Char>, F: FnMut(ET::Token)>(
269        &mut self,
270        idx: u8,
271        _state: &ET::State,
272        cont: F,
273    ) -> TeXResult<(), ET> {
274        match self.read_files.get_mut(idx as usize) {
275            Some(Some(f)) => {
276                //debug_log!(debug => "readline: {}",f.source.path.display());
277                f.readline(cont);
278                Ok(())
279            }
280            _ => Err(TeXError::EmergencyStop),
281        }
282    }
283    fn eof(&self, idx: u8) -> bool {
284        match self.read_files.get(idx as usize) {
285            Some(Some(f)) => f.eof(),
286            _ => true,
287        }
288    }
289
290    fn close_in(&mut self, idx: u8) {
291        if let Some(f) = self.read_files.get_mut(idx as usize) {
292            *f = None;
293        }
294    }
295    fn open_out(&mut self, idx: u8, file: Self::File) {
296        if idx as usize >= self.write_files.len() {
297            self.write_files.resize((idx + 1) as usize, None);
298        }
299        self.write_files[idx as usize] = Some(WritableVirtualFile::new(file.path, file.id.unwrap()))
300    }
301    fn close_out(&mut self, idx: u8) {
302        if let Some(o) = self.write_files.get_mut(idx as usize) {
303            if let Some(f) = std::mem::take(o) {
304                let vf = VirtualFile {
305                    path: f.1,
306                    exists: true,
307                    source: Some(f.0.into()),
308                    id: Some(f.2),
309                    pipe: false,
310                };
311                self.files.insert(vf.path.clone(), vf);
312            }
313        }
314    }
315    fn write<ET: EngineTypes, D: std::fmt::Display>(
316        &mut self,
317        idx: i64,
318        string: D,
319        newlinechar: Option<ET::Char>,
320        aux: &mut EngineAux<ET>,
321    ) {
322        if idx < 0 {
323            aux.outputs.write_neg1(string)
324        } else if idx == 16 {
325            aux.outputs.write_16(string)
326        } else if idx == 17 {
327            aux.outputs.write_17(string)
328        } else if idx == 18 {
329            aux.outputs.write_18(string)
330        } else {
331            match self.write_files.get_mut(idx as usize) {
332                Some(Some(f)) => {
333                    let s = string.to_string().into_bytes();
334                    if let Some(c) = newlinechar {
335                        if let Ok(u) = c.try_into() {
336                            for l in s.split(|b| *b == u) {
337                                f.0.push(C::convert(l.to_vec()));
338                            }
339                            return;
340                        }
341                    }
342                    let tl = C::convert(s);
343                    f.0.push(tl);
344                }
345                _ => aux.outputs.write_other(string),
346            }
347        }
348    }
349}
350
/// A [`File`] that may live in memory or on the physical file system.
#[derive(Clone, Debug)]
pub struct VirtualFile<C: Character> {
    /// The (resolved) path of this file.
    pub path: PathBuf,
    /// Whether the contents are the output of a piped command
    /// (set for `|kpsewhich ...` files).
    pub pipe: bool,
    /// The interned display name of this file; returned as its
    /// [`SourceRefID`](File::SourceRefID).
    pub id: Option<string_interner::symbol::SymbolU32>,
    /// The in-memory contents, if any. If `None`, the file is read from
    /// the physical file system on demand.
    pub source: Option<VirtualFileContents<C>>,
    /// Whether this file is considered to exist.
    pub exists: bool,
}
360impl<C: Character> std::fmt::Display for VirtualFile<C> {
361    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
362        write!(f, "{}", self.path.display())
363    }
364}
365
366impl<C: Character> File for VirtualFile<C> {
367    type Char = C;
368    type LineSource = VirtualFileLineSource<C>;
369    type SourceRefID = Option<string_interner::symbol::SymbolU32>;
370
371    fn exists(&self) -> bool {
372        self.exists
373    }
374
375    fn sourceref(&self) -> Self::SourceRefID {
376        self.id
377    }
378
379    fn path(&self) -> &Path {
380        &self.path
381    }
382    fn line_source(self) -> Result<Self::LineSource,PathBuf> {
383        use std::io::BufRead;
384        match self.source {
385            Some(src) => Ok(VirtualFileLineSource {
386                path: self.path,
387                source: VirtualOrPhysicalFile::Virtual(src, 0),
388            }),
389            None => {
390                let Some(f) = std::fs::File::open(&self.path).ok() else {
391                    return Err(self.path)
392                };
393                let f = std::io::BufReader::new(f);
394                let f = f.split(b'\n');
395                Ok(VirtualFileLineSource {
396                    path: self.path,
397                    source: VirtualOrPhysicalFile::Physical(f),
398                })
399            }
400        }
401    }
402    fn size(&self) -> usize {
403        match &self.source {
404            Some(src) => {
405                let cnt = src.iter().map(|s| s.len()).sum::<usize>() + src.len();
406                if cnt == 0 {
407                    0
408                } else {
409                    cnt - 1
410                }
411            }
412            None => match std::fs::metadata(&self.path) {
413                Ok(md) => md.len() as usize,
414                _ => 0,
415            },
416        }
417    }
418}
419
/// The [`TextLineSource`] of a [`VirtualFile`]: yields lines either from
/// in-memory contents or from a file on the physical file system.
#[derive(Debug)]
pub struct VirtualFileLineSource<C: Character> {
    /// The path of the file being read.
    path: PathBuf,
    /// Where the lines come from (memory or disk), including the read position.
    source: VirtualOrPhysicalFile<C>,
}
426impl<C: Character> FileLineSource<C> for VirtualFileLineSource<C> {
427    fn path(&self) -> &Path {
428        &self.path
429    }
430}
431impl<C: Character> TextLineSource<C> for VirtualFileLineSource<C> {
432    fn get_line(&mut self) -> Option<TextLine<C>> {
433        match &mut self.source {
434            VirtualOrPhysicalFile::Virtual(v, i) => {
435                if *i >= v.len() {
436                    None
437                } else {
438                    let ret = v[*i].clone();
439                    *i += 1;
440                    Some(ret)
441                }
442            }
443            VirtualOrPhysicalFile::Physical(f) => match f.next() {
444                Some(Ok(mut s)) => {
445                    if let Some(b'\r') = s.last() {
446                        s.pop();
447                    }
448                    while let Some(b' ') = s.last() {
449                        s.pop();
450                        if s.last() == Some(&b'\\') {
451                            s.push(b' ');
452                            break;
453                        }
454                    }
455                    Some(C::convert(s))
456                }
457                _ => None,
458            },
459        }
460    }
461}
462
/// A [`SourceReference`] is a reference to a location in a file.
///
/// The file is identified by a `Copy`able id (see [`File::SourceRefID`]);
/// use [`display`](SourceReference::display) to render it with the file's name.
#[derive(Debug, Copy, Clone)]
pub struct SourceReference<FileId: Copy + Default> {
    /// The file this [`SourceReference`] refers to.
    pub file: FileId,
    /// The line number of this [`SourceReference`].
    pub line: usize,
    /// The column number of this [`SourceReference`].
    pub column: usize,
}
473impl<FileID: Copy + Default> SourceReference<FileID> {
474    /// Yields a [`Display`](std::fmt::Display)able version of this [`SourceReference`].
475    pub fn display<'a, F: File<SourceRefID = FileID>, FS: FileSystem<File = F>>(
476        &'a self,
477        fs: &'a FS,
478    ) -> impl std::fmt::Display + 'a {
479        DisplaySourceReference { rf: self, fs }
480    }
481}
482impl<FileID: Copy + Default> Default for SourceReference<FileID> {
483    fn default() -> Self {
484        Self {
485            file: Default::default(),
486            line: 0,
487            column: 0,
488        }
489    }
490}
/// The [`SourceReference`] type belonging to the [`File`] type of the given [`EngineTypes`].
pub type SourceRef<ET> = SourceReference<<<ET as EngineTypes>::File as File>::SourceRefID>;
492
/// Helper returned by [`SourceReference::display`]: pairs a source reference
/// with the [`FileSystem`] needed to resolve its file id into a name.
struct DisplaySourceReference<'a, FS: FileSystem> {
    /// The source reference to display.
    rf: &'a SourceReference<<<FS as FileSystem>::File as File>::SourceRefID>,
    /// The file system used to look up the file's name.
    fs: &'a FS,
}
497impl<'a, FS: FileSystem> std::fmt::Display for DisplaySourceReference<'a, FS> {
498    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
499        write!(
500            f,
501            "{} l. {} c. {}",
502            self.fs.ref_str(self.rf.file),
503            self.rf.line,
504            self.rf.column
505        )
506    }
507}
508
/// A file open for writing whose contents are collected in memory.
///
/// The fields are, in order: the lines written so far, the path of the
/// file, and the interned id of the file.
#[derive(Clone)]
struct WritableVirtualFile<C: Character>(
    Vec<Box<[C]>>,
    PathBuf,
    string_interner::symbol::SymbolU32,
);
515impl<C: Character> WritableVirtualFile<C> {
516    fn new(p: PathBuf, id: string_interner::symbol::SymbolU32) -> Self {
517        Self(Vec::new(), p, id)
518    }
519}
520
/// The in-memory contents of a file: a shared slice of lines.
type VirtualFileContents<C> = Ptr<[TextLine<C>]>;
522
/// Where a [`VirtualFileLineSource`] takes its lines from.
#[derive(Debug)]
enum VirtualOrPhysicalFile<C: Character> {
    /// In-memory contents, together with the index of the next line to return.
    Virtual(VirtualFileContents<C>, usize),
    /// A file on disk, read through a buffered reader and split into byte chunks
    /// at a delimiter (`\n` where it is constructed in `VirtualFile::line_source`).
    Physical(std::io::Split<std::io::BufReader<std::fs::File>>),
}