Skip to main content

tex_engine/engine/
filesystem.rs

1/*! Accessing files on `\input`, `\open`/`\write` etc. */
2
3use crate::engine::filesystem::kpathsea::Kpathsea;
4use crate::engine::mouth::strings::InputTokenizer;
5use crate::engine::state::State;
6use crate::engine::utils::outputs::Outputs;
7use crate::engine::{EngineAux, EngineTypes};
8use crate::tex::characters::{Character, StringLineSource, TextLine, TextLineSource};
9use crate::tex::tokens::control_sequences::CSName;
10use crate::utils::errors::{TeXError, TeXResult};
11use crate::utils::{HMap, Ptr};
12use std::path::{Path, PathBuf};
13
14pub mod kpathsea;
15
16/// A [`FileSystem`] provides access to files.
17pub trait FileSystem: Clone {
18    /// The type of files provided by this [`FileSystem`].
19    type File: File;
20    /// Creates a new [`FileSystem`] with the given working directory.
21    fn new(pwd: PathBuf) -> Self;
22    /// Returns the file with the given name in the file database.
23    /// May return nonexistent files in the CWD
24    fn get<S: AsRef<str>>(&mut self, path: S) -> Self::File;
25    /// Sets the working directory of this [`FileSystem`], returning the old working directory
26    /// and updating the file database.
27    fn set_pwd(&mut self, pwd: PathBuf) -> PathBuf;
28
29    /// Opens the file with the given index for writing (`\openout`).
30    fn open_out(&mut self, idx: u8, file: Self::File);
31    /// Opens the file with the given index for reading (`\openin`).
32    fn open_in(&mut self, idx: u8, file: Self::File);
33    /// Closes the file with the given index (`\closein`).
34    fn close_in(&mut self, idx: u8);
35    /// Closes the file with the given index (`\closeout`).
36    fn close_out(&mut self, idx: u8);
37    /// Ehether the file with the given index is at its end (`\ifeof`).
38    fn eof(&self, idx: u8) -> bool;
39    /// Writes the given string to the file with the given index (`\write`).
40    fn write<ET: EngineTypes, D: std::fmt::Display>(
41        &mut self,
42        idx: i64,
43        string: D,
44        newlinechar: Option<ET::Char>,
45        aux: &mut EngineAux<ET>,
46    );
47    /// Reads a line from the file with the given index and current [`CategoryCodeScheme`](crate::tex::catcodes::CategoryCodeScheme) (`\read`),
48    /// respecting groups (i.e. will continue reading at the end of a line until all open groups are closed).
49    fn read<ET: EngineTypes<Char = <Self::File as File>::Char>, F: FnMut(ET::Token)>(
50        &mut self,
51        idx: u8,
52        handler: &mut <ET::CSName as CSName<ET::Char>>::Handler,
53        state: &ET::State,
54        cont: F,
55    ) -> TeXResult<(), ET>;
56    /// Reads a line from the file with the given index using [`CategoryCode::Other`](crate::tex::catcodes::CategoryCode::Other)
57    /// expect for space characters (`\readline`).
58
59    fn readline<ET: EngineTypes<Char = <Self::File as File>::Char>, F: FnMut(ET::Token)>(
60        &mut self,
61        idx: u8,
62        state: &ET::State,
63        cont: F,
64    ) -> TeXResult<(), ET>;
65
66    /// Returns a human-readable representation of a [`SourceRefID`](File::SourceRefID); e.g. the file name/path.
67    fn ref_str(&self, id: <Self::File as File>::SourceRefID) -> &str;
68}
69
70/// A (virtual or physical) file.
71pub trait File: std::fmt::Display + Clone + std::fmt::Debug + 'static {
72    /// The type of characters to be read from the file.
73    type Char: Character;
74    /// A `Copy`able identifier for this file to be used in [`SourceReference`]s.
75    type SourceRefID: Copy + std::fmt::Debug + Default;
76    /// The type of line sources to be read from the file.
77    type LineSource: FileLineSource<Self::Char>;
78    /// Returns the path of this file.
79    fn path(&self) -> &Path;
80    /// Returns a line source for this file. Used by a [`Mouth`](crate::engine::mouth::Mouth) to read from this file.
81    fn line_source(self) -> Result<Self::LineSource, PathBuf>;
82    /// Returns whether this file exists.
83    fn exists(&self) -> bool {
84        self.path().exists()
85    }
86    /// Returns the size of this file in bytes.
87    fn size(&self) -> usize;
88    /// Returns a [`SourceRefID`](File::SourceRefID) for this file.
89    fn sourceref(&self) -> Self::SourceRefID;
90}
91
92/// An abstraction over a [`TextLineSource`] that has a `Path` - i.e. represents the contents of a file.
93pub trait FileLineSource<C: Character>: TextLineSource<C> {
94    fn path(&self) -> &Path;
95}
96
97/// A [`FileSystem`] that does not write to the local physical file system.
98/// If a file is modified, its contents are kept in memory.
99///
100pub struct NoOutputFileSystem<C: Character> {
101    pub kpse: Kpathsea,
102    files: HMap<PathBuf, VirtualFile<C>>,
103    pub envs: HMap<String, String>,
104    write_files: Vec<Option<WritableVirtualFile<C>>>,
105    read_files: Vec<Option<InputTokenizer<C, VirtualFileLineSource<C>>>>,
106    pub interner: string_interner::StringInterner<
107        string_interner::backend::StringBackend<string_interner::symbol::SymbolU32>,
108        rustc_hash::FxBuildHasher,
109    >,
110}
111impl<C: Character> Clone for NoOutputFileSystem<C> {
112    fn clone(&self) -> Self {
113        Self {
114            kpse: self.kpse.clone(),
115            files: self.files.clone(),
116            write_files: self.write_files.clone(),
117            envs: self.envs.clone(),
118            read_files: Vec::new(),
119            interner: self.interner.clone(),
120        }
121    }
122}
123impl<C: Character> NoOutputFileSystem<C> {
124    pub fn add_file(&mut self, path: PathBuf, file_content: &str) {
125        let string = if path.starts_with(&self.kpse.pwd) {
126            format!("./{}", path.strip_prefix(&self.kpse.pwd).unwrap().display())
127        } else {
128            path.display().to_string()
129        };
130        let source = StringLineSource::make_lines(file_content.bytes());
131        let f = VirtualFile {
132            path,
133            source: Some(source.into()),
134            pipe: false,
135            exists: true,
136            id: Some(self.interner.get_or_intern(string)),
137        };
138        self.files.insert(f.path.clone(), f.clone());
139    }
140}
141impl<C: Character> FileSystem for NoOutputFileSystem<C> {
142    type File = VirtualFile<C>;
143
144    fn new(pwd: PathBuf) -> Self {
145        let mut envs = HMap::default();
146        envs.insert("PWD".to_string(), pwd.display().to_string());
147        envs.insert("CD".to_string(), pwd.display().to_string());
148        Self {
149            envs,
150            kpse: Kpathsea::new(pwd),
151            files: HMap::default(),
152            write_files: Vec::new(),
153            read_files: Vec::new(),
154            interner: string_interner::StringInterner::new(),
155        }
156    }
157    fn ref_str(&self, id: <Self::File as File>::SourceRefID) -> &str {
158        match id {
159            Some(id) => self.interner.resolve(id).unwrap(),
160            None => "(NONE)",
161        }
162    }
163    fn get<S: AsRef<str>>(&mut self, path: S) -> Self::File {
164        let path = path.as_ref();
165        if path.is_empty() {
166            return VirtualFile {
167                path: self.kpse.pwd.clone(),
168                source: None,
169                pipe: false,
170                exists: false,
171                id: Some(self.interner.get_or_intern("")),
172            };
173        }
174        let kpath = self.kpse.kpsewhich(path);
175        match self.files.get(&kpath.path) {
176            Some(f) => f.clone(),
177            None => {
178                if path.starts_with("|kpsewhich ") {
179                    let s = &path[1..];
180                    let out = if cfg!(target_os = "windows") {
181                        std::process::Command::new("cmd")
182                            .current_dir(&self.kpse.pwd)
183                            .envs(self.envs.iter())
184                            .args(["/C", s]) //args.collect::<Vec<&str>>())
185                            .output()
186                            .expect("kpsewhich not found!")
187                            .stdout
188                    } else {
189                        let args = s[10..].split(' ');
190                        std::process::Command::new("kpsewhich")
191                            .current_dir(&self.kpse.pwd)
192                            .envs(self.envs.iter())
193                            .args(args.collect::<Vec<&str>>())
194                            .output()
195                            .expect("kpsewhich not found!")
196                            .stdout
197                    };
198                    let source = Some(StringLineSource::make_lines(out.into_iter()).into());
199                    let f = VirtualFile {
200                        path: kpath.path,
201                        source,
202                        pipe: true,
203                        exists: kpath.exists,
204                        id: Some(self.interner.get_or_intern(path)),
205                    };
206                    self.files.insert(f.path.clone(), f.clone());
207                    return f;
208                }
209                let string = if kpath.path.starts_with(&self.kpse.pwd) {
210                    format!(
211                        "./{}",
212                        kpath.path.strip_prefix(&self.kpse.pwd).unwrap().display()
213                    )
214                } else {
215                    kpath.path.display().to_string()
216                };
217                let f = VirtualFile {
218                    path: kpath.path,
219                    source: None,
220                    pipe: false,
221                    exists: kpath.exists,
222                    id: Some(self.interner.get_or_intern(string)),
223                };
224                self.files.insert(f.path.clone(), f.clone());
225                f
226            }
227        }
228    }
229    fn set_pwd(&mut self, pwd: PathBuf) -> PathBuf {
230        self.envs
231            .insert("PWD".to_string(), pwd.display().to_string());
232        self.envs
233            .insert("CD".to_string(), pwd.display().to_string());
234        let old = std::mem::replace(&mut self.kpse, Kpathsea::new(pwd));
235        old.pwd
236    }
237    fn open_in(&mut self, idx: u8, file: Self::File) {
238        while self.read_files.len() <= idx as usize {
239            self.read_files.push(None);
240        }
241        match self.read_files.get_mut(idx as usize) {
242            Some(n) => *n = file.line_source().ok().map(InputTokenizer::new),
243            _ => unreachable!(),
244        }
245    }
246    fn read<ET: EngineTypes<Char = <Self::File as File>::Char>, F: FnMut(ET::Token)>(
247        &mut self,
248        idx: u8,
249        handler: &mut <ET::CSName as CSName<ET::Char>>::Handler,
250        state: &ET::State,
251        cont: F,
252    ) -> TeXResult<(), ET> {
253        match self.read_files.get_mut(idx as usize) {
254            Some(Some(f)) => {
255                let par = state.get_par_token();
256                match f.read(
257                    handler,
258                    state.get_catcode_scheme(),
259                    state.get_endline_char(),
260                    &par,
261                    cont,
262                ) {
263                    Ok(_) => Ok(()),
264                    Err(e) => Err(e.into()),
265                }
266            }
267            _ => Err(TeXError::EmergencyStop),
268        }
269    }
270    fn readline<ET: EngineTypes<Char = <Self::File as File>::Char>, F: FnMut(ET::Token)>(
271        &mut self,
272        idx: u8,
273        _state: &ET::State,
274        cont: F,
275    ) -> TeXResult<(), ET> {
276        match self.read_files.get_mut(idx as usize) {
277            Some(Some(f)) => {
278                //debug_log!(debug => "readline: {}",f.source.path.display());
279                f.readline(cont);
280                Ok(())
281            }
282            _ => Err(TeXError::EmergencyStop),
283        }
284    }
285    fn eof(&self, idx: u8) -> bool {
286        match self.read_files.get(idx as usize) {
287            Some(Some(f)) => f.eof(),
288            _ => true,
289        }
290    }
291
292    fn close_in(&mut self, idx: u8) {
293        if let Some(f) = self.read_files.get_mut(idx as usize) {
294            *f = None;
295        }
296    }
297    fn open_out(&mut self, idx: u8, file: Self::File) {
298        if idx as usize >= self.write_files.len() {
299            self.write_files.resize((idx + 1) as usize, None);
300        }
301        self.write_files[idx as usize] = Some(WritableVirtualFile::new(file.path, file.id.unwrap()))
302    }
303    fn close_out(&mut self, idx: u8) {
304        if let Some(o) = self.write_files.get_mut(idx as usize) {
305            if let Some(f) = std::mem::take(o) {
306                let vf = VirtualFile {
307                    path: f.1,
308                    exists: true,
309                    source: Some(f.0.into()),
310                    id: Some(f.2),
311                    pipe: false,
312                };
313                self.files.insert(vf.path.clone(), vf);
314            }
315        }
316    }
317    fn write<ET: EngineTypes, D: std::fmt::Display>(
318        &mut self,
319        idx: i64,
320        string: D,
321        newlinechar: Option<ET::Char>,
322        aux: &mut EngineAux<ET>,
323    ) {
324        if idx < 0 {
325            aux.outputs.write_neg1(string)
326        } else if idx == 16 {
327            aux.outputs.write_16(string)
328        } else if idx == 17 {
329            aux.outputs.write_17(string)
330        } else if idx == 18 {
331            aux.outputs.write_18(string)
332        } else {
333            match self.write_files.get_mut(idx as usize) {
334                Some(Some(f)) => {
335                    let s = string.to_string().into_bytes();
336                    if let Some(c) = newlinechar {
337                        if let Ok(u) = c.try_into() {
338                            for l in s.split(|b| *b == u) {
339                                f.0.push(C::convert(l.to_vec()));
340                            }
341                            return;
342                        }
343                    }
344                    let tl = C::convert(s);
345                    f.0.push(tl);
346                }
347                _ => aux.outputs.write_other(string),
348            }
349        }
350    }
351}
352
353/// A [`File`] that may live in memory or on the physical file system.
354#[derive(Clone, Debug)]
355pub struct VirtualFile<C: Character> {
356    pub path: PathBuf,
357    pub pipe: bool,
358    pub id: Option<string_interner::symbol::SymbolU32>,
359    pub source: Option<VirtualFileContents<C>>,
360    pub exists: bool,
361}
362impl<C: Character> std::fmt::Display for VirtualFile<C> {
363    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
364        write!(f, "{}", self.path.display())
365    }
366}
367
368impl<C: Character> File for VirtualFile<C> {
369    type Char = C;
370    type LineSource = VirtualFileLineSource<C>;
371    type SourceRefID = Option<string_interner::symbol::SymbolU32>;
372
373    fn exists(&self) -> bool {
374        self.exists
375    }
376
377    fn sourceref(&self) -> Self::SourceRefID {
378        self.id
379    }
380
381    fn path(&self) -> &Path {
382        &self.path
383    }
384    fn line_source(self) -> Result<Self::LineSource, PathBuf> {
385        use std::io::BufRead;
386        match self.source {
387            Some(src) => Ok(VirtualFileLineSource {
388                path: self.path,
389                source: VirtualOrPhysicalFile::Virtual(src, 0),
390            }),
391            None => {
392                let Some(f) = std::fs::File::open(&self.path).ok() else {
393                    return Err(self.path);
394                };
395                let f = std::io::BufReader::new(f);
396                let f = f.split(b'\n');
397                Ok(VirtualFileLineSource {
398                    path: self.path,
399                    source: VirtualOrPhysicalFile::Physical(f),
400                })
401            }
402        }
403    }
404    fn size(&self) -> usize {
405        match &self.source {
406            Some(src) => {
407                let cnt = src.iter().map(|s| s.len()).sum::<usize>() + src.len();
408                if cnt == 0 {
409                    0
410                } else {
411                    cnt - 1
412                }
413            }
414            None => match std::fs::metadata(&self.path) {
415                Ok(md) => md.len() as usize,
416                _ => 0,
417            },
418        }
419    }
420}
421
422/// A [`TextLineSource`] that lives in memory.
423#[derive(Debug)]
424pub struct VirtualFileLineSource<C: Character> {
425    path: PathBuf,
426    source: VirtualOrPhysicalFile<C>,
427}
428impl<C: Character> FileLineSource<C> for VirtualFileLineSource<C> {
429    fn path(&self) -> &Path {
430        &self.path
431    }
432}
433impl<C: Character> TextLineSource<C> for VirtualFileLineSource<C> {
434    fn get_line(&mut self) -> Option<TextLine<C>> {
435        match &mut self.source {
436            VirtualOrPhysicalFile::Virtual(v, i) => {
437                if *i >= v.len() {
438                    None
439                } else {
440                    let ret = v[*i].clone();
441                    *i += 1;
442                    Some(ret)
443                }
444            }
445            VirtualOrPhysicalFile::Physical(f) => match f.next() {
446                Some(Ok(mut s)) => {
447                    if let Some(b'\r') = s.last() {
448                        s.pop();
449                    }
450                    while let Some(b' ') = s.last() {
451                        s.pop();
452                        if s.last() == Some(&b'\\') {
453                            s.push(b' ');
454                            break;
455                        }
456                    }
457                    Some(C::convert(s))
458                }
459                _ => None,
460            },
461        }
462    }
463}
464
/// A [`SourceReference`] points to a location in a file.
#[derive(Clone, Copy, Debug)]
pub struct SourceReference<FileId>
where
    FileId: Copy + Default,
{
    /// The file being referred to.
    pub file: FileId,
    /// The line number within the file.
    pub line: usize,
    /// The column number within the line.
    pub column: usize,
}
475impl<FileID: Copy + Default> SourceReference<FileID> {
476    /// Yields a [`Display`](std::fmt::Display)able version of this [`SourceReference`].
477    pub fn display<'a, F: File<SourceRefID = FileID>, FS: FileSystem<File = F>>(
478        &'a self,
479        fs: &'a FS,
480    ) -> impl std::fmt::Display + 'a {
481        DisplaySourceReference { rf: self, fs }
482    }
483}
484impl<FileID: Copy + Default> Default for SourceReference<FileID> {
485    fn default() -> Self {
486        Self {
487            file: Default::default(),
488            line: 0,
489            column: 0,
490        }
491    }
492}
/// The [`SourceReference`] type belonging to the engine types `ET`, i.e. one using the
/// [`SourceRefID`](File::SourceRefID) of `ET`'s file type as file identifier.
pub type SourceRef<ET> = SourceReference<<<ET as EngineTypes>::File as File>::SourceRefID>;
494
495struct DisplaySourceReference<'a, FS: FileSystem> {
496    rf: &'a SourceReference<<<FS as FileSystem>::File as File>::SourceRefID>,
497    fs: &'a FS,
498}
499impl<'a, FS: FileSystem> std::fmt::Display for DisplaySourceReference<'a, FS> {
500    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
501        write!(
502            f,
503            "{} l. {} c. {}",
504            self.fs.ref_str(self.rf.file),
505            self.rf.line,
506            self.rf.column
507        )
508    }
509}
510
511#[derive(Clone)]
512struct WritableVirtualFile<C: Character>(
513    Vec<Box<[C]>>,
514    PathBuf,
515    string_interner::symbol::SymbolU32,
516);
517impl<C: Character> WritableVirtualFile<C> {
518    fn new(p: PathBuf, id: string_interner::symbol::SymbolU32) -> Self {
519        Self(Vec::new(), p, id)
520    }
521}
522
/// The in-memory contents of a [`VirtualFile`]: a shared slice of its lines.
type VirtualFileContents<C> = Ptr<[TextLine<C>]>;
524
525#[derive(Debug)]
526enum VirtualOrPhysicalFile<C: Character> {
527    Virtual(VirtualFileContents<C>, usize),
528    Physical(std::io::Split<std::io::BufReader<std::fs::File>>),
529}