Skip to main content

tex_engine/engine/filesystem/
kpathsea.rs

1/*! An implementation of (the central part of) kpathsea, the path searching library used by TeX.
2Used by instantiating a [Kpathsea] instance with the working directory.
3
4
5**Example:**
6```rust
7use tex_engine::engine::filesystem::kpathsea::Kpathsea;
8let kpse = Kpathsea::new(std::env::current_dir().unwrap());
9assert!(kpse.kpsewhich("latex.ltx").path.to_str().unwrap().replace('\\' ,"/").ends_with("tex/latex/base/latex.ltx"));
10assert!(kpse.kpsewhich("article.cls").path.to_str().unwrap().replace('\\' ,"/").ends_with("tex/latex/base/article.cls"));
11// as expected, the `.tex` file extension is optional:
12assert!(kpse.kpsewhich("expl3-code").path.to_str().unwrap().replace('\\' ,"/").ends_with("tex/latex/l3kernel/expl3-code.tex"));
13```
14*/
15
16use crate::utils::HMap;
17use lazy_static::lazy_static;
18use std::collections::hash_map::Entry;
19use std::fmt::Debug;
20use std::fs::File;
21use std::io::BufRead;
22use std::path::{Path, PathBuf};
23use std::sync::Arc;
24
/// The result of a [`Kpathsea`] search.
///
/// `path` is populated even when the lookup failed (it then holds the candidate
/// location that was tried), so always consult `exists` before opening it.
///
/// TODO capitalization might be an issue. TeX is more permissive wrt case distinctions
/// than the filesystem, (apparently) iff either not covered by an ls-R file or
/// directly in a TEXINPUTS path...?
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct KpseResult {
    /// The path to the file.
    pub path: PathBuf,
    /// Whether the file exists.
    pub exists: bool,
}
36
lazy_static! {
    /// The process-wide [`KpathseaBase`]. It is built by `KpathseaBase::new()` the first
    /// time this static is dereferenced and handed out behind an [`Arc`].
    pub static ref KPATHSEA: Arc<KpathseaBase> = Arc::new(KpathseaBase::new());
}
40
/** A "database" of paths to search for files. Notably, the "global" part (e.g. the system-wide
`TEXINPUTS`, `TEXMF`, etc.) is shared between all instances of [`Kpathsea`]
and lazily computed on first use.
 **/
#[derive(Clone)]
pub struct Kpathsea {
    /// The working directory; relative lookups are resolved against it.
    pub pwd: PathBuf,
    /// Lookup table of the files found below `pwd`, keyed by the names under which
    /// they can be requested.
    pub local: HMap<String, PathBuf>,
    /// The shared, system-wide part of the database.
    pub global: Arc<KpathseaBase>,
}
51impl Kpathsea {
52    /// Create a new [`Kpathsea`] instance with the given working directory.
53    pub fn new(pwd: PathBuf) -> Kpathsea {
54        let global = KPATHSEA.clone();
55        let local = get_dot(global.recdot, &pwd);
56        Self { pwd, local, global }
57    }
58
59    /// Search for a file in the database.
60    pub fn kpsewhich<S: AsRef<str>>(&self, filestr: S) -> KpseResult {
61        use path_dedot::*;
62        let filestr = filestr.as_ref();
63        if filestr.starts_with("|kpsewhich") {
64            return KpseResult {
65                path: self.pwd.join(filestr),
66                exists: true,
67            };
68        }
69        if filestr.starts_with("nul:") && cfg!(target_os = "windows") {
70            return KpseResult {
71                path: PathBuf::from(filestr),
72                exists: true,
73            };
74        } else if filestr.starts_with("nul:") {
75            return KpseResult {
76                path: self.pwd.join(filestr),
77                exists: false,
78            };
79        } else if filestr.is_empty() {
80            panic!("Empty string in kpsewhich")
81        }
82        if filestr.contains("./") {
83            let p1 = self.pwd.join(Path::new(filestr));
84            let p = p1.parse_dot().unwrap();
85            if p.is_file() {
86                return KpseResult {
87                    exists: true,
88                    path: p.to_path_buf(),
89                };
90            }
91            let q = PathBuf::from(p.display().to_string() + ".tex");
92            if q.is_file() {
93                return KpseResult {
94                    exists: true,
95                    path: q,
96                };
97            }
98            return KpseResult {
99                exists: false,
100                path: p.to_path_buf(),
101            };
102        }
103        if Path::new(filestr).is_absolute() {
104            if Path::new(filestr).is_file() {
105                return KpseResult {
106                    path: PathBuf::from(&filestr),
107                    exists: true,
108                };
109            }
110            let pb = PathBuf::from(filestr.to_string() + ".tex");
111            if pb.is_file() {
112                return KpseResult {
113                    path: pb,
114                    exists: true,
115                };
116            }
117            return KpseResult {
118                path: PathBuf::from(&filestr),
119                exists: false,
120            };
121            //else {return KpseResult{path:PathBuf::from(format!("{}.tex",filestr)),exists:false} }
122        }
123        if let Some(p) = self.global.pre.get(filestr) {
124            return KpseResult {
125                path: p.clone(),
126                exists: true,
127            };
128        }
129        if let Some(p) = self.local.get(filestr) {
130            return KpseResult {
131                path: p.clone(),
132                exists: true,
133            };
134        }
135        if let Some(p) = self.global.post.get(filestr) {
136            return KpseResult {
137                path: p.clone(),
138                exists: true,
139            };
140        }
141        let p = self.pwd.join(Path::new(filestr));
142        KpseResult {
143            exists: p.exists(),
144            path: p,
145        }
146    }
147}
148
/// The "base" part of [`Kpathsea`] holding information about the "global" parts of the file database, which is
/// (or should be) shared between all instances. Never needs to be instantiated directly;
/// use the canonical [`static@KPATHSEA`] instance instead.
#[derive(Clone, Debug)]
pub struct KpathseaBase {
    /// The paths to search before the working directory.
    pub pre: HMap<String, PathBuf>,
    /// Whether to search recursively in the working directory.
    pub recdot: bool,
    /// The paths to search after the working directory.
    pub post: HMap<String, PathBuf>,
}
161
/// Diagnostic switch: if this is `true` at the moment the global [`KpathseaBase`] is
/// constructed, the variables, scan paths and resolved paths are printed to stdout.
/// The flag is read once during construction.
pub static LOG_KPATHSEA: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);
163
164impl KpathseaBase {
165    fn new() -> KpathseaBase {
166        let log = LOG_KPATHSEA.load(std::sync::atomic::Ordering::Relaxed);
167        let locout = std::process::Command::new("kpsewhich")
168            .args(vec!["-var-value", "SELFAUTOLOC"])
169            .output()
170            .expect("kpsewhich not found!")
171            .stdout;
172        let loc = std::str::from_utf8(locout.as_slice()).unwrap().trim();
173        let selfautoloc = Path::new(loc);
174
175        let mut vars = Self::get_vars(selfautoloc, loc.contains("miktex"));
176        let home = if cfg!(target_os = "windows") {
177            std::env::vars().find(|x| x.0 == "HOMEDRIVE").unwrap().1
178                + &std::env::vars().find(|x| x.0 == "HOMEPATH").unwrap().1
179        } else {
180            std::env::vars().find(|x| x.0 == "HOME").unwrap().1
181        };
182        if log {
183            println!("Variables:\n-------------------------");
184            for (k, v) in &vars {
185                println!("{k}:   {v}");
186            }
187        }
188        let paths = Self::paths_to_scan(&mut vars);
189        if log {
190            println!("-------------------------\nScan paths:\n-------------------------");
191            for p in &paths {
192                println!("{}", p);
193            }
194        }
195        let mut parser = PathParser {
196            vars,
197            diddot: false,
198            recdot: false,
199            predot: vec![],
200            postdot: vec![],
201            home: PathBuf::from(home),
202            resolved_vars: HMap::default(),
203        };
204
205        for s in paths {
206            parser.do_dir(&s);
207        }
208        if log {
209            println!("-------------------------\nResolved variables:\n-------------------------");
210            for (k, v) in &parser.resolved_vars {
211                let val = v
212                    .iter()
213                    .map(|v| String::from_utf8_lossy(v))
214                    .collect::<Vec<_>>()
215                    .join("; ");
216                println!("{k}:   {val}");
217            }
218            println!("-------------------------\nResolved paths:\n-------------------------");
219            for (p, b) in &parser.predot {
220                println!("{} ({b})", p.display());
221            }
222            for (p, b) in &parser.postdot {
223                println!("{} ({b})", p.display());
224            }
225            println!("-------------------------\n");
226        }
227        let (pre, dot, post) = parser.close(log);
228        if log {
229            println!("-------------------------\n");
230        }
231        KpathseaBase {
232            pre,
233            recdot: dot,
234            post,
235        }
236    }
237
238    /// Search for a file in the database; basically `kpsewhich <file>`, but without the working directory.
239    pub fn which<S: AsRef<str>>(&self, filestr: S) -> Option<PathBuf> {
240        let filestr = filestr.as_ref();
241        if Path::new(filestr).is_absolute() {
242            return Some(PathBuf::from(filestr));
243        }
244        if let Some(p) = self.pre.get(filestr) {
245            return Some(p.clone());
246        }
247        if let Some(p) = self.post.get(filestr) {
248            return Some(p.clone());
249        }
250        None
251    }
252
253    fn get_vars(selfautoloc: &Path, is_miktex: bool) -> HMap<String, String> {
254        fn texlive(vars: &mut HMap<String, String>) {
255            let out = std::process::Command::new("kpsewhich")
256                .args(vec!["-a", "texmf.cnf"])
257                .output()
258                .expect("kpsewhich not found!")
259                .stdout;
260            let outstr = std::str::from_utf8(out.as_slice()).unwrap();
261            let rs = outstr
262                .split('\n')
263                .map(|x| x.trim())
264                .filter(|s| !s.is_empty());
265            for r in rs {
266                let p = Path::new(r);
267                if let Ok(f) = File::open(p) {
268                    let lines = std::io::BufReader::new(f).lines();
269                    for l in lines.map_while(Result::ok) {
270                        if !l.starts_with('%') && !l.is_empty() {
271                            let mut kb = l.split('=').map(|x| x.trim()).collect::<Vec<_>>();
272                            if kb.len() == 2 {
273                                let v = kb.pop().unwrap();
274                                let mut k = kb.pop().unwrap();
275                                if let Some(i) = k.find('.') {
276                                    let pre = &k[..i];
277                                    let post = &k[i + 1..];
278                                    if post != "pdftex" {
279                                        continue;
280                                    }
281                                    k = pre;
282                                }
283                                if k.chars().any(|c| c.is_lowercase()) {
284                                    continue;
285                                }
286                                match vars.entry(k.to_string()) {
287                                    Entry::Occupied(_) => (),
288                                    Entry::Vacant(e) => {
289                                        e.insert(v.to_string());
290                                    }
291                                }
292                            }
293                        }
294                    }
295                }
296            }
297        }
298        fn miktex(vars: &mut HMap<String, String>) {
299            static TEXMF_CNF: [(&str, &str); 11] = [
300                ("TEXMFHOME", "~/texmf"),
301                ("TEXMFDOTDIR", "."),
302                ("TEXMFROOT", "$SELFAUTOGRANDPARENT"),
303                ("TEXMFDIST", "$SELFAUTOGRANDPARENT"),
304                ("TEXMF", "{$TEXMFHOME,!!$TEXMFDIST}"),
305                (
306                    "TEXINPUTS",
307                    "$TEXMFDOTDIR;$TEXMF/tex/{plain,generic,latex,}//",
308                ),
309                ("VARTEXFONTS", "$SELFAUTOGRANDPARENT/fonts"),
310                ("VFFONTS", "$TEXMF/fonts/vf//"),
311                ("TFMFONTS", "$TEXMF/fonts/tfm//"),
312                ("T1FONTS", "$TEXMF/fonts/type1//"),
313                ("ENCFONTS", "$TEXMF/fonts/enc//"),
314            ];
315            for (a, b) in TEXMF_CNF {
316                if !vars.contains_key(a) {
317                    vars.insert(a.to_string(), b.to_string());
318                }
319            }
320        }
321        let mut vars = HMap::<String, String>::default();
322        vars.insert(
323            "SELFAUTOLOC".to_string(),
324            selfautoloc.to_str().unwrap().to_string(),
325        );
326        if let Some(autodir) = selfautoloc.parent() {
327            vars.insert(
328                "SELFAUTODIR".to_string(),
329                autodir.to_str().unwrap().to_string(),
330            );
331            if let Some(autoparent) = autodir.parent() {
332                vars.insert(
333                    "SELFAUTOPARENT".to_string(),
334                    autoparent.to_str().unwrap().to_string(),
335                );
336                if let Some(autograndparent) = autoparent.parent() {
337                    vars.insert(
338                        "SELFAUTOGRANDPARENT".to_string(),
339                        autograndparent.to_str().unwrap().to_string(),
340                    );
341                } else {
342                    vars.insert(
343                        "SELFAUTOGRANDPARENT".to_string(),
344                        autoparent.to_str().unwrap().to_string(),
345                    );
346                }
347            } else {
348                vars.insert(
349                    "SELFAUTOPARENT".to_string(),
350                    autodir.to_str().unwrap().to_string(),
351                );
352                vars.insert(
353                    "SELFAUTOGRANDPARENT".to_string(),
354                    autodir.to_str().unwrap().to_string(),
355                );
356            }
357        } else {
358            vars.insert(
359                "SELFAUTODIR".to_string(),
360                selfautoloc.to_str().unwrap().to_string(),
361            );
362            vars.insert(
363                "SELFAUTOPARENT".to_string(),
364                selfautoloc.to_str().unwrap().to_string(),
365            );
366            vars.insert(
367                "SELFAUTOGRANDPARENT".to_string(),
368                selfautoloc.to_str().unwrap().to_string(),
369            );
370        }
371        if is_miktex {
372            miktex(&mut vars);
373        } else {
374            texlive(&mut vars);
375        }
376        vars.insert("progname".to_string(), "pdftex".to_string());
377        vars
378    }
379
380    fn paths_to_scan(vars: &mut HMap<String, String>) -> Vec<String> {
381        let mut todo = [
382            NamedVar("TEXINPUTS", false),
383            NamedVar("VARTEXFONTS", false),
384            NamedVar("VFFONTS", false),
385            NamedVar("TFMFONTS", false),
386            NamedVar("T1FONTS", false),
387            NamedVar("ENCFONTS", false),
388        ];
389
390        let mut ret = vec![];
391        for (k, mut v) in std::env::vars() {
392            if let Some(td) = todo.iter_mut().find(|x| x.0 == k) {
393                td.1 = true;
394                if v.ends_with(';') || v.ends_with(':') {
395                    if let Some(oldv) = vars.get(td.0) {
396                        v.push_str(oldv);
397                    }
398                }
399                ret.push(v);
400            } else {
401                match vars.entry(k) {
402                    Entry::Occupied(_) => (),
403                    Entry::Vacant(e) => {
404                        e.insert(v);
405                    }
406                }
407            }
408        }
409        for td in todo.iter().filter(|x| !x.1) {
410            if let Some(v) = vars.get(td.0) {
411                ret.push(v.clone());
412            }
413        }
414        ret
415    }
416}
417
/// A path variable name paired with a flag that `paths_to_scan` sets once the process
/// environment turned out to define that variable.
struct NamedVar(&'static str, bool);
/// Working state for expanding search path strings into concrete directories.
struct PathParser {
    /// Unexpanded variable definitions, keyed by variable name.
    vars: HMap<String, String>,
    /// Set once a `.` entry has been seen; later directories go to `postdot`.
    diddot: bool,
    /// Whether the first `.` entry requested recursion (i.e. ended in `//`).
    recdot: bool,
    /// Directories listed before `.`, each with its recursion flag.
    predot: Vec<(PathBuf, bool)>,
    /// Directories listed after `.`, each with its recursion flag.
    postdot: Vec<(PathBuf, bool)>,
    /// Substituted for every `~` in a path string.
    home: PathBuf,
    /// Cache of fully expanded variable values, keyed by variable name.
    resolved_vars: HMap<String, Vec<Vec<u8>>>,
}
/// The set of alternative byte strings built up while expanding one path element.
///
/// An empty set stands for "nothing accumulated yet"; appending to it starts a
/// single alternative.
struct StringSet(Vec<Vec<u8>>);
impl StringSet {
    /// Appends `s` to every alternative, or starts the first alternative if the set
    /// is still empty. An empty `s` is ignored.
    fn push_string(&mut self, s: &[u8]) {
        if s.is_empty() {
            return;
        }
        if self.0.is_empty() {
            self.0.push(s.to_vec());
        } else {
            for alternative in &mut self.0 {
                alternative.extend_from_slice(s);
            }
        }
    }

    /// Creates an empty set.
    fn new() -> StringSet {
        StringSet(Vec::new())
    }

    /// Combines the set with the alternatives in `strs`.
    ///
    /// No alternatives leave the set untouched, a single one is appended like
    /// [`StringSet::push_string`], and several replace the set by the cross product
    /// (ordered by existing alternative first, then by `strs`).
    fn split(&mut self, strs: &[Vec<u8>]) {
        match strs {
            [] => {}
            [only] => self.push_string(only),
            _ if self.0.is_empty() => self.0 = strs.to_vec(),
            _ => {
                self.0 = self
                    .0
                    .iter()
                    .flat_map(|prefix| {
                        strs.iter().map(move |suffix| {
                            let mut joined = prefix.clone();
                            joined.extend_from_slice(suffix);
                            joined
                        })
                    })
                    .collect();
            }
        }
    }

    /// Returns the accumulated alternatives and leaves the set empty.
    fn clear(&mut self) -> Vec<Vec<u8>> {
        std::mem::take(&mut self.0)
    }
}
471impl PathParser {
472    fn do_dir(&mut self, s: &str) {
473        for mut s in self
474            .parse_string(s)
475            .into_iter()
476            .map(|v| String::from_utf8(v).unwrap())
477        {
478            let mut recurse = false;
479            #[cfg(target_os = "windows")]
480            {
481                if s.ends_with("//") || s.ends_with("\\\\") {
482                    recurse = true;
483                    s.pop();
484                    s.pop();
485                } else if s.ends_with('/') || s.ends_with('\\') {
486                    s.pop();
487                }
488            }
489            #[cfg(not(target_os = "windows"))]
490            {
491                if s.ends_with("//") {
492                    recurse = true;
493                    s.pop();
494                    s.pop();
495                } else if s.ends_with('/') {
496                    s.pop();
497                }
498            }
499            if s == "." {
500                if self.diddot {
501                    continue;
502                }
503                self.diddot = true;
504                self.recdot = recurse;
505                continue;
506            }
507            self.push_path(Path::new(&s), recurse);
508        }
509    }
510    fn push_path(&mut self, p: &Path, rec: bool) {
511        let map = if self.diddot {
512            &mut self.postdot
513        } else {
514            &mut self.predot
515        };
516        for (ip, m) in map.iter_mut() {
517            if ip == p {
518                *m = *m || rec;
519                return;
520            }
521        }
522        map.push((p.to_path_buf(), rec));
523    }
524
    /// Expands the path string `s`; a convenience wrapper that forwards the bytes of
    /// `s` to `parse_bytes`.
    fn parse_string(&mut self, s: &str) -> Vec<Vec<u8>> {
        self.parse_bytes(s.as_bytes())
    }
528    fn parse_bytes(&mut self, mut s: &[u8]) -> Vec<Vec<u8>> {
529        let mut ret: Vec<Vec<u8>> = vec![];
530        let mut currs = StringSet::new();
531        let breaks = if cfg!(target_os = "windows") {
532            [b';', b';', b'$', b'!', b'{', b'~']
533        } else {
534            [b';', b':', b'$', b'!', b'{', b'~']
535        };
536        while !s.is_empty() {
537            if let Some((i, b)) = s.iter().enumerate().find(|(_, c)| breaks.contains(*c)) {
538                let first = &s[..i];
539                currs.push_string(first);
540                s = &s[i + 1..];
541                match b {
542                    b'!' => (),
543                    b'~' => currs.push_string(self.home.to_str().unwrap().as_bytes()),
544                    b';' | b':' => ret.extend(currs.clear().into_iter()),
545                    b'$' => {
546                        let name = if let Some((i, _)) =
547                            s.iter().enumerate().find(|(_, c)| !c.is_ascii_alphabetic())
548                        {
549                            let r = &s[..i];
550                            s = &s[i..];
551                            r
552                        } else {
553                            let r = s;
554                            s = b"";
555                            r
556                        };
557                        let name = std::str::from_utf8(name).unwrap();
558                        let resolved = self.get_resolved_var(name);
559                        currs.split(resolved);
560                    }
561                    b'{' => {
562                        let mut inbracks = 0;
563                        let mut ret = vec![vec![]];
564                        while !s.is_empty() {
565                            let b = s[0];
566                            s = &s[1..];
567                            if b == b'{' {
568                                inbracks += 1
569                            } else if b == b'}' && inbracks == 0 {
570                                break;
571                            } else if b == b'}' {
572                                inbracks -= 1
573                            } else if b == b',' && inbracks == 0 {
574                                ret.push(vec![]);
575                                continue;
576                            }
577                            ret.last_mut().unwrap().push(b)
578                        }
579                        let v = ret
580                            .into_iter()
581                            .flat_map(|v| self.parse_bytes(v.as_slice()))
582                            .collect::<Vec<_>>();
583                        currs.split(&v);
584                    }
585                    _ => unreachable!(),
586                }
587            } else {
588                currs.push_string(s);
589                break;
590            }
591        }
592        ret.extend(currs.clear());
593        ret
594    }
595    fn get_resolved_var(&mut self, key: &str) -> &Vec<Vec<u8>> {
596        if !self.resolved_vars.contains_key(key) {
597            let val = self.vars.get(key).unwrap().clone();
598            let resolved = self.parse_string(&val);
599            self.resolved_vars.insert(key.to_string(), resolved);
600        }
601        self.resolved_vars.get(key).unwrap()
602    }
603    fn close(self, log: bool) -> (HMap<String, PathBuf>, bool, HMap<String, PathBuf>) {
604        (
605            Self::close_i(self.predot, log),
606            self.recdot,
607            Self::close_i(self.postdot, log),
608        )
609    }
610    fn close_i(v: Vec<(PathBuf, bool)>, log: bool) -> HMap<String, PathBuf> {
611        let mut ret = HMap::default();
612        for (p, rec) in v.into_iter().rev() {
613            if log {
614                println!("Checking {} ({rec})", p.display());
615            }
616            let len = p.to_str().unwrap().len() + 1;
617            for e in walkdir::WalkDir::new(&p)
618                .follow_links(true)
619                .min_depth(1)
620                .into_iter()
621                .filter_map(|e| match e {
622                    Err(e) if log => {
623                        println!("ERROR: {e}");
624                        None
625                    }
626                    Err(_) => None,
627                    Ok(s)
628                        if s.path()
629                            .components()
630                            .any(|c| c.as_os_str().to_str() == Some(".git")) =>
631                    {
632                        None
633                    }
634                    Ok(e) => Some(e),
635                })
636            {
637                let sub = &e.path().to_str().unwrap()[len..];
638                if sub.contains('.') {
639                    if log {
640                        println!("Adding {} ({rec})", e.path().display());
641                    }
642                    let sub = sub.to_string();
643                    let pb = e.path().to_path_buf();
644                    if sub.ends_with(".tex") {
645                        let sub = sub[..sub.len() - 4].to_string();
646                        ret.insert(sub, pb.clone());
647                    }
648                    if rec {
649                        let filename = pb.file_name().unwrap().to_str().unwrap();
650                        ret.insert(filename.to_string(), pb.clone());
651                        if sub.ends_with(".tex") {
652                            ret.insert(filename[..filename.len() - 4].to_string(), pb.clone());
653                        }
654                    }
655                    ret.insert(sub, pb);
656                }
657            }
658        }
659        ret
660    }
661}
662
663fn get_dot(recdot: bool, pwd: &Path) -> HMap<String, PathBuf> {
664    let mut ret = HMap::default();
665    let len = pwd.to_str().unwrap().len() + 1;
666    for e in walkdir::WalkDir::new(pwd)
667        .min_depth(1)
668        .into_iter()
669        .filter_map(|e| match e.ok() {
670            None => None,
671            Some(s) if s.path().as_os_str().to_str().unwrap().contains(".git") => None,
672            Some(e) => Some(e),
673        })
674    {
675        let sub = &e.path().to_str().unwrap()[len..];
676        let sub = sub.to_string();
677        let pb = e.path().to_path_buf();
678        if e.file_type().is_file() {
679            let filename = pb.file_name().unwrap().to_str().unwrap();
680            if sub.ends_with(".tex") {
681                let sub = sub[..sub.len() - 4].to_string();
682                ret.insert(sub, pb.clone());
683            }
684            if recdot {
685                ret.insert(filename.to_string(), pb.clone());
686                //ret.insert(filename.to_uppercase(), pb.clone());
687                if sub.ends_with(".tex") {
688                    ret.insert(filename[..filename.len() - 4].to_string(), pb.clone());
689                    //ret.insert(filename.to_uppercase()[..filename.len() - 4].to_string(), pb.clone());
690                }
691            }
692            //let sub2 = sub[..sub.len() - filename.len()].to_string() + filename.to_uppercase().as_str();
693            //ret.insert(sub2, pb.clone());
694            ret.insert(sub, pb);
695        }
696    }
697    ret
698}