Skip to main content

tex_engine/engine/filesystem/
kpathsea.rs

1/*! An implementation of (the central part of) kpathsea, the path searching library used by TeX.
2Used by instantiating a [Kpathsea] instance with the working directory.
3
4
5**Example:**
6```rust
7use tex_engine::engine::filesystem::kpathsea::Kpathsea;
8let kpse = Kpathsea::new(std::env::current_dir().unwrap());
9assert!(kpse.kpsewhich("latex.ltx").path.to_str().unwrap().replace('\\' ,"/").ends_with("tex/latex/base/latex.ltx"));
10assert!(kpse.kpsewhich("article.cls").path.to_str().unwrap().replace('\\' ,"/").ends_with("tex/latex/base/article.cls"));
11// as expected, the `.tex` file extension is optional:
12assert!(kpse.kpsewhich("expl3-code").path.to_str().unwrap().replace('\\' ,"/").ends_with("tex/latex/l3kernel/expl3-code.tex"));
13```
14*/
15
16use crate::utils::HMap;
17use lazy_static::lazy_static;
18use std::collections::hash_map::Entry;
19use std::fmt::Debug;
20use std::fs::File;
21use std::io::BufRead;
22use std::path::{Path, PathBuf};
23use std::sync::Arc;
24
/// The result of a [`Kpathsea`] search.
///
/// A result is produced for every query; [`exists`](Self::exists) tells whether the
/// search considered the file to be present at [`path`](Self::path).
///
/// TODO capitalization might be an issue. TeX is more permissive wrt case distinctions
/// than the filesystem, (apparently) iff either not covered by an ls-R file or
/// directly in a TEXINPUTS path...?
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct KpseResult {
    /// The path to the file.
    pub path: PathBuf,
    /// Whether the file exists.
    pub exists: bool,
}
35
lazy_static! {
    /// The process-wide [`KpathseaBase`]. It is built by `KpathseaBase::new()` the first time
    /// it is accessed and handed out as an [`Arc`] so that every [`Kpathsea`] shares it.
    pub static ref KPATHSEA: Arc<KpathseaBase> = Arc::new(KpathseaBase::new());
}
39
/** A "database" of paths to search for files. Notably, the "global" part (e.g. the system-wide
`TEXINPUTS`, `TEXMF`, etc.) is shared between all instances of [`Kpathsea`]
and lazily computed on first use.
 **/
#[derive(Clone)]
pub struct Kpathsea {
    /// The working directory that relative file names are resolved against.
    pub pwd: PathBuf,
    /// Lookup table for the files below [`pwd`](Self::pwd), keyed by their path relative to it
    /// (and additionally by bare file name when the working directory is searched recursively).
    pub local: HMap<String, PathBuf>,
    /// The shared, system-wide part of the database.
    pub global: Arc<KpathseaBase>,
}
50impl Kpathsea {
51    /// Create a new [`Kpathsea`] instance with the given working directory.
52    pub fn new(pwd: PathBuf) -> Kpathsea {
53        let global = KPATHSEA.clone();
54        let local = get_dot(global.recdot, &pwd);
55        Self { pwd, local, global }
56    }
57
58    /// Search for a file in the database.
59    pub fn kpsewhich<S: AsRef<str>>(&self, filestr: S) -> KpseResult {
60        use path_dedot::*;
61        let filestr = filestr.as_ref();
62        if filestr.starts_with("|kpsewhich") {
63            return KpseResult {
64                path: self.pwd.join(filestr),
65                exists: true,
66            };
67        }
68        if filestr.starts_with("nul:") && cfg!(target_os = "windows") {
69            return KpseResult {
70                path: PathBuf::from(filestr),
71                exists: true,
72            };
73        } else if filestr.starts_with("nul:") {
74            return KpseResult {
75                path: self.pwd.join(filestr),
76                exists: false,
77            };
78        } else if filestr.is_empty() {
79            panic!("Empty string in kpsewhich")
80        }
81        if filestr.contains("./") {
82            let p1 = self.pwd.join(Path::new(filestr));
83            let p = p1.parse_dot().unwrap();
84            if p.is_file() {
85                return KpseResult {
86                    exists: true,
87                    path: p.to_path_buf(),
88                };
89            }
90            let q = PathBuf::from(p.display().to_string() + ".tex");
91            if q.is_file() {
92                return KpseResult {
93                    exists: true,
94                    path: q,
95                };
96            }
97            return KpseResult {
98                exists: false,
99                path: p.to_path_buf(),
100            };
101        }
102        if Path::new(filestr).is_absolute() {
103            if Path::new(filestr).is_file() {
104                return KpseResult {
105                    path: PathBuf::from(&filestr),
106                    exists: true,
107                };
108            }
109            let pb = PathBuf::from(filestr.to_string() + ".tex");
110            if pb.is_file() {
111                return KpseResult {
112                    path: pb,
113                    exists: true,
114                };
115            }
116            return KpseResult {
117                path: PathBuf::from(&filestr),
118                exists: false,
119            };
120            //else {return KpseResult{path:PathBuf::from(format!("{}.tex",filestr)),exists:false} }
121        }
122        if let Some(p) = self.global.pre.get(filestr) {
123            return KpseResult {
124                path: p.clone(),
125                exists: true,
126            };
127        }
128        if let Some(p) = self.local.get(filestr) {
129            return KpseResult {
130                path: p.clone(),
131                exists: true,
132            };
133        }
134        if let Some(p) = self.global.post.get(filestr) {
135            return KpseResult {
136                path: p.clone(),
137                exists: true,
138            };
139        }
140        let p = self.pwd.join(Path::new(filestr));
141        KpseResult {
142            exists: p.exists(),
143            path: p,
144        }
145    }
146}
147
/// The "base" part of [`Kpathsea`] holding information about the "global" parts of the file database, which is
/// (or should be) shared between all instances. Never needs to be instantiated directly;
/// use the canonical [`static@KPATHSEA`] instance instead.
#[derive(Clone, Debug)]
pub struct KpathseaBase {
    /// Lookup table (name → full path) for the paths to search before the working directory.
    pub pre: HMap<String, PathBuf>,
    /// Whether to search recursively in the working directory.
    pub recdot: bool,
    /// Lookup table (name → full path) for the paths to search after the working directory.
    pub post: HMap<String, PathBuf>,
}
160
/// Debug switch: if this is `true` at the moment the global [`KpathseaBase`] is built
/// (i.e. on first access of [`static@KPATHSEA`]), the variables, scan paths and indexed
/// files are printed to stdout. Setting it afterwards has no effect on that instance.
pub static LOG_KPATHSEA: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);
162
163impl KpathseaBase {
164    fn new() -> KpathseaBase {
165        let log = LOG_KPATHSEA.load(std::sync::atomic::Ordering::Relaxed);
166        let locout = std::process::Command::new("kpsewhich")
167            .args(vec!["-var-value", "SELFAUTOLOC"])
168            .output()
169            .expect("kpsewhich not found!")
170            .stdout;
171        let loc = std::str::from_utf8(locout.as_slice()).unwrap().trim();
172        let selfautoloc = Path::new(loc);
173
174        let (pre, dot, post) = if loc.contains("miktex") {
175            todo!()
176        } else {
177            let mut vars = Self::get_vars(selfautoloc);
178            let home = if cfg!(target_os = "windows") {
179                std::env::vars().find(|x| x.0 == "HOMEDRIVE").unwrap().1
180                    + &std::env::vars().find(|x| x.0 == "HOMEPATH").unwrap().1
181            } else {
182                std::env::vars().find(|x| x.0 == "HOME").unwrap().1
183            };
184            if log {
185                println!("Variables:\n-------------------------");
186                for (k, v) in &vars {
187                    println!("{k}:   {v}");
188                }
189            }
190
191            let paths = Self::paths_to_scan(&mut vars);
192            if log {
193                println!("-------------------------\nScan paths:\n-------------------------");
194                for p in &paths {
195                    println!("{}", p);
196                }
197            }
198            let mut parser = PathParser {
199                vars,
200                diddot: false,
201                recdot: false,
202                predot: vec![],
203                postdot: vec![],
204                home: PathBuf::from(home),
205                resolved_vars: HMap::default(),
206            };
207
208            for s in paths {
209                parser.do_dir(&s);
210            }
211
212            if log {
213                println!(
214                    "-------------------------\nResolved variables:\n-------------------------"
215                );
216                for (k, v) in &parser.resolved_vars {
217                    let val = v
218                        .iter()
219                        .map(|v| String::from_utf8_lossy(v))
220                        .collect::<Vec<_>>()
221                        .join("; ");
222                    println!("{k}:   {val}");
223                }
224                println!("-------------------------\nResolved paths:\n-------------------------");
225                for (p, b) in &parser.predot {
226                    println!("{} ({b})", p.display());
227                }
228                for (p, b) in &parser.postdot {
229                    println!("{} ({b})", p.display());
230                }
231                println!("-------------------------\n");
232            }
233
234            let r = if log {
235                parser.close::<true>()
236            } else {
237                parser.close::<false>()
238            };
239            if log {
240                println!("-------------------------\n");
241            }
242            r
243        };
244        KpathseaBase {
245            pre,
246            recdot: dot,
247            post,
248        }
249    }
250
251    /// Search for a file in the database; basically `kpsewhich <file>`, but without the working directory.
252    pub fn which<S: AsRef<str>>(&self, filestr: S) -> Option<PathBuf> {
253        let filestr = filestr.as_ref();
254        if Path::new(filestr).is_absolute() {
255            return Some(PathBuf::from(filestr));
256        }
257        if let Some(p) = self.pre.get(filestr) {
258            return Some(p.clone());
259        }
260        if let Some(p) = self.post.get(filestr) {
261            return Some(p.clone());
262        }
263        None
264    }
265
266    fn get_vars(selfautoloc: &Path) -> HMap<String, String> {
267        let mut vars = HMap::<String, String>::default();
268        vars.insert(
269            "SELFAUTOLOC".to_string(),
270            selfautoloc.to_str().unwrap().to_string(),
271        );
272        if let Some(autodir) = selfautoloc.parent() {
273            vars.insert(
274                "SELFAUTODIR".to_string(),
275                autodir.to_str().unwrap().to_string(),
276            );
277            if let Some(autoparent) = autodir.parent() {
278                vars.insert(
279                    "SELFAUTOPARENT".to_string(),
280                    autoparent.to_str().unwrap().to_string(),
281                );
282                if let Some(autograndparent) = autoparent.parent() {
283                    vars.insert(
284                        "SELFAUTOGRANDPARENT".to_string(),
285                        autograndparent.to_str().unwrap().to_string(),
286                    );
287                } else {
288                    vars.insert(
289                        "SELFAUTOGRANDPARENT".to_string(),
290                        autoparent.to_str().unwrap().to_string(),
291                    );
292                }
293            } else {
294                vars.insert(
295                    "SELFAUTOPARENT".to_string(),
296                    autodir.to_str().unwrap().to_string(),
297                );
298                vars.insert(
299                    "SELFAUTOGRANDPARENT".to_string(),
300                    autodir.to_str().unwrap().to_string(),
301                );
302            }
303        } else {
304            vars.insert(
305                "SELFAUTODIR".to_string(),
306                selfautoloc.to_str().unwrap().to_string(),
307            );
308            vars.insert(
309                "SELFAUTOPARENT".to_string(),
310                selfautoloc.to_str().unwrap().to_string(),
311            );
312            vars.insert(
313                "SELFAUTOGRANDPARENT".to_string(),
314                selfautoloc.to_str().unwrap().to_string(),
315            );
316        }
317
318        let out = std::process::Command::new("kpsewhich")
319            .args(vec!["-a", "texmf.cnf"])
320            .output()
321            .expect("kpsewhich not found!")
322            .stdout;
323        let outstr = std::str::from_utf8(out.as_slice()).unwrap();
324        let rs = outstr
325            .split('\n')
326            .map(|x| x.trim())
327            .filter(|s| !s.is_empty());
328        for r in rs {
329            let p = Path::new(r);
330            if let Ok(f) = File::open(p) {
331                let lines = std::io::BufReader::new(f).lines();
332                for l in lines.map_while(Result::ok) {
333                    if !l.starts_with('%') && !l.is_empty() {
334                        let mut kb = l.split('=').map(|x| x.trim()).collect::<Vec<_>>();
335                        if kb.len() == 2 {
336                            let v = kb.pop().unwrap();
337                            let mut k = kb.pop().unwrap();
338                            if let Some(i) = k.find('.') {
339                                let pre = &k[..i];
340                                let post = &k[i + 1..];
341                                if post != "pdftex" {
342                                    continue;
343                                }
344                                k = pre;
345                            }
346                            if k.chars().any(|c| c.is_lowercase()) {
347                                continue;
348                            }
349                            match vars.entry(k.to_string()) {
350                                Entry::Occupied(_) => (),
351                                Entry::Vacant(e) => {
352                                    e.insert(v.to_string());
353                                }
354                            }
355                        }
356                    }
357                }
358            }
359        }
360        vars.insert("progname".to_string(), "pdftex".to_string());
361        vars
362    }
363
364    fn paths_to_scan(vars: &mut HMap<String, String>) -> Vec<String> {
365        let mut todo = [
366            NamedVar("TEXINPUTS", false),
367            NamedVar("VARTEXFONTS", false),
368            NamedVar("VFFONTS", false),
369            NamedVar("TFMFONTS", false),
370            NamedVar("T1FONTS", false),
371            NamedVar("ENCFONTS", false),
372        ];
373
374        let mut ret = vec![];
375        for (k, mut v) in std::env::vars() {
376            if let Some(td) = todo.iter_mut().find(|x| x.0 == k) {
377                td.1 = true;
378                if v.ends_with(';') || v.ends_with(':') {
379                    if let Some(oldv) = vars.get(td.0) {
380                        v.push_str(oldv);
381                    }
382                }
383                ret.push(v);
384            } else {
385                match vars.entry(k) {
386                    Entry::Occupied(_) => (),
387                    Entry::Vacant(e) => {
388                        e.insert(v);
389                    }
390                }
391            }
392        }
393        for td in todo.iter().filter(|x| !x.1) {
394            if let Some(v) = vars.get(td.0) {
395                ret.push(v.clone());
396            }
397        }
398        ret
399    }
400}
401
/// A path variable name together with a flag recording whether the process environment
/// provided a value for it.
struct NamedVar(&'static str, bool);
/// Expands kpathsea search-path strings into concrete directories, split into those that
/// come before and those that come after the working directory (`.`).
struct PathParser {
    /// Unexpanded variable values, keyed by variable name.
    vars: HMap<String, String>,
    /// Whether `.` has been encountered yet.
    diddot: bool,
    /// Whether `.` was marked for recursive search (`.//`) when it was first encountered.
    recdot: bool,
    /// Directories seen before `.`, each with its "search recursively" flag.
    predot: Vec<(PathBuf, bool)>,
    /// Directories seen after `.`, each with its "search recursively" flag.
    postdot: Vec<(PathBuf, bool)>,
    /// The directory substituted for `~`.
    home: PathBuf,
    /// Cache of already expanded variables.
    resolved_vars: HMap<String, Vec<Vec<u8>>>,
}
/// A set of alternative byte strings that are built up in lockstep while a path element is
/// expanded. The empty set behaves like a single empty string: the first thing appended to
/// it becomes its content.
struct StringSet(Vec<Vec<u8>>);
impl StringSet {
    /// Creates an empty set.
    fn new() -> StringSet {
        StringSet(Vec::new())
    }

    /// Appends `s` to every alternative; if there is none yet, `s` becomes the only one.
    /// Appending an empty string is a no-op.
    fn push_string(&mut self, s: &[u8]) {
        if s.is_empty() {
            return;
        }
        if self.0.is_empty() {
            self.0.push(s.to_vec());
        } else {
            for alternative in &mut self.0 {
                alternative.extend_from_slice(s);
            }
        }
    }

    /// Appends each of `strs` to each current alternative, i.e. replaces the set by the
    /// cross product `current × strs` (current-major order). An empty `strs` leaves the set
    /// untouched and a single element is simply appended via [`Self::push_string`].
    fn split(&mut self, strs: &[Vec<u8>]) {
        match strs {
            [] => {}
            [only] => self.push_string(only),
            _ if self.0.is_empty() => self.0 = strs.to_vec(),
            _ => {
                let mut product = Vec::with_capacity(self.0.len() * strs.len());
                for prefix in &self.0 {
                    for suffix in strs {
                        let mut joined = Vec::with_capacity(prefix.len() + suffix.len());
                        joined.extend_from_slice(prefix);
                        joined.extend_from_slice(suffix);
                        product.push(joined);
                    }
                }
                self.0 = product;
            }
        }
    }

    /// Returns all alternatives and leaves the set empty.
    fn clear(&mut self) -> Vec<Vec<u8>> {
        std::mem::take(&mut self.0)
    }
}
455impl PathParser {
456    fn do_dir(&mut self, s: &str) {
457        for mut s in self
458            .parse_string(s)
459            .into_iter()
460            .map(|v| String::from_utf8(v).unwrap())
461        {
462            let mut recurse = false;
463            if s.ends_with("//") {
464                recurse = true;
465                s.pop();
466                s.pop();
467            } else if s.ends_with('/') {
468                s.pop();
469            }
470            if s == "." {
471                if self.diddot {
472                    continue;
473                }
474                self.diddot = true;
475                self.recdot = recurse;
476                continue;
477            }
478            self.push_path(Path::new(&s), recurse);
479        }
480    }
481    fn push_path(&mut self, p: &Path, rec: bool) {
482        let map = if self.diddot {
483            &mut self.postdot
484        } else {
485            &mut self.predot
486        };
487        for (ip, m) in map.iter_mut() {
488            if ip == p {
489                *m = *m || rec;
490                return;
491            }
492        }
493        map.push((p.to_path_buf(), rec));
494    }
495
496    fn parse_string(&mut self, s: &str) -> Vec<Vec<u8>> {
497        self.parse_bytes(s.as_bytes())
498    }
499    fn parse_bytes(&mut self, mut s: &[u8]) -> Vec<Vec<u8>> {
500        let mut ret: Vec<Vec<u8>> = vec![];
501        let mut currs = StringSet::new();
502        let breaks = if cfg!(target_os = "windows") {
503            [b';', b';', b'$', b'!', b'{', b'~']
504        } else {
505            [b';', b':', b'$', b'!', b'{', b'~']
506        };
507        while !s.is_empty() {
508            if let Some((i, b)) = s.iter().enumerate().find(|(_, c)| breaks.contains(*c)) {
509                let first = &s[..i];
510                currs.push_string(first);
511                s = &s[i + 1..];
512                match b {
513                    b'!' => (),
514                    b'~' => currs.push_string(self.home.to_str().unwrap().as_bytes()),
515                    b';' | b':' => ret.extend(currs.clear().into_iter()),
516                    b'$' => {
517                        let name = if let Some((i, _)) =
518                            s.iter().enumerate().find(|(_, c)| !c.is_ascii_alphabetic())
519                        {
520                            let r = &s[..i];
521                            s = &s[i..];
522                            r
523                        } else {
524                            let r = s;
525                            s = b"";
526                            r
527                        };
528                        let name = std::str::from_utf8(name).unwrap();
529                        let resolved = self.get_resolved_var(name);
530                        currs.split(resolved);
531                    }
532                    b'{' => {
533                        let mut inbracks = 0;
534                        let mut ret = vec![vec![]];
535                        while !s.is_empty() {
536                            let b = s[0];
537                            s = &s[1..];
538                            if b == b'{' {
539                                inbracks += 1
540                            } else if b == b'}' && inbracks == 0 {
541                                break;
542                            } else if b == b'}' {
543                                inbracks -= 1
544                            } else if b == b',' && inbracks == 0 {
545                                ret.push(vec![]);
546                                continue;
547                            }
548                            ret.last_mut().unwrap().push(b)
549                        }
550                        let v = ret
551                            .into_iter()
552                            .flat_map(|v| self.parse_bytes(v.as_slice()))
553                            .collect::<Vec<_>>();
554                        currs.split(&v);
555                    }
556                    _ => unreachable!(),
557                }
558            } else {
559                currs.push_string(s);
560                break;
561            }
562        }
563        ret.extend(currs.clear());
564        ret
565    }
566    fn get_resolved_var(&mut self, key: &str) -> &Vec<Vec<u8>> {
567        if !self.resolved_vars.contains_key(key) {
568            let val = self.vars.get(key).unwrap().clone();
569            let resolved = self.parse_string(&val);
570            self.resolved_vars.insert(key.to_string(), resolved);
571        }
572        self.resolved_vars.get(key).unwrap()
573    }
574    fn close<const LOG: bool>(self) -> (HMap<String, PathBuf>, bool, HMap<String, PathBuf>) {
575        (
576            Self::close_i::<LOG>(self.predot),
577            self.recdot,
578            Self::close_i::<LOG>(self.postdot),
579        )
580    }
581    fn close_i<const LOG: bool>(v: Vec<(PathBuf, bool)>) -> HMap<String, PathBuf> {
582        let mut ret = HMap::default();
583        for (p, rec) in v.into_iter().rev() {
584            if LOG {
585                println!("Checking {} ({rec})", p.display());
586            }
587            let len = p.to_str().unwrap().len() + 1;
588            for e in walkdir::WalkDir::new(&p)
589                .follow_links(true)
590                .min_depth(1)
591                .into_iter()
592                .filter_map(|e| match e {
593                    Err(e) if LOG => {
594                        println!("ERROR: {e}");
595                        None
596                    }
597                    Err(_) => None,
598                    Ok(s)
599                        if s.path()
600                            .components()
601                            .any(|c| c.as_os_str().to_str() == Some(".git")) =>
602                    {
603                        None
604                    }
605                    Ok(e) => Some(e),
606                })
607            {
608                let sub = &e.path().to_str().unwrap()[len..];
609                if sub.contains('.') {
610                    if LOG {
611                        println!("Adding {} ({rec})", e.path().display());
612                    }
613                    let sub = sub.to_string();
614                    let pb = e.path().to_path_buf();
615                    if sub.ends_with(".tex") {
616                        let sub = sub[..sub.len() - 4].to_string();
617                        ret.insert(sub, pb.clone());
618                    }
619                    if rec {
620                        let filename = pb.file_name().unwrap().to_str().unwrap();
621                        ret.insert(filename.to_string(), pb.clone());
622                        if sub.ends_with(".tex") {
623                            ret.insert(filename[..filename.len() - 4].to_string(), pb.clone());
624                        }
625                    }
626                    ret.insert(sub, pb);
627                }
628            }
629        }
630        ret
631    }
632}
633
634fn get_dot(recdot: bool, pwd: &Path) -> HMap<String, PathBuf> {
635    let mut ret = HMap::default();
636    let len = pwd.to_str().unwrap().len() + 1;
637    for e in walkdir::WalkDir::new(pwd)
638        .min_depth(1)
639        .into_iter()
640        .filter_map(|e| match e.ok() {
641            None => None,
642            Some(s) if s.path().as_os_str().to_str().unwrap().contains(".git") => None,
643            Some(e) => Some(e),
644        })
645    {
646        let sub = &e.path().to_str().unwrap()[len..];
647        let sub = sub.to_string();
648        let pb = e.path().to_path_buf();
649        if e.file_type().is_file() {
650            let filename = pb.file_name().unwrap().to_str().unwrap();
651            if sub.ends_with(".tex") {
652                let sub = sub[..sub.len() - 4].to_string();
653                ret.insert(sub, pb.clone());
654            }
655            if recdot {
656                ret.insert(filename.to_string(), pb.clone());
657                //ret.insert(filename.to_uppercase(), pb.clone());
658                if sub.ends_with(".tex") {
659                    ret.insert(filename[..filename.len() - 4].to_string(), pb.clone());
660                    //ret.insert(filename.to_uppercase()[..filename.len() - 4].to_string(), pb.clone());
661                }
662            }
663            //let sub2 = sub[..sub.len() - filename.len()].to_string() + filename.to_uppercase().as_str();
664            //ret.insert(sub2, pb.clone());
665            ret.insert(sub, pb);
666        }
667    }
668    ret
669}