flams_stex/
lib.rs

1//#![feature(lazy_type_alias)]
2#![cfg_attr(docsrs, feature(doc_auto_cfg))]
3
4mod dependencies;
5mod latex;
6pub mod quickparse;
7mod rustex;
8use std::{
9    fs,
10    io::Read,
11    path::{Path, PathBuf},
12};
13
14use either::Either;
15use eyre::Context;
16use flams_ftml::{HTMLString, FTML_DOC, FTML_OMDOC};
17use flams_ontology::uris::{ArchiveId, ArchiveURITrait, DocumentURI, PathURITrait, URIRefTrait};
18use flams_system::{
19    backend::{
20        archives::{Archive, ArchiveOrGroup, LocalArchive},
21        AnyBackend, Backend, GlobalBackend,
22    },
23    build_result, build_target,
24    building::{BuildResult, BuildResultArtifact, BuildTask},
25    flams_extension,
26    formats::{CHECK, PDF},
27    source_format,
28};
29use flams_utils::vecmap::VecSet;
30pub use rustex::{OutputCont, RusTeX};
31
32use crate::dependencies::STeXDependency;
33
// Declares the `stex` source format through `source_format!` (the macro is imported
// from `flams_system`; its definition is not visible in this file).
// Reading the arguments: the file extensions `tex` and `ltx`, the target chain
// PDFLATEX_FIRST => PDFLATEX => RUSTEX => FTML_OMDOC => CHECK, a human-readable
// description, and `dependencies::get_deps` as the associated function.
// NOTE(review): presumably `get_deps` is the dependency extractor for this format —
// confirm against the macro definition.
source_format!(stex ["tex","ltx"] [
  PDFLATEX_FIRST => PDFLATEX => RUSTEX => FTML_OMDOC => CHECK]
   @ "(Semantically annotated) LaTeX"
   = dependencies::get_deps
);
39
// Declares the `pdflatex_first` build target and binds it to the function
// `pdflatex_first` in this file.
// NOTE(review): `[] => [AUX]` presumably lists the required and the produced build
// results (nothing in, AUX out) — confirm against the `build_target!` definition.
build_target!(
  pdflatex_first [] => [AUX]
  @ "Run pdflatex and bibtex/biber/index once"
  = pdflatex_first
);
45
46fn pdflatex_first(backend: &AnyBackend, task: &BuildTask) -> BuildResult {
47    let Either::Left(path) = task.source() else {
48        return BuildResult {
49            log: Either::Left("Needs a physical file".to_string()),
50            result: Err(Vec::new()),
51        };
52    };
53    latex::clean(path);
54    let log = path.with_extension("log");
55    let mh = backend
56        .mathhubs()
57        .into_iter()
58        .map(|p| p.display().to_string())
59        .collect::<Vec<_>>()
60        .join(",");
61    let ret = latex::pdflatex_and_bib(path, [("STEX_WRITESMS", "true"), ("MATHHUB", &mh)]);
62    if ret.is_ok() {
63        BuildResult {
64            log: Either::Right(log),
65            result: Ok(BuildResultArtifact::File(PDF, path.with_extension("pdf"))),
66        }
67    } else {
68        BuildResult {
69            log: Either::Right(log),
70            result: Err(Vec::new()),
71        }
72    }
73}
74
// Declares the `pdflatex` build target and binds it to the function
// `pdflatex_second` in this file.
// NOTE(review): `[AUX] => [PDF]` presumably means "needs AUX, produces PDF" —
// confirm against the `build_target!` definition.
build_target!(
  pdflatex [AUX] => [PDF]
  @ "Run pdflatex a second time"
  = pdflatex_second
);
80
81fn pdflatex_second(backend: &AnyBackend, task: &BuildTask) -> BuildResult {
82    let Either::Left(path) = task.source() else {
83        return BuildResult {
84            log: Either::Left("Needs a physical file".to_string()),
85            result: Err(Vec::new()),
86        };
87    };
88    let log = path.with_extension("log");
89    let mh = backend
90        .mathhubs()
91        .into_iter()
92        .map(|p| p.display().to_string())
93        .collect::<Vec<_>>()
94        .join(",");
95    let ret = latex::pdflatex(path, [("STEX_USESMS", "true"), ("MATHHUB", &mh)]);
96    if ret.is_ok() {
97        BuildResult {
98            log: Either::Right(log),
99            result: Ok(BuildResultArtifact::File(PDF, path.with_extension("pdf"))),
100        }
101    } else {
102        BuildResult {
103            log: Either::Right(log),
104            result: Err(Vec::new()),
105        }
106    }
107}
108
// Declares the `rustex` build target and binds it to the function `rustex`
// in this file.
// NOTE(review): `[AUX] => [FTML_DOC]` presumably means "needs AUX, produces FTML_DOC" —
// confirm against the `build_target!` definition.
build_target!(
  rustex [AUX] => [FTML_DOC]
  @ "Run RusTeX tex->html only"
  = rustex
);
114
115fn rustex(backend: &AnyBackend, task: &BuildTask) -> BuildResult {
116    // TODO make work with string as well
117    let Either::Left(path) = task.source() else {
118        return BuildResult {
119            log: Either::Left("Needs a physical file".to_string()),
120            result: Err(Vec::new()),
121        };
122    };
123    let out = path.with_extension("rlog");
124    let ocl = out.clone();
125    let mh = backend
126        .mathhubs()
127        .into_iter()
128        .map(|p| p.display().to_string())
129        .collect::<Vec<_>>()
130        .join(",");
131    let run = move || {
132        RusTeX::get()
133            .map_err(|()| "Could not initialize RusTeX".to_string())
134            .and_then(|e| {
135                std::panic::catch_unwind(move || {
136                    e.run_with_envs(
137                        path,
138                        false,
139                        [
140                            ("STEX_USESMS".to_string(), "true".to_string()),
141                            ("MATHHUB".to_string(), mh),
142                        ],
143                        Some(&ocl),
144                    )
145                })
146                .map_err(|e| {
147                    if let Some(s) = e.downcast_ref::<&str>() {
148                        s.to_string()
149                    } else if let Ok(s) = e.downcast::<String>() {
150                        *s
151                    } else {
152                        "Unknown RusTeX error".to_string()
153                    }
154                })
155            })
156    };
157    #[cfg(debug_assertions)]
158    let ret = {
159        std::thread::scope(move |s| {
160            std::thread::Builder::new()
161                .stack_size(16 * 1024 * 1024)
162                .spawn_scoped(s, run)
163                .expect("foo")
164                .join()
165                .expect("foo")
166        })
167    };
168    #[cfg(not(debug_assertions))]
169    let ret = { run() };
170    match ret {
171        Err(s) => BuildResult {
172            log: Either::Left(s),
173            result: Err(Vec::new()),
174        },
175        Ok(Err(_)) => BuildResult {
176            log: Either::Right(out),
177            result: Err(Vec::new()),
178        },
179        Ok(Ok(s)) => {
180            latex::clean(path);
181            BuildResult {
182                log: Either::Right(out),
183                result: Ok(HTMLString::create(s)),
184            }
185        }
186    }
187}
188
// Declares the `aux` build result through `build_result!` with the description below.
// NOTE(review): presumably this is what defines the `AUX` name used by the build
// targets in this file — confirm against the macro definition.
build_result!(aux @ "LaTeX aux/bbl/toc files, as generated by pdflatex+bibtex/biber/mkindex");
190
// Declares the extension `stex_ext` through `flams_extension!`, passing
// `RusTeX::initialize` as its function.
// NOTE(review): presumably the system calls it once when the extension is loaded —
// confirm against the macro definition.
flams_extension!(stex_ext = RusTeX::initialize);
192
193lazy_static::lazy_static! {
194    static ref OPTIONS : regex::Regex = unsafe{ regex::Regex::new(
195        r"\\(?<cmd>documentclass|usepackage|RequirePackage)(?<opts>\[[^\]]*\])?\{(?<name>notesslides|stex|hwexam|problem)\}"
196    ).unwrap_unchecked() };
197    static ref LIBS: regex::Regex = unsafe{ regex::Regex::new(
198        r"\\libinput\{"
199    ).unwrap_unchecked()};
200}
201
// Error-handling shorthand for functions returning `eyre::Result`.
// The first two arms return early with a fresh error; the last two attach context
// to a fallible expression and propagate its error with `?` (evaluating to the
// `Ok` value otherwise).
macro_rules! err {
    // `err!(msg)`: return `Err(eyre::eyre!(msg))`.
    ($fmt:expr) => {return Err(eyre::eyre!($fmt))};
    // `err!(fmt, args..)`: return `Err(eyre::eyre!(fmt, args..))`.
    ($fmt:expr, $($args:tt)*) => {return Err(eyre::eyre!($fmt,$($args)*))};
    // `err!(expr => msg)`: `expr.wrap_err(msg)?`. The message is passed to `wrap_err`
    // as-is, i.e. it is NOT run through `format!`, so `{..}` placeholders in it are
    // not interpolated.
    ($e:expr => $fmt:expr) => { $e.wrap_err($fmt)?};
    // `err!(expr => fmt, args..)`: `expr.wrap_err_with(|| format!(fmt, args..))?`.
    // A trailing comma after `fmt` with no arguments also selects this arm.
    ($e:expr => $fmt:expr, $($args:tt)*) => { $e.wrap_err_with(|| format!($fmt,$($args)*))?};
}
208
209pub fn export_standalone(doc: &DocumentURI, file: &Path, target_dir: &Path) -> eyre::Result<()> {
210    use std::fmt::Write;
211    if !file.extension().is_some_and(|e| e == "tex") {
212        err!("Not a .tex file: {}", file.display());
213    }
214
215    // safe, because we earlier checked that it has extension .tex => it has a file name
216    let file_name = unsafe { file.file_name().unwrap_unchecked() };
217
218    let mh_path = target_dir.join("mathhub");
219    err!(
220        std::fs::create_dir_all(&mh_path) =>
221        "Invalid target directory: {}",
222        mh_path.display()
223    );
224    let archive = doc.archive_id();
225
226    let mh = flams_system::settings::Settings::get()
227        .mathhubs
228        .iter()
229        .map(|p| p.display().to_string())
230        .collect::<Vec<_>>()
231        .join(",");
232    let Ok(()) = latex::pdflatex_and_bib(file, [("STEX_WRITESMS", "true"), ("MATHHUB", &mh)])
233    else {
234        err!(
235            "failed to build {}\nCheck .log file for details",
236            file.display()
237        );
238    };
239
240    let sms = file.with_extension("sms");
241    let sms_target = target_dir.join(file_name).with_extension("sms");
242    err!(std::fs::copy(&sms, &sms_target) => "Failed to copy file {}",sms.display() );
243
244    let orig_txt = err!(
245        std::fs::read_to_string(file) =>
246        "failed to open file {}",
247        file.display()
248    );
249    let Some(begin) = orig_txt.find("\\begin{document}") else {
250        err!("No \\begin{{document}} found!")
251    };
252    let mut txt = orig_txt[..begin].to_string();
253    //orig_txt.truncate(begin);
254    let rel_path = if let Some(p) = doc.path() {
255        format!("{p}/{}", file_name.display())
256    } else {
257        file_name.display().to_string()
258    };
259    err!(
260        write!(
261            txt,
262            "\n\\begin{{document}}\n  \\inputref[{archive}]{{{rel_path}}}\n\\end{{document}}"
263        ) =>
264        "impossible",
265    );
266
267    let mut matched = false;
268    let txt = OPTIONS.replace(&txt, |cap: &regex::Captures<'_>| {
269        matched = true;
270        // This is safe, because the named groups are necessary components of the regex, so a match
271        // entails they are defined.
272        let (cmd, name) = unsafe {
273            (
274                cap.name("cmd").unwrap_unchecked().as_str(),
275                cap.name("name").unwrap_unchecked().as_str(),
276            )
277        };
278        if let Some(opts) = cap.name("opts") {
279            format!(
280                "\\{cmd}[{},mathhub=./mathhub,usesms]{{{name}}}",
281                &opts.as_str()[1..opts.as_str().len() - 1]
282            )
283        } else {
284            format!("\\{cmd}[mathhub=./mathhub,usesms]{{{name}}}")
285        }
286    });
287    if !matched {
288        err!(
289            "No sTeX \\documentclass or \\package found in {}",
290            file.display()
291        );
292    }
293    let rep = format!("\\libinput[{archive}]{{");
294    let txt = LIBS.replace_all(&txt, &rep);
295
296    let tex_target = target_dir.join(file_name);
297    err!(std::fs::write(&tex_target, txt.as_bytes()) => "Failed to write to file {}",tex_target.display());
298
299    copy("stex.sty", &target_dir)?;
300    copy("stex-logo.sty", &target_dir)?;
301    copy("stex-backend-pdflatex.cfg", &target_dir)?;
302    copy("stex-highlighting.sty", &target_dir)?;
303    copy("stexthm.sty", &target_dir)?;
304    // stex-compat?
305
306    let mut todos = vec![(orig_txt, file.to_owned(), doc.clone())];
307    let mut archives = VecSet(Vec::with_capacity(4));
308    while let Some((txt, f, d)) = todos.pop() {
309        if !archives.0.contains(d.archive_id()) {
310            archives.0.push(d.archive_id().clone());
311            do_archive(d.archive_id(), &mh_path)?;
312        }
313        // by construction, the files in todos have a file name
314        let name = unsafe { f.file_name().unwrap_unchecked() };
315        let target_file = if let Some(p) = d.path() {
316            mh_path
317                .join(d.archive_id().to_string())
318                .join("source")
319                .join(p.to_string()) //.join(name)
320        } else {
321            mh_path.join(d.archive_id().to_string()).join("source") //.join(name)
322        };
323        err!(std::fs::create_dir_all(&target_file) => "Failed to create directory {}",target_file.display());
324        let target_file = target_file.join(name);
325        err!(std::fs::copy(&f, target_file) => "Failed to copy file {}",f.display());
326        for dep in dependencies::parse_deps(&txt, &f, &d, &GlobalBackend::get().to_any()) {
327            match dep {
328                STeXDependency::Inputref { archive, filepath } => {
329                    let archive = archive.as_ref().unwrap_or(d.archive_id());
330                    let Some((d, f)) = GlobalBackend::get().with_local_archive(archive, |a| {
331                        a.and_then(|a| {
332                            let f = a.path().join("source").join(&*filepath);
333                            let d = DocumentURI::from_archive_relpath(a.uri().owned(), &*filepath)
334                                .ok()?;
335                            Some((d, f))
336                        })
337                    }) else {
338                        err!("Could not find document for file {}", f.display())
339                    };
340                    let txt = err!(
341                        std::fs::read_to_string(&f) =>
342                        "failed to open file {}",
343                        f.display()
344                    );
345                    todos.push((txt, f, d));
346                }
347                STeXDependency::Img { archive, filepath } => {
348                    let archive = archive.as_ref().unwrap_or(d.archive_id());
349                    let Some(source) = GlobalBackend::get().with_local_archive(archive, |a| {
350                        a.map(|a| a.path().join("source").join(&*filepath))
351                    }) else {
352                        err!("Could not find image file {}", f.display())
353                    };
354                    let img_target = mh_path
355                        .join(archive.to_string())
356                        .join("source")
357                        .join(&*filepath);
358                    if !source.exists() {
359                        err!("img file not found: {}", source.display())
360                    }
361                    // safe, because file exists and is not root
362                    let parent = unsafe { img_target.parent().unwrap_unchecked() };
363                    err!(std::fs::create_dir_all(&parent) => "Error creating directory {}",parent.display());
364                    err!(std::fs::copy(&source,&img_target) => "Error copying {}",img_target.display());
365                }
366                STeXDependency::ImportModule { .. }
367                | STeXDependency::UseModule { .. }
368                | STeXDependency::Module { .. } => (),
369            }
370        }
371    }
372
373    Ok(())
374}
375
376fn copy(name: &str, to: &Path) -> eyre::Result<()> {
377    let Some(sty) = tex_engine::engine::filesystem::kpathsea::KPATHSEA.which(name) else {
378        err!("No {name} found")
379    };
380    let sty_target = to.join(name);
381    err!(std::fs::copy(sty, sty_target) =>"Failed to copy {name}");
382    Ok(())
383}
384
385fn do_archive(id: &ArchiveId, target: &Path) -> eyre::Result<()> {
386    GlobalBackend::get().manager().with_tree(|t| {
387        let mut steps = id.steps();
388        let Some(mut current) = steps.next() else {
389            err!("empty archive ID");
390        };
391        let mut ls = &t.groups;
392        loop {
393            let Some(a) = ls.iter().find(|a| a.id().last_name() == current) else {
394                err!("archive not found: {id}");
395            };
396            match a {
397                ArchiveOrGroup::Archive(_) => {
398                    if steps.next().is_some() {
399                        err!("archive not found: {id}");
400                    }
401                    let Some(Archive::Local(a)) = t.get(id) else {
402                        err!("Not a local archive: {id}")
403                    };
404                    return do_manifest(a, target);
405                }
406                ArchiveOrGroup::Group(g) => {
407                    let Some(next) = steps.next() else {
408                        err!("archive not found: {id}");
409                    };
410                    current = next;
411                    ls = &g.children;
412                    if let Some(ArchiveOrGroup::Archive(a)) =
413                        g.children.iter().find(|a| a.id().is_meta())
414                    {
415                        let Some(Archive::Local(a)) = t.get(a) else {
416                            err!("archive not found: {a}");
417                        };
418                        do_manifest(a, target)?;
419                    }
420                }
421            }
422        }
423    })
424}
425
426fn do_manifest(a: &LocalArchive, target: &Path) -> eyre::Result<()> {
427    let archive_target = target.join(a.id().to_string());
428    let manifest_target = archive_target.join("META-INF/MANIFEST.MF");
429    if manifest_target.exists() {
430        return Ok(());
431    }
432    let manifest_source = a.path().join("META-INF/MANIFEST.MF");
433    if !manifest_source.exists() {
434        err!(
435            "MANIFEST.MF of {} not found (at {})",
436            a.id(),
437            manifest_source.display()
438        );
439    }
440    // safe, because by construction, file has a parent
441    let meta_inf = unsafe { manifest_target.parent().unwrap_unchecked() };
442    err!(std::fs::create_dir_all(&meta_inf) => "Failed to create directory {}",meta_inf.display());
443    err!(std::fs::copy(&manifest_source, &manifest_target) => "failed to copy {} to {}",manifest_source.display(),manifest_target.display());
444
445    let lib_source = a.path().join("lib");
446    if lib_source.exists() {
447        let lib_target = archive_target.join("lib");
448        flams_utils::fs::copy_dir_all(&lib_source, &lib_target)?;
449    }
450    Ok(())
451}
452/*
453#[cfg(test)]
454#[rstest::rstest]
455fn standalone_test() {
456    tracing_subscriber::fmt().init();
457    flams_system::settings::Settings::initialize(flams_system::settings::SettingsSpec::default());
458    flams_system::backend::GlobalBackend::initialize();
459
460    let target_dir = Path::new("/home/jazzpirate/temp/test");
461    let doc = "https://mathhub.info?a=Papers/24-cicm-views-in-alea&d=paper&l=en"
462        .parse()
463        .unwrap();
464    let file =
465        Path::new("/home/jazzpirate/work/MathHub/Papers/24-cicm-views-in-alea/source/paper.tex");
466    export_standalone(&doc, &file, target_dir).unwrap()
467}
468 */
469
470/*
471#[cfg(test)]
472#[rstest::rstest]
473fn test() {
474    fn print<T>() {
475        tracing::info!(
476            "Size of {}:{}",
477            std::any::type_name::<T>(),
478            std::mem::size_of::<T>()
479        )
480    }
481    tracing_subscriber::fmt().init();
482    print::<ArchiveId>();
483    print::<flams_ontology::uris::BaseURI>();
484    print::<flams_ontology::uris::ArchiveURI>();
485    print::<flams_ontology::uris::PathURI>();
486    print::<flams_ontology::uris::ModuleURI>();
487    print::<flams_ontology::uris::DocumentURI>();
488    print::<flams_ontology::uris::SymbolURI>();
489    print::<flams_ontology::uris::DocumentElementURI>();
490}
491 */