flams_stex/quickparse/tokenizer.rs
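//! A lightweight tokenizer for (s)TeX sources: it turns raw input into a flat
//! stream of [`TeXToken`]s (comments and directives, group braces, math
//! delimiters, control sequences, plain text) without building a parse tree.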

use crate::quickparse::tokens::TeXToken;
use flams_utils::{
    parsing::{ParseSource, StringOrStr},
    sourcerefs::SourceRange,
};
use std::marker::PhantomData;

use super::stex::DiagnosticLevel;

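/// The current lexing mode: plain text, or math mode (inline or display).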
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum Mode {
    Text,
    Math { display: bool },
}

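/// A shallow tokenizer over a [`ParseSource`] that splits (s)TeX input into
/// [`TeXToken`]s: comments and directives, group braces, math delimiters,
/// control sequences, and plain text. `letters` holds the characters treated
/// as letters when reading control-sequence names; problems are reported
/// through the `err` callback together with their source range and severity.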
pub struct TeXTokenizer<
    'a,
    Pa: ParseSource<'a>,
    Err: FnMut(String, SourceRange<Pa::Pos>, DiagnosticLevel),
> {
    pub reader: Pa,
    pub letters: String,
    pub mode: Mode,
    err: Err,
    phantom: PhantomData<&'a ()>,
}

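// The tokenizer is itself an iterator over tokens; it yields `None` once the
// input is exhausted.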
impl<'a, Pa: ParseSource<'a>, Err: FnMut(String, SourceRange<Pa::Pos>, DiagnosticLevel)>
    Iterator for TeXTokenizer<'a, Pa, Err>
{
    type Item = TeXToken<Pa::Pos, Pa::Str>;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.read_next()
    }
}

impl<'a, Pa: ParseSource<'a>, Err: FnMut(String, SourceRange<Pa::Pos>, DiagnosticLevel)>
    TeXTokenizer<'a, Pa, Err>
{
    pub(crate) fn new(reader: Pa, err: Err) -> Self {
        TeXTokenizer {
            reader,
            mode: Mode::Text,
            phantom: PhantomData,
            err,
            // By default, only ASCII alphabetic characters count as letters in
            // control-sequence names.
            letters: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string(),
        }
    }
    fn read_next(&mut self) -> Option<TeXToken<Pa::Pos, Pa::Str>> {
        self.reader.trim_start();
        let start = self.reader.curr_pos();
        match self.reader.peek_head() {
            None => None,
            // '%' starts a comment (or a directive) running to the end of the line.
            Some('%') => {
                self.reader.pop_head();
                Some(self.read_comment(start))
            }
            Some('{') => {
                self.reader.pop_head();
                Some(TeXToken::BeginGroupChar(start))
            }
            Some('}') => {
                self.reader.pop_head();
                Some(TeXToken::EndGroupChar(start))
            }
            // '$' toggles math mode; a doubled '$$' delimits display math.
            Some('$') => {
                self.reader.pop_head();
                match self.mode {
                    Mode::Math { display: true } => {
                        if self.reader.starts_with('$') {
                            self.reader.pop_head();
                        } else {
                            self.problem(
                                start,
                                "Missing $ closing display math",
                                DiagnosticLevel::Error,
                            );
                        }
                        self.close_math();
                        Some(TeXToken::EndMath { start })
                    }
                    Mode::Math { .. } => {
                        self.close_math();
                        Some(TeXToken::EndMath { start })
                    }
                    Mode::Text => {
                        if self.reader.starts_with('$') {
                            self.reader.pop_head();
                            self.open_math(true);
                            Some(TeXToken::BeginMath {
                                display: true,
                                start,
                            })
                        } else {
                            self.open_math(false);
                            Some(TeXToken::BeginMath {
                                display: false,
                                start,
                            })
                        }
                    }
                }
            }
            // '\' introduces a control sequence: either a maximal run of letters or a
            // single non-letter character (empty at end of input).
            Some('\\') => {
                self.reader.pop_head();
                let name = match self.reader.peek_head() {
                    Some(c) if self.letters.contains(c) => {
                        self.reader.read_while(|c| self.letters.contains(c))
                    }
                    None => "".into(),
                    _ => self.reader.read_n(1),
                };
                Some(TeXToken::ControlSequence { start, name })
            }
            // Anything else is plain text up to (but not including) the next special
            // character.
            _ => {
                let text = self.reader.read_while(|c| !"%{}$\\".contains(c));
                Some(TeXToken::Text {
                    range: SourceRange {
                        start,
                        end: self.reader.curr_pos(),
                    },
                    text,
                })
            }
        }
    }

    /// Switches into math mode (display math if `display` is true).
    #[inline]
    pub fn open_math(&mut self, display: bool) {
        self.mode = Mode::Math { display };
    }

    /// Switches back to text mode.
    #[inline]
    pub fn close_math(&mut self) {
        self.mode = Mode::Text;
    }

    /// Reports a diagnostic spanning from `start` to the current reader position.
    #[inline]
    pub fn problem(&mut self, start: Pa::Pos, msg: impl std::fmt::Display, level: DiagnosticLevel) {
        (self.err)(
            msg.to_string(),
            SourceRange { start, end: self.reader.curr_pos() },
            level,
        );
    }

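    /// Reads the rest of the line after the opening `%`. If that remaining text
    /// itself starts with `%STEXIDE`, the comment is surfaced as a directive
    /// token rather than a plain comment.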
    fn read_comment(&mut self, start: Pa::Pos) -> TeXToken<Pa::Pos, Pa::Str> {
        let (c, end) = self.reader.read_until_line_end();
        c.strip_prefix("%STEXIDE").ok().map_or_else(
            || TeXToken::Comment(SourceRange { start, end }),
            TeXToken::Directive,
        )
    }
}

/*
#[test]
fn test() {
    use std::path::PathBuf;
    let _ = tracing::subscriber::set_global_default(
        tracing_subscriber::FmtSubscriber::builder()
            .with_max_level(tracing::Level::TRACE)
            .finish(),
    );
    let path = PathBuf::from("/home/jazzpirate/work/MathHub/courses/FAU/IWGS/problems/source/regex/prob/regex_scientific.de.tex");
    let str = std::fs::read_to_string(&path).unwrap();
    let reader = flams_utils::parsing::ParseStr::<flams_utils::sourcerefs::LSPLineCol>::new(&str);
    // The error callback receives the message, its source range, and a
    // `DiagnosticLevel`; the level is ignored here.
    let tokenizer = TeXTokenizer::new(reader, |e, p, _| tracing::error!("Error {e} ({p:?})"));
    for tk in tokenizer {
        tracing::info!("{tk:?}");
    }
}
*/