flams_stex/quickparse/
tokenizer.rs

1use crate::quickparse::tokens::TeXToken;
2use flams_utils::{
3    parsing::{ParseSource, StringOrStr},
4    sourcerefs::SourceRange,
5};
6use std::marker::PhantomData;
7
8use super::stex::DiagnosticLevel;
9
/// The lexical mode the tokenizer is currently in.
///
/// The mode decides whether a `$` opens or closes a math environment.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Mode {
    /// Ordinary text; a `$` (or `$$`) opens math.
    Text,
    /// Inside math; `display` is `true` when the math was opened with `$$`.
    Math { display: bool },
}
15
16pub struct TeXTokenizer<
17    'a,
18    Pa: ParseSource<'a>,
19    Err: FnMut(String, SourceRange<Pa::Pos>, DiagnosticLevel),
20> {
21    pub reader: Pa,
22    pub letters: String,
23    pub mode: Mode,
24    err: Err,
25    phantom: PhantomData<&'a ()>,
26}
27
28impl<'a, Pa: ParseSource<'a>, Err: FnMut(String, SourceRange<Pa::Pos>, DiagnosticLevel)> Iterator
29    for TeXTokenizer<'a, Pa, Err>
30{
31    type Item = TeXToken<Pa::Pos, Pa::Str>;
32
33    #[inline]
34    fn next(&mut self) -> Option<Self::Item> {
35        self.read_next()
36    }
37}
38
39impl<'a, Pa: ParseSource<'a>, Err: FnMut(String, SourceRange<Pa::Pos>, DiagnosticLevel)>
40    TeXTokenizer<'a, Pa, Err>
41{
42    pub(crate) fn new(reader: Pa, err: Err) -> Self {
43        TeXTokenizer {
44            reader,
45            mode: Mode::Text,
46            phantom: PhantomData,
47            err,
48            letters: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string(),
49        }
50    }
51    fn read_next(&mut self) -> Option<TeXToken<Pa::Pos, Pa::Str>> {
52        self.reader.trim_start();
53        let start = self.reader.curr_pos();
54        match self.reader.peek_head() {
55            None => None,
56            Some('%') => {
57                self.reader.pop_head();
58                Some(self.read_comment(start))
59            }
60            Some('{') => {
61                self.reader.pop_head();
62                Some(TeXToken::BeginGroupChar(start))
63            }
64            Some('}') => {
65                self.reader.pop_head();
66                Some(TeXToken::EndGroupChar(start))
67            }
68            Some('$') => {
69                self.reader.pop_head();
70                match self.mode {
71                    Mode::Math { display: true } => {
72                        if self.reader.starts_with('$') {
73                            self.reader.pop_head();
74                        } else {
75                            self.problem(
76                                start,
77                                "Missing $ closing display math",
78                                DiagnosticLevel::Error,
79                            );
80                        }
81                        self.close_math();
82                        Some(TeXToken::EndMath { start })
83                    }
84                    Mode::Math { .. } => {
85                        self.close_math();
86                        Some(TeXToken::EndMath { start })
87                    }
88                    Mode::Text => {
89                        if self.reader.starts_with('$') {
90                            self.reader.pop_head();
91                            self.open_math(true);
92                            Some(TeXToken::BeginMath {
93                                display: true,
94                                start,
95                            })
96                        } else {
97                            self.open_math(false);
98                            Some(TeXToken::BeginMath {
99                                display: false,
100                                start,
101                            })
102                        }
103                    }
104                }
105            }
106            Some('\\') => {
107                self.reader.pop_head();
108                let name = match self.reader.peek_head() {
109                    Some(c) if self.letters.contains(c) => {
110                        self.reader.read_while(|c| self.letters.contains(c))
111                    }
112                    None => "".into(),
113                    _ => self.reader.read_n(1),
114                };
115                Some(TeXToken::ControlSequence { start, name })
116            }
117            _ => {
118                let text = self.reader.read_while(|c| !"%{}$\\".contains(c));
119                Some(TeXToken::Text {
120                    range: SourceRange {
121                        start,
122                        end: self.reader.curr_pos(),
123                    },
124                    text,
125                })
126            }
127        }
128    }
129
130    #[inline]
131    pub const fn open_math(&mut self, display: bool) {
132        self.mode = Mode::Math { display };
133    }
134    #[inline]
135    pub const fn close_math(&mut self) {
136        self.mode = Mode::Text;
137    }
138
139    #[inline]
140    pub fn problem(&mut self, start: Pa::Pos, msg: impl std::fmt::Display, level: DiagnosticLevel) {
141        (self.err)(
142            msg.to_string(),
143            SourceRange {
144                start,
145                end: self.reader.curr_pos(),
146            },
147            level,
148        );
149    }
150
151    fn read_comment(&mut self, start: Pa::Pos) -> TeXToken<Pa::Pos, Pa::Str> {
152        let (c, end) = self.reader.read_until_line_end();
153        c.strip_prefix("%STEXIDE").ok().map_or_else(
154            || TeXToken::Comment(SourceRange { start, end }),
155            TeXToken::Directive,
156        )
157    }
158}
159
160/*
161#[test]
162fn test() {
163    use std::path::PathBuf;
164    tracing::subscriber::set_global_default(
165        tracing_subscriber::FmtSubscriber::builder()
166            .with_max_level(tracing::Level::TRACE)
167            .finish(),
168    );
169    let path = PathBuf::from("/home/jazzpirate/work/MathHub/courses/FAU/IWGS/problems/source/regex/prob/regex_scientific.de.tex");
170    let str = std::fs::read_to_string(&path).unwrap();
171    let reader = flams_utils::parsing::ParseStr::<flams_utils::sourcerefs::LSPLineCol>::new(&str);
172    let tokenizer = TeXTokenizer::new(reader, Some(&path),|e,p| tracing::error!("Error {e} ({p:?})"));
173    for tk in tokenizer {
174        tracing::info!("{tk:?}");
175    }
176}
177*/