// flams_stex/quickparse/tokenizer.rs
use crate::quickparse::tokens::TeXToken;
2use flams_utils::{
3 parsing::{ParseSource, StringOrStr},
4 sourcerefs::SourceRange,
5};
6use std::marker::PhantomData;
7
8use super::stex::DiagnosticLevel;
9
/// Lexical mode the tokenizer is currently in.
///
/// The tokenizer starts in [`Mode::Text`] and switches to [`Mode::Math`] when
/// it reads a `$` (inline) or `$$` (display) math shift.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Mode {
    /// Ordinary text; a `$` here opens math.
    Text,
    /// Inside math; a `$` here closes it.
    Math {
        /// `true` when the math was opened with `$$`, `false` for a single `$`.
        display: bool,
    },
}
15
16pub struct TeXTokenizer<
17 'a,
18 Pa: ParseSource<'a>,
19 Err: FnMut(String, SourceRange<Pa::Pos>, DiagnosticLevel),
20> {
21 pub reader: Pa,
22 pub letters: String,
23 pub mode: Mode,
24 err: Err,
25 phantom: PhantomData<&'a ()>,
26}
27
28impl<'a, Pa: ParseSource<'a>, Err: FnMut(String, SourceRange<Pa::Pos>, DiagnosticLevel)> Iterator
29 for TeXTokenizer<'a, Pa, Err>
30{
31 type Item = TeXToken<Pa::Pos, Pa::Str>;
32
33 #[inline]
34 fn next(&mut self) -> Option<Self::Item> {
35 self.read_next()
36 }
37}
38
39impl<'a, Pa: ParseSource<'a>, Err: FnMut(String, SourceRange<Pa::Pos>, DiagnosticLevel)>
40 TeXTokenizer<'a, Pa, Err>
41{
42 pub(crate) fn new(reader: Pa, err: Err) -> Self {
43 TeXTokenizer {
44 reader,
45 mode: Mode::Text,
46 phantom: PhantomData,
47 err,
48 letters: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string(),
49 }
50 }
51 fn read_next(&mut self) -> Option<TeXToken<Pa::Pos, Pa::Str>> {
52 self.reader.trim_start();
53 let start = self.reader.curr_pos();
54 match self.reader.peek_head() {
55 None => None,
56 Some('%') => {
57 self.reader.pop_head();
58 Some(self.read_comment(start))
59 }
60 Some('{') => {
61 self.reader.pop_head();
62 Some(TeXToken::BeginGroupChar(start))
63 }
64 Some('}') => {
65 self.reader.pop_head();
66 Some(TeXToken::EndGroupChar(start))
67 }
68 Some('$') => {
69 self.reader.pop_head();
70 match self.mode {
71 Mode::Math { display: true } => {
72 if self.reader.starts_with('$') {
73 self.reader.pop_head();
74 } else {
75 self.problem(
76 start,
77 "Missing $ closing display math",
78 DiagnosticLevel::Error,
79 );
80 }
81 self.close_math();
82 Some(TeXToken::EndMath { start })
83 }
84 Mode::Math { .. } => {
85 self.close_math();
86 Some(TeXToken::EndMath { start })
87 }
88 Mode::Text => {
89 if self.reader.starts_with('$') {
90 self.reader.pop_head();
91 self.open_math(true);
92 Some(TeXToken::BeginMath {
93 display: true,
94 start,
95 })
96 } else {
97 self.open_math(false);
98 Some(TeXToken::BeginMath {
99 display: false,
100 start,
101 })
102 }
103 }
104 }
105 }
106 Some('\\') => {
107 self.reader.pop_head();
108 let name = match self.reader.peek_head() {
109 Some(c) if self.letters.contains(c) => {
110 self.reader.read_while(|c| self.letters.contains(c))
111 }
112 None => "".into(),
113 _ => self.reader.read_n(1),
114 };
115 Some(TeXToken::ControlSequence { start, name })
116 }
117 _ => {
118 let text = self.reader.read_while(|c| !"%{}$\\".contains(c));
119 Some(TeXToken::Text {
120 range: SourceRange {
121 start,
122 end: self.reader.curr_pos(),
123 },
124 text,
125 })
126 }
127 }
128 }
129
130 #[inline]
131 pub const fn open_math(&mut self, display: bool) {
132 self.mode = Mode::Math { display };
133 }
134 #[inline]
135 pub const fn close_math(&mut self) {
136 self.mode = Mode::Text;
137 }
138
139 #[inline]
140 pub fn problem(&mut self, start: Pa::Pos, msg: impl std::fmt::Display, level: DiagnosticLevel) {
141 (self.err)(
142 msg.to_string(),
143 SourceRange {
144 start,
145 end: self.reader.curr_pos(),
146 },
147 level,
148 );
149 }
150
151 fn read_comment(&mut self, start: Pa::Pos) -> TeXToken<Pa::Pos, Pa::Str> {
152 let (c, end) = self.reader.read_until_line_end();
153 c.strip_prefix("%STEXIDE").ok().map_or_else(
154 || TeXToken::Comment(SourceRange { start, end }),
155 TeXToken::Directive,
156 )
157 }
158}
159
160