1use crate::prelude::*;
2use crate::tex::characters::{TextLine, TextLineSource};
3use crate::utils::errors::InvalidCharacter;
4
/// The tokenizer's position-dependent state, which decides how space and
/// end-of-line characters are treated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MouthState {
    /// At the start of a line: space characters are dropped and an
    /// end-of-line character yields a paragraph token.
    NewLine,
    /// Directly after a space token or a multi-letter control sequence name:
    /// further space characters are dropped.
    SkipBlank,
    /// Anywhere else in a line; also the state entered when the input is
    /// exhausted.
    MidLine,
}
15
16#[derive(Clone, Debug)]
61pub struct InputTokenizer<C: Character, S: TextLineSource<C>> {
62 pub state: MouthState,
63 line: usize,
64 col: usize,
65 current_line: TextLine<C>,
66 pub source: S,
67 pub(crate) eof: bool,
68 tempstr: Vec<C>,
69}
70
71type Csh<T> = <<T as Token>::CS as CSName<<T as Token>::Char>>::Handler;
72
73impl<C: Character, S: TextLineSource<C>> InputTokenizer<C, S> {
74 pub fn new(mut source: S) -> Self {
76 Self {
77 state: MouthState::NewLine,
78 line: 1,
79 col: 0,
80 current_line: source.get_line().unwrap_or(TextLine::default()),
81 source,
82 eof: false,
83 tempstr: Vec::new(),
84 }
85 }
86 pub fn line(&self) -> usize {
89 self.line
90 }
91 pub fn column(&self) -> usize {
94 self.col + 1
95 }
96 pub fn eof(&self) -> bool {
99 self.eof
100 }
101
102 fn get_char(&mut self) -> Option<C> {
103 if self.col >= self.current_line.len() {
104 None
105 } else {
106 let next = self.current_line[self.col];
107 self.col += 1;
108 Some(next)
109 }
110 }
111
112 pub fn readline<T: Token<Char = C>, F: FnMut(T)>(&mut self, mut f: F) {
115 while self.col < self.current_line.len() {
116 let next = self.current_line[self.col];
117 self.col += 1;
118 match next.try_into() {
119 Ok(b' ') => f(T::space()),
120 _ => f(T::from_char_cat(next, CommandCode::Other)),
121 }
122 }
123 self.next_line();
124 }
125
126 pub fn read<T: Token<Char = C>, F: FnMut(T)>(
130 &mut self,
131 handler: &mut Csh<T>,
132 cc: &CategoryCodeScheme<C>,
133 endline: Option<C>,
134 par_token: &T::CS,
135 mut f: F,
136 ) -> Result<(), InvalidCharacter<C>> {
137 let mut ingroups = 0;
138 let mut ret: Result<(), InvalidCharacter<C>> = Ok(());
139 let line = self.line;
140 while self.line == line || ingroups > 0 {
141 match self.get_char() {
142 None => {
143 if self.eof {
144 return ret;
145 }
146 if let Some(n) = self.return_endline::<T>(cc, endline, par_token) {
147 f(n)
148 }
149 return ret;
150 }
151 Some(c) => match self.check_char::<T>(handler, cc, endline, c, par_token) {
152 Ok(None) if self.line == line || ingroups > 0 => (),
153 Ok(None) => return ret,
154 Ok(Some(tk)) => {
155 if tk.command_code() == CommandCode::BeginGroup {
156 ingroups += 1
157 } else if tk.command_code() == CommandCode::EndGroup {
158 ingroups -= 1
159 }
160 f(tk)
161 }
162 Err(i) => {
163 f(T::from_char_cat(i.0, CommandCode::Other));
164 ret = Err(i)
165 }
166 },
167 }
168 }
169 ret
170 }
171
172 pub fn get_next<T: Token<Char = C>>(
175 &mut self,
176 handler: &mut Csh<T>,
177 cc: &CategoryCodeScheme<C>,
178 endline: Option<C>,
179 par_token: &T::CS,
180 ) -> Result<Option<T>, InvalidCharacter<C>> {
181 loop {
182 match self.get_char() {
183 None if self.eof => return Ok(None),
184 None => {
185 if let Some(e) = self.return_endline::<T>(cc, endline, par_token) {
186 return Ok(Some(e));
188 }
189 }
190 Some(c) => {
191 if let Some(t) = self.check_char::<T>(handler, cc, endline, c, par_token)? {
192 return Ok(Some(t));
193 }
194 }
195 };
196 }
197 }
198
199 fn check_char<T: Token<Char = C>>(
200 &mut self,
201 handler: &mut Csh<T>,
202 cc: &CategoryCodeScheme<C>,
203 endline: Option<C>,
204 c: C,
205 par_token: &T::CS,
206 ) -> Result<Option<T>, InvalidCharacter<C>> {
207 use CategoryCode::*;
208 match cc.get(c) {
209 EOL if self.state == MouthState::NewLine => {
210 self.next_line();
211 Ok(Some(self.do_par(par_token.clone())))
212 }
213 EOL => Ok(self.return_endline::<T>(cc, endline, par_token)),
214 Space if self.state == MouthState::SkipBlank => Ok(None),
215 Space if self.state == MouthState::NewLine => Ok(None),
216 Space => {
217 self.state = MouthState::SkipBlank;
218 Ok(Some(T::space()))
219 }
220 Ignored => Ok(None),
221 Comment => {
222 self.next_line();
223 self.state = MouthState::NewLine;
224 Ok(None)
225 }
226 Invalid => Err(InvalidCharacter(c)),
227 Escape => Ok(Some(self.get_escape::<T>(handler, cc, endline))),
228 Superscript => match self.maybe_superscript(c) {
229 Some(c) => self.check_char::<T>(handler, cc, endline, c, par_token),
230 None => {
231 self.state = MouthState::MidLine;
232 Ok(Some(T::from_char_cat(c, CommandCode::Superscript)))
233 }
234 },
235 cc => {
236 self.state = MouthState::MidLine;
237 Ok(Some(T::from_char_cat(c, (*cc).into())))
238 }
239 }
240 }
241
242 fn next_line(&mut self) {
243 if let Some(next) = self.source.get_line() {
244 self.current_line = next;
245 self.line += 1;
246 self.col = 0;
247 } else {
248 self.eof = true;
249 self.col = self.current_line.len();
250 self.state = MouthState::MidLine;
251 }
252 }
253
254 fn do_par<T: Token<Char = C>>(&mut self, par: T::CS) -> T {
255 if self.current_line.is_empty() {
256 while let Some(line) = self.source.get_line() {
257 self.line += 1;
258 if !line.is_empty() {
259 self.current_line = line;
260 break;
261 }
262 }
263 }
264 T::from_cs(par)
265 }
266
267 fn return_endline<T: Token<Char = C>>(
268 &mut self,
269 cc: &CategoryCodeScheme<C>,
270 endline: Option<C>,
271 par: &T::CS,
272 ) -> Option<T> {
273 use CategoryCode::*;
274 self.next_line();
275 let ret = match endline {
276 None => None,
277 Some(c) => match cc.get(c) {
278 Space | EOL if self.state == MouthState::SkipBlank => None,
279 Space if self.state == MouthState::NewLine => None,
280 EOL if self.state == MouthState::NewLine => Some(self.do_par(par.clone())),
281 EOL => Some(T::space()),
282 Ignored | Invalid | Comment => None,
283 o => Some(T::from_char_cat(c, (*o).into())),
284 },
285 };
286 self.state = MouthState::NewLine;
287 ret
288 }
289
290 fn get_escape<T: Token<Char = C>>(
291 &mut self,
292 handler: &mut Csh<T>,
293 cc: &CategoryCodeScheme<C>,
294 endline: Option<C>,
295 ) -> T {
296 let name = match self.get_char() {
297 None => {
298 self.next_line();
299 match endline {
300 None => handler.empty_str(),
301 Some(c) => {
302 self.tempstr.clear();
303 self.tempstr.push(c);
304 handler.cs_from_chars(&self.tempstr)
305 }
306 }
307 }
308 Some(next) => self.check_escape::<T>(handler, cc, next),
309 };
310 T::from_cs(name)
311 }
312
313 fn check_escape<T: Token<Char = C>>(
314 &mut self,
315 handler: &mut Csh<T>,
316 cc: &CategoryCodeScheme<C>,
317 next: C,
318 ) -> T::CS {
319 use CategoryCode::*;
320 match cc.get(next) {
321 Superscript => match self.maybe_superscript(next) {
322 Some(c) => self.check_escape::<T>(handler, cc, c),
323 None => {
324 self.state = MouthState::MidLine;
325 self.tempstr.clear();
326 self.tempstr.push(next);
327 handler.cs_from_chars(&self.tempstr)
328 }
329 },
330 Letter => self.get_cs_name::<T>(handler, cc, next),
331 _ => {
332 self.state = MouthState::MidLine;
333 self.tempstr.clear();
334 self.tempstr.push(next);
335 handler.cs_from_chars(&self.tempstr)
336 }
337 }
338 }
339
340 fn get_cs_name<T: Token<Char = C>>(
341 &mut self,
342 handler: &mut Csh<T>,
343 cc: &CategoryCodeScheme<C>,
344 first: C,
345 ) -> T::CS {
346 self.tempstr.clear();
347 self.tempstr.push(first);
348 self.state = MouthState::SkipBlank;
349 loop {
350 match self.get_char() {
351 None => break,
352 Some(next) => match cc.get(next) {
353 CategoryCode::Letter => self.tempstr.push(next),
354 CategoryCode::Superscript => {
355 let curr = self.col;
356 match self.maybe_superscript(next) {
357 Some(c) if *cc.get(c) == CategoryCode::Letter => self.tempstr.push(c),
358 _ => {
359 self.col = curr;
360 self.col -= 1;
361 break;
362 }
363 }
364 }
365 _ => {
366 self.col -= 1;
367 break;
368 }
369 },
370 }
371 }
372 handler.cs_from_chars(&self.tempstr)
373 }
374
375 fn cond(i: C) -> bool {
376 (Into::<C>::into(48u8) <= i && i <= Into::<C>::into(57u8))
377 || (Into::<C>::into(97u8) <= i && i <= Into::<C>::into(102u8))
378 }
379
380 fn maybe_superscript(&mut self, firstsup: C) -> Option<C> {
381 match self.get_char() {
382 None => None,
383 Some(c) if c != firstsup => {
384 self.col -= 1;
385 None
386 }
387 Some(_) => match self.get_char() {
388 None => {
389 self.col -= 1;
390 None
391 }
392 Some(first) => match self.get_char() {
393 None => {
394 if first < (128).into() {
395 let u: u8 = match first.try_into() {
396 Ok(u) => u,
397 Err(_) => {
398 self.col -= 1;
399 return None;
400 }
401 };
402 let ch: C = (if u < 64 { u + 64 } else { u - 64 }).into();
403 Some(ch)
404 } else {
405 self.col -= 2;
406 None
407 }
408 }
409 Some(second) => {
410 if Self::cond(first) && Self::cond(second) {
411 let ufirst: u8 = match first.try_into() {
412 Ok(u) => u,
413 Err(_) => {
414 self.col -= 2;
415 return None;
416 }
417 };
418 let usecond: u8 = match second.try_into() {
419 Ok(u) => u,
420 Err(_) => {
421 self.col -= 2;
422 return None;
423 }
424 };
425 let char = u8::from_str_radix(
426 std::str::from_utf8(&[ufirst, usecond]).unwrap(),
427 16,
428 )
429 .unwrap();
430 Some(char.into())
431 } else {
432 self.col -= 1;
433 if first < (128).into() {
434 let u: u8 = match first.try_into() {
435 Ok(u) => u,
436 Err(_) => {
437 self.col -= 2;
438 return None;
439 }
440 };
441 let ch: C = (if u < 64 { u + 64 } else { u - 64 }).into();
442 Some(ch)
443 } else {
444 self.col -= 2;
445 None
446 }
447 }
448 }
449 },
450 },
451 }
452 }
453
454 pub fn preview<W: std::fmt::Write>(&self, len: &mut usize, mut f: W) -> std::fmt::Result {
456 if self.current_line.is_empty() {
457 return Ok(());
458 }
459 if self.current_line.len() > self.col {
460 for c in &self.current_line[self.col..] {
461 *len -= 1;
462 c.display_fmt(&mut f);
463 if *len == 0 {
464 return Ok(());
465 }
466 }
467 }
468 Ok(())
469 }
470}