Skip to main content

tex_engine/tex/tokens/
token_lists.rs

1use crate::prelude::{
2    CSHandler, CSName, CategoryCode, CategoryCodeScheme, Character, CommandCode, Token,
3};
4use crate::tex::characters::CharacterMap;
5use crate::tex::tokens::control_sequences::ResolvedCSName;
6use crate::tex::tokens::StandardToken;
7use std::fmt::{Arguments, Display, Write};
8use std::marker::PhantomData;
9
10/// A list of [`Token`]s; conceptually, a wrapper around `Rc<[T]>`
11#[cfg(not(feature = "multithreaded"))]
12#[derive(Clone, Debug, PartialEq)]
13pub struct TokenList<T: Token>(pub shared_vector::SharedVector<T>);
14
15#[cfg(feature = "multithreaded")]
16#[derive(Clone, Debug, PartialEq)]
17pub struct TokenList<T: Token>(pub shared_vector::AtomicSharedVector<T>);
18impl<T: Token> TokenList<T> {
19    /// Whether the list is empty
20
21    pub fn is_empty(&self) -> bool {
22        self.0.is_empty()
23    }
24    /// return the `i`th token in the list. Will panic if out of bounds.
25
26    pub fn get(&self, i: usize) -> &T {
27        &(*self.0)[i]
28    }
29
30    /// wraps this list in a [`TokenListDisplay`], which implements [`Display`].
31    /// If `double_par` is true, parameter tokens will be doubled.
32    pub fn display<'a>(
33        &'a self,
34        int: &'a <T::CS as CSName<T::Char>>::Handler,
35        cc: &'a CategoryCodeScheme<T::Char>,
36        escapechar: Option<T::Char>,
37        double_par: bool,
38    ) -> TokenListDisplay<'a, T> {
39        TokenListDisplay {
40            ls: self.0.as_slice(),
41            int,
42            cc,
43            escapechar,
44            double_par,
45        }
46    }
47}
48impl<T: Token> FromIterator<T> for TokenList<T> {
49    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
50        let mut v = shared_vector::Vector::new();
51        for t in iter {
52            v.push(t);
53        }
54        Self(v.into())
55    }
56}
57
58/// A helper struct that implements [`Display`] for [`TokenList`]. Needs a [`CSHandler`] and a [`CategoryCodeScheme`]
59/// to resolve control sequences and insert spaces between them properly.
60pub struct TokenListDisplay<'a, T: Token> {
61    ls: &'a [T],
62    int: &'a <T::CS as CSName<T::Char>>::Handler,
63    cc: &'a CategoryCodeScheme<T::Char>,
64    escapechar: Option<T::Char>,
65    double_par: bool,
66}
67impl<'a, T: Token> TokenListDisplay<'a, T> {
68    /// Creates a new [`TokenListDisplay`] from a [`Vec`] of [`Token`]s.
69    /// If `double_par` is true, parameter tokens will be doubled.
70    pub fn from_vec(
71        v: &'a Vec<T>,
72        int: &'a <T::CS as CSName<T::Char>>::Handler,
73        cc: &'a CategoryCodeScheme<T::Char>,
74        escapechar: Option<T::Char>,
75        double_par: bool,
76    ) -> Self {
77        Self {
78            ls: v.as_slice(),
79            int,
80            cc,
81            escapechar,
82            double_par,
83        }
84    }
85    /// allows for writing the tokens directly to a [`CharWrite`]; potentially circumventing the need to
86    /// convert it to a string only to convert it back to tokents
87    pub fn fmt_cw<W: CharWrite<T::Char, T::CS>>(&self, f: &mut W) -> std::fmt::Result {
88        for t in self.ls.iter() {
89            match t.is_argument_marker() {
90                Some(i) => write!(f, "#{}", (i + 1))?,
91                _ => match t.to_enum() {
92                    StandardToken::Character(c, CommandCode::Parameter) if self.double_par => {
93                        f.push_char(c);
94                        f.push_char(c)
95                    }
96                    _ => f.push_tk(t, self.int, self.cc, self.escapechar), //o.display_fmt(self.int,self.cc,self.escapechar,f)?
97                },
98            }
99        }
100        Ok(())
101    }
102}
103
104impl<'a, T: Token> Display for TokenListDisplay<'a, T> {
105    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
106        self.fmt_cw(&mut StringCharWrite::new(f))
107    }
108}
109
110/// An extension of [`Write`] that can handle [`Character`]s and [`CSName`]s
111/// (and hence [`Token`]s) directly. Useful, since it allows for
112/// directly turning strings into [`TokenList`]s using `write!`, `format!` etc.
113pub trait CharWrite<C: Character, CS: CSName<C>>: std::fmt::Write {
114    /// Pushes a [`Character`] to the underlying writer.
115    fn push_char(&mut self, c: C);
116    /// Pushes a [`CSName`] to the underlying writer.
117    fn push_cs<I: CSHandler<C, CS>>(
118        &mut self,
119        cs: CS,
120        int: &I,
121        cc: &CategoryCodeScheme<C>,
122        esc: Option<C>,
123    );
124    /// Pushes a [`Token`] to the underlying writer.
125    fn push_tk<T: Token<Char = C, CS = CS>>(
126        &mut self,
127        t: &T,
128        int: &<T::CS as CSName<T::Char>>::Handler,
129        cc: &CategoryCodeScheme<T::Char>,
130        escapechar: Option<T::Char>,
131    ) {
132        match t.to_enum() {
133            StandardToken::Character(c, _) => self.push_char(c),
134            StandardToken::ControlSequence(cs) => self.push_cs(cs, int, cc, escapechar),
135            StandardToken::Primitive(id) => self.push_cs(
136                int.get(&id.display::<C>(None).to_string())
137                    .expect("Something went wrong"),
138                int,
139                cc,
140                escapechar,
141            ),
142        }
143    }
144}
145impl<'a, C: Character, CS: CSName<C>, A: CharWrite<C, CS>> CharWrite<C, CS> for &'a mut A {
146    fn push_char(&mut self, c: C) {
147        (*self).push_char(c)
148    }
149    fn push_cs<I: CSHandler<C, CS>>(
150        &mut self,
151        cs: CS,
152        int: &I,
153        cc: &CategoryCodeScheme<C>,
154        esc: Option<C>,
155    ) {
156        (*self).push_cs(cs, int, cc, esc)
157    }
158}
159
160/// Wrapper struct that adds [`CharWrite`] to any [`Write`]
161pub struct StringCharWrite<'a, W: Write, C: Character, CS: CSName<C>>(
162    &'a mut W,
163    PhantomData<C>,
164    PhantomData<CS>,
165);
166impl<'a, W: Write, C: Character, CS: CSName<C>> StringCharWrite<'a, W, C, CS> {
167    pub fn new(f: &'a mut W) -> Self {
168        Self(f, PhantomData, PhantomData)
169    }
170}
171impl<'a, W: Write, C: Character, CS: CSName<C>> std::fmt::Write for StringCharWrite<'a, W, C, CS> {
172    fn write_char(&mut self, c: char) -> std::fmt::Result {
173        self.0.write_char(c)
174    }
175
176    fn write_fmt(&mut self, args: Arguments<'_>) -> std::fmt::Result {
177        self.0.write_fmt(args)
178    }
179
180    fn write_str(&mut self, s: &str) -> std::fmt::Result {
181        self.0.write_str(s)
182    }
183}
184impl<'a, W: Write, C: Character, CS: CSName<C>> CharWrite<C, CS> for StringCharWrite<'a, W, C, CS> {
185    fn push_char(&mut self, c: C) {
186        c.display_fmt(self.0)
187    }
188    fn push_cs<I: CSHandler<C, CS>>(
189        &mut self,
190        cs: CS,
191        int: &I,
192        cc: &CategoryCodeScheme<C>,
193        esc: Option<C>,
194    ) {
195        let res = int.resolve(&cs);
196        write!(self, "{}{}", C::display_opt(esc), res).unwrap();
197        if res.len() == 1 {
198            let c = res.iter().next().unwrap();
199            if cc.get(c) == &CategoryCode::Letter {
200                self.write_char(' ').unwrap()
201            }
202        } else {
203            self.write_char(' ').unwrap()
204        }
205    }
206}
207
208/// Struct that allows to `write!` and `format!` by converting the string to
209/// [`Token`]s and passes them to a closure. All tokens have [`CommandCode::Other`]
210/// except for space characters.
211/// For example, `write!(Tokenizer::new(|t| vec.push(t), "ab c")` will
212/// push four tokens to `vec`, where the first, second and fourth have
213/// [`CommandCode::Other`] and the third has [`CommandCode::Space`].
214pub struct Otherize<'a, T: Token, F: FnMut(T)>(&'a mut F, PhantomData<T>);
215impl<'a, T: Token, F: FnMut(T)> Otherize<'a, T, F> {
216    /// Creates a new [`Otherize`] from a closure.
217
218    pub fn new(f: &'a mut F) -> Self {
219        Self(f, PhantomData)
220    }
221}
222
223impl<'a, T: Token, F: FnMut(T)> CharWrite<T::Char, T::CS> for Otherize<'a, T, F> {
224    fn push_char(&mut self, c: T::Char) {
225        if matches!(c.try_into(), Ok(b' ')) {
226            (self.0)(T::space())
227        } else {
228            (self.0)(T::from_char_cat(c, CommandCode::Other))
229        }
230    }
231    fn push_cs<I: CSHandler<T::Char, T::CS>>(
232        &mut self,
233        cs: T::CS,
234        int: &I,
235        cc: &CategoryCodeScheme<T::Char>,
236        esc: Option<T::Char>,
237    ) {
238        if let Some(e) = esc {
239            (self.0)(T::from_char_cat(e, CommandCode::Other));
240        }
241        let res = int.resolve(&cs);
242        for c in res.iter() {
243            if matches!(c.try_into(), Ok(b' ')) {
244                (self.0)(T::space());
245            } else {
246                (self.0)(T::from_char_cat(c, CommandCode::Other));
247            }
248        }
249        if res.len() == 1 {
250            let c = res.iter().next().unwrap();
251            if cc.get(c) == &CategoryCode::Letter {
252                (self.0)(T::space())
253            }
254        } else {
255            (self.0)(T::space())
256        }
257    }
258}
259impl<'a, T: Token, F: FnMut(T)> std::fmt::Write for Otherize<'a, T, F> {
260    fn write_str(&mut self, s: &str) -> std::fmt::Result {
261        for u in T::Char::string_to_iter(s) {
262            if matches!(u.try_into(), Ok(b' ')) {
263                (self.0)(T::space())
264            } else {
265                (self.0)(T::from_char_cat(u, CommandCode::Other))
266            }
267        }
268        Ok(())
269    }
270}
271
272impl<T: Token> From<shared_vector::Vector<T>> for TokenList<T> {
273    fn from(value: shared_vector::Vector<T>) -> Self {
274        Self(value.into())
275    }
276}
277
278/// A [`MacroExpansion`] bundles the [`TokenList`] of a macro with its arguments.
279pub struct MacroExpansion<T: Token> {
280    pub ls: TokenList<T>,
281    index: usize,
282    currarg: Option<(usize, usize)>,
283    pub args: [Vec<T>; 9],
284}
285impl<T: Token> MacroExpansion<T> {
286    /// Consumes the [`MacroExpansion`] by pushing its [`Token`]s reversed into the provided
287    /// `Vec` - i.e. afterwards, the first [`Token`] of the expansion is the last one of the provided `Vec`.
288    pub fn consume_rev(&mut self, v: &mut Vec<T>) {
289        for t in self.ls.0.iter().rev() {
290            if let Some(i) = t.is_argument_marker() {
291                v.extend(self.args[i as usize].iter().rev().cloned())
292            } else {
293                v.push(t.clone());
294            }
295        }
296    }
297}
298impl<T: Token> MacroExpansion<T> {
299    /// Creates a new [`MacroExpansion`] from a [`TokenList`] and a list of arguments.
300    pub fn new(ls: TokenList<T>, args: [Vec<T>; 9]) -> Self {
301        Self {
302            ls,
303            index: 0,
304            currarg: None,
305            args,
306        }
307    }
308
309    /// useful for debugging: prints the expansion from the current index to a [`Write`]
310    pub fn preview<W: Write>(
311        &self,
312        int: &<T::CS as CSName<T::Char>>::Handler,
313        cc: &CategoryCodeScheme<T::Char>,
314        escapechar: Option<T::Char>,
315        mut w: W,
316    ) {
317        let mut currarg = self.currarg;
318        let mut index = self.index;
319        loop {
320            match currarg {
321                Some((i, j)) if j < self.args[i].len() => {
322                    self.args[i][j]
323                        .display_fmt(int, cc, escapechar, &mut w)
324                        .unwrap();
325                    currarg = Some((i, j + 1));
326                }
327                Some(_) => currarg = None,
328                None if index < self.ls.0.len() => {
329                    let t = self.ls.get(index);
330                    index += 1;
331                    match t.is_argument_marker() {
332                        Some(i) => currarg = Some((i.into(), 0)),
333                        _ => t.display_fmt(int, cc, escapechar, &mut w).unwrap(),
334                    }
335                }
336                _ => return,
337            }
338        }
339    }
340}
341impl<T: Token> Iterator for MacroExpansion<T> {
342    type Item = T;
343    fn next(&mut self) -> Option<Self::Item> {
344        loop {
345            if let Some((i, j)) = self.currarg {
346                if let Some(t) = self.args[i].get(j) {
347                    self.currarg = Some((i, j + 1));
348                    return Some(t.clone());
349                } else {
350                    self.currarg = None;
351                }
352            }
353            match self.ls.0.get(self.index) {
354                None => return None,
355                Some(t) => {
356                    self.index += 1;
357                    if let Some(i) = t.is_argument_marker() {
358                        self.currarg = Some((i.into(), 0));
359                    } else {
360                        return Some(t.clone());
361                    }
362                }
363            }
364        }
365    }
366}