Skip to main content

tex_engine/tex/tokens/
control_sequences.rs

/*! A control sequence is a [`Token`](super::Token), usually of the form `\foo`.
    We can just use strings to represent them, but there is room for optimization, e.g. by interning
    them, which requires some more infrastructure to intern and resolve them.

    We implement the trait [`CSHandler`] for the unit type `()`, in case
    we don't want to do any interning and just use [`Ptr`]`<str>`s.
*/
8
9use crate::prelude::{CategoryCode, CategoryCodeScheme};
10use crate::tex::characters::Character;
11use crate::tex::characters::CharacterMap;
12use crate::utils::{HMap, Ptr};
13use std::fmt::{Debug, Display, Write};
14use std::hash::Hash;
15use std::marker::PhantomData;
16use std::num::NonZeroU32;
17
18/** The name of a control sequence. */
19pub trait CSName<C: Character>: Clone + Eq + 'static + std::hash::Hash + Debug {
20    /// The type of the handler for this control sequence name.
21    type Handler: CSHandler<C, Self>;
22    /// The type used for mapping control sequence names to e.g. [`Command`](crate::commands::TeXCommand)s.
23    type Map<A>: CSNameMap<C, Self, A>
24    where
25        A: Clone;
26    fn display_fmt<W: Write>(
27        &self,
28        int: &Self::Handler,
29        cc: &CategoryCodeScheme<C>,
30        escapechar: Option<C>,
31        f: &mut W,
32    ) -> std::fmt::Result {
33        let res = int.resolve(self);
34        write!(f, "{}{}", C::display_opt(escapechar), res)?;
35        if res.len() == 1 {
36            let c = res.iter().next().unwrap();
37            match cc.get(c) {
38                CategoryCode::Letter => write!(f, " "),
39                _ => Ok(()),
40            }
41        } else {
42            write!(f, " ")
43        }
44    }
45    fn id(&self) -> usize;
46}
47
48/// How to map control sequence names to e.g. [`Command`](crate::commands::TeXCommand)s.
49pub trait CSNameMap<C: Character, CS: CSName<C>, A: Clone>: Clone + Default {
50    /// Returns the value associated with the given control sequence name, if any.
51    fn get(&self, cs: &CS) -> Option<&A>;
52    /// Inserts a new value for the given control sequence name, returning the old value if any.
53    fn insert(&mut self, cs: CS, a: A) -> Option<A>;
54    /// Removes the value associated with the given control sequence name, returning it if any.
55    fn remove(&mut self, cs: &CS) -> Option<A>;
56    fn into_iter(self) -> impl Iterator<Item = (CS, A)>;
57}
58
59impl<C: Character, CS: CSName<C>, A: Clone> CSNameMap<C, CS, A> for HMap<CS, A> {
60    fn get(&self, cs: &CS) -> Option<&A> {
61        self.get(cs)
62    }
63
64    fn insert(&mut self, cs: CS, a: A) -> Option<A> {
65        self.insert(cs, a)
66    }
67
68    fn remove(&mut self, cs: &CS) -> Option<A> {
69        self.remove(cs)
70    }
71    fn into_iter(self) -> impl Iterator<Item = (CS, A)> {
72        <HMap<CS, A> as IntoIterator>::into_iter(self)
73    }
74}
75
76impl<C: Character> CSName<C> for Ptr<str> {
77    type Handler = ();
78    type Map<A>
79        = HMap<Self, A>
80    where
81        A: Clone;
82    fn id(&self) -> usize {
83        use std::hash::Hasher;
84        let mut hash = std::hash::DefaultHasher::new();
85        self.hash(&mut hash);
86        hash.finish() as usize
87    }
88}
89
90/// A control sequence name that has been interned.
91/// Uses *consecutive* `u32` values
92pub type InternedCSName<C> = (NonZeroU32, PhantomData<C>);
93impl<C: Character> CSName<C> for InternedCSName<C> {
94    type Handler = CSInterner<C>;
95    type Map<A>
96        = CSNameVec<C, A>
97    where
98        A: Clone;
99    fn id(&self) -> usize {
100        self.0.get() as usize - 1
101    }
102}
103
104/// A [`CSNameMap`] that uses a [`Vec`] to store the values for [`InternedCSName`].
105/// Since the [`InternedCSName`]s are consecutive, we can use a [`Vec`] instead of a [`HMap`].
106#[derive(Clone)]
107pub struct CSNameVec<C: Character, A: Clone>(Vec<Option<A>>, PhantomData<C>);
108impl<C: Character, A: Clone> Default for CSNameVec<C, A> {
109    fn default() -> Self {
110        Self(Vec::new(), PhantomData)
111    }
112}
113
114impl<C: Character, A: Clone> CSNameMap<C, InternedCSName<C>, A> for CSNameVec<C, A> {
115    fn get(&self, cs: &InternedCSName<C>) -> Option<&A> {
116        self.0.get(cs.0.get() as usize - 1).and_then(|x| x.as_ref())
117    }
118    fn insert(&mut self, cs: InternedCSName<C>, a: A) -> Option<A> {
119        let idx = cs.0.get() as usize - 1;
120        if self.0.len() <= idx {
121            self.0.resize(idx + 1, None);
122        }
123        std::mem::replace(&mut self.0[idx], Some(a))
124    }
125    fn remove(&mut self, cs: &InternedCSName<C>) -> Option<A> {
126        let idx = cs.0.get() as usize - 1;
127        if self.0.len() <= idx {
128            return None;
129        }
130        self.0[idx].take()
131    }
132    fn into_iter(self) -> impl Iterator<Item = (InternedCSName<C>, A)> {
133        self.0.into_iter().enumerate().filter_map(|(i, x)| {
134            x.map(|x| ((NonZeroU32::new((i + 1) as u32).unwrap(), PhantomData), x))
135        })
136    }
137}
138
139/// A control sequence name that has been interned needs to be resolved again to
140/// get the actual name / display it etc.
141pub trait ResolvedCSName<'a, C: Character>: Display {
142    /// The type of the iterator over the characters of the control sequence name.
143    type Iter: Iterator<Item = C>;
144    /// Returns an iterator over the characters of the control sequence name.
145    fn iter(&self) -> Self::Iter;
146    /// Returns the length of the control sequence name in terms of the underlying [`Character`] type.
147    fn len(&self) -> usize;
148
149    fn is_empty(&self) -> bool {
150        self.len() == 0
151    }
152}
153
154/** Handles control sequence names - conversion from/to strings, displaying etc. */
155pub trait CSHandler<C: Character, CS: CSName<C>>: Default + Clone {
156    /// The type of the resolved control sequence name (for displaying / iterating over the underlying
157    /// [`Character`]s.
158    type Resolved<'a>: ResolvedCSName<'a, C>
159    where
160        Self: 'a;
161    /// Creates a new control sequence name from a string.
162    fn cs_from_str(&mut self, s: &str) -> CS;
163    /// Creates a new control sequence name from a vector of characters.
164    fn cs_from_chars(&mut self, v: &[C]) -> CS;
165    /// Resolves a control sequence name.
166    fn resolve<'a>(&'a self, cs: &'a CS) -> Self::Resolved<'a>;
167    /// Returns the name of the `\par` control sequence.
168    fn par(&self) -> CS;
169    /// Returns the name of the empty control sequence.
170    fn empty_str(&self) -> CS;
171    /// Creates a control sequence name from a string slice iff it is a previously interned control sequence name.
172    fn get(&self, s: &str) -> Option<CS>;
173}
174
175impl<'a, C: Character> ResolvedCSName<'a, C> for &'a str {
176    type Iter = C::Iter<'a>;
177    fn iter(&self) -> Self::Iter {
178        C::string_to_iter(self)
179    }
180    fn len(&self) -> usize {
181        C::string_to_iter(self).len()
182    }
183}
184
185impl<C: Character> CSHandler<C, Ptr<str>> for () {
186    type Resolved<'a> = &'a str;
187
188    fn cs_from_str(&mut self, s: &str) -> Ptr<str> {
189        s.into()
190    }
191    fn get(&self, s: &str) -> Option<Ptr<str>> {
192        Some(s.into())
193    }
194    fn resolve<'a>(&'a self, cs: &'a Ptr<str>) -> Self::Resolved<'a> {
195        cs
196    }
197    fn par(&self) -> Ptr<str> {
198        "par".to_string().into()
199    }
200    fn empty_str(&self) -> Ptr<str> {
201        "".to_string().into()
202    }
203    fn cs_from_chars(&mut self, v: &[C]) -> Ptr<str> {
204        let mut s = String::new();
205        for c in v {
206            c.display_fmt(&mut s);
207        }
208        s.into()
209    }
210}
211
212/// A [`CSHandler`] that interns control sequence names as `u32`.
213#[derive(Clone)]
214pub struct CSInterner<C: Character> {
215    map: HMap<Box<[C]>, NonZeroU32>,
216    ls: Vec<C>,
217    idx: Vec<usize>,
218}
219impl<C: Character> CSInterner<C> {
220    #[allow(dead_code)]
221    fn cap(&self) -> usize {
222        self.idx.len()
223    }
224    fn new() -> Self {
225        let mut map: HMap<Box<[C]>, NonZeroU32> = HMap::default();
226        map.insert(Box::new([]), NonZeroU32::new(1).unwrap());
227        let mut r = CSInterner {
228            map,
229            ls: Vec::new(),
230            idx: vec![0],
231        };
232        r.from_static("par");
233        r
234    }
235    /// Interns a `&'static str` as a control sequence name
236    pub fn from_static(&mut self, s: &'static str) -> InternedCSName<C> {
237        self.intern(C::string_to_iter(s).collect::<Vec<_>>().as_slice())
238    }
239    /// Interns a `String` as a control sequence name
240    pub fn from_string<S: AsRef<str>>(&mut self, s: S) -> InternedCSName<C> {
241        self.intern(C::string_to_iter(s.as_ref()).collect::<Vec<_>>().as_slice())
242    }
243    /// Resolves a control sequence name to a sequence of [`Character`]s
244    pub fn resolve(&self, i: InternedCSName<C>) -> &[C] {
245        self.get(i.0)
246    }
247
248    pub fn intern(&mut self, v: &[C]) -> InternedCSName<C> {
249        if let Some(x) = self.map.get(v) {
250            return (*x, PhantomData);
251        }
252        self.ls.extend(v);
253        let len = self.ls.len();
254        self.idx.push(len);
255        let len = self.idx.len() - 1;
256        let r = NonZeroU32::new(len as u32 + 1).unwrap();
257        self.map.insert(v.into(), r);
258        (r, PhantomData)
259    }
260
261    fn get(&self, i: NonZeroU32) -> &[C] {
262        let i = i.get() as usize - 1;
263        if i == 0 {
264            return &[];
265        }
266        let s = self.idx[i - 1];
267        let e = self.idx[i];
268        &self.ls[s..e]
269    }
270}
271impl<C: Character> CSHandler<C, InternedCSName<C>> for CSInterner<C> {
272    type Resolved<'a> = DisplayCSName<'a, C>;
273    fn cs_from_str(&mut self, s: &str) -> InternedCSName<C> {
274        self.intern(C::string_to_iter(s).collect::<Vec<_>>().as_slice())
275    }
276    fn get(&self, s: &str) -> Option<InternedCSName<C>> {
277        self.map
278            .get(C::string_to_iter(s).collect::<Vec<_>>().as_slice())
279            .map(|i| (*i, PhantomData))
280    }
281    fn cs_from_chars(&mut self, v: &[C]) -> InternedCSName<C> {
282        self.intern(v)
283    }
284    fn par(&self) -> InternedCSName<C> {
285        (NonZeroU32::new(2).unwrap(), PhantomData)
286    }
287    fn empty_str(&self) -> InternedCSName<C> {
288        (NonZeroU32::new(1).unwrap(), PhantomData)
289    }
290    fn resolve<'a>(&'a self, cs: &InternedCSName<C>) -> DisplayCSName<'a, C> {
291        DisplayCSName(self.get(cs.0))
292    }
293}
294/// Utility struct for displaying a control sequence name.
295pub struct DisplayCSName<'a, C: Character>(&'a [C]);
296impl<C: Character> Display for DisplayCSName<'_, C> {
297    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
298        for c in self.0 {
299            c.display_fmt(f)
300        }
301        Ok(())
302    }
303}
304
305impl<'a, C: Character> ResolvedCSName<'a, C> for DisplayCSName<'a, C> {
306    type Iter = std::iter::Copied<std::slice::Iter<'a, C>>;
307
308    fn iter(&self) -> Self::Iter {
309        self.0.iter().copied()
310    }
311
312    fn len(&self) -> usize {
313        self.0.len()
314    }
315}
316impl<C: Character> Default for CSInterner<C> {
317    fn default() -> Self {
318        Self::new()
319    }
320}