Skip to main content

tex_engine/tex/
catcodes.rs

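/*!
Category codes: the [`CategoryCode`] assigned to each input character, the
predefined category code schemes, and the [`CommandCode`]s that tokens carry
after scanning.
 */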
4
5use crate::tex::characters::Character;
6use crate::tex::tokens::control_sequences::CSName;
7use crate::tex::tokens::token_lists::CharWrite;
8use const_for::const_for;
9use std::fmt::Formatter;
10
/** The category code TeX assigns to an input character.

Every variant's discriminant is its numeric category code (0–15), and
[`CategoryCode::Other`] is the [`Default`] value.

A [`CategoryCode`] is turned into its number via [`u8::from`]; the reverse
direction can fail and therefore goes through [`CategoryCode::try_from`].

# Example
```rust
use tex_engine::tex::catcodes::CategoryCode;

let cat = CategoryCode::BeginGroup;
let num: u8 = cat.into();
assert_eq!(num, 1);
let cat2 = CategoryCode::try_from(1u8).unwrap();
assert_eq!(cat2, cat);
```
 */
#[derive(Clone, Copy, Default, PartialEq, Eq)]
pub enum CategoryCode {
    /// Category 0: escape character, usually `\`.
    Escape = 0,
    /// Category 1: begin-group character, usually `{`.
    BeginGroup = 1,
    /// Category 2: end-group character, usually `}`.
    EndGroup = 2,
    /// Category 3: math shift character, usually `$`.
    MathShift = 3,
    /// Category 4: alignment tab character, usually `&`.
    AlignmentTab = 4,
    /// Category 5: end-of-line character, usually `\n`.
    EOL = 5,
    /// Category 6: macro parameter character, usually `#`.
    Parameter = 6,
    /// Category 7: superscript character, usually `^`.
    Superscript = 7,
    /// Category 8: subscript character, usually `_`.
    Subscript = 8,
    /// Category 9: ignored character.
    Ignored = 9,
    /// Category 10: space character, usually ` `.
    Space = 10,
    /// Category 11: letter, usually a-z and A-Z.
    Letter = 11,
    /// Category 12: any other character, e.g. `@`, `!`, `?`.
    /// This is the default category code.
    #[default]
    Other = 12,
    /// Category 13: active character, usually `~`.
    Active = 13,
    /// Category 14: comment character, usually `%`.
    Comment = 14,
    /// Category 15: invalid character.
    Invalid = 15,
}
63
64impl std::fmt::Debug for CategoryCode {
65    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
66        std::fmt::Display::fmt(self, f)
67    }
68}
69impl std::fmt::Display for CategoryCode {
70    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
71        use CategoryCode as CC;
72        write!(
73            f,
74            "{}",
75            match self {
76                CC::Escape => "escape",
77                CC::BeginGroup => "begin group",
78                CC::EndGroup => "end group",
79                CC::MathShift => "math shift",
80                CC::AlignmentTab => "alignment",
81                CC::EOL => "end of line",
82                CC::Parameter => "parameter",
83                CC::Superscript => "superscript",
84                CC::Subscript => "subscript",
85                CC::Ignored => "ignored",
86                CC::Space => "space",
87                CC::Letter => "letter",
88                CC::Other => "other",
89                CC::Active => "active",
90                CC::Comment => "comment",
91                CC::Invalid => "invalid",
92            }
93        )
94    }
95}
96
97impl From<CategoryCode> for u8 {
98    fn from(cc: CategoryCode) -> Self {
99        use CategoryCode as CC;
100        match cc {
101            CC::Escape => 0,
102            CC::BeginGroup => 1,
103            CC::EndGroup => 2,
104            CC::MathShift => 3,
105            CC::AlignmentTab => 4,
106            CC::EOL => 5,
107            CC::Parameter => 6,
108            CC::Superscript => 7,
109            CC::Subscript => 8,
110            CC::Ignored => 9,
111            CC::Space => 10,
112            CC::Letter => 11,
113            CC::Other => 12,
114            CC::Active => 13,
115            CC::Comment => 14,
116            CC::Invalid => 15,
117        }
118    }
119}
120
121impl TryFrom<u8> for CategoryCode {
122    type Error = ();
123    fn try_from(value: u8) -> Result<Self, Self::Error> {
124        use CategoryCode as CC;
125        Ok(match value {
126            0 => CC::Escape,
127            1 => CC::BeginGroup,
128            2 => CC::EndGroup,
129            3 => CC::MathShift,
130            4 => CC::AlignmentTab,
131            5 => CC::EOL,
132            6 => CC::Parameter,
133            7 => CC::Superscript,
134            8 => CC::Subscript,
135            9 => CC::Ignored,
136            10 => CC::Space,
137            11 => CC::Letter,
138            12 => CC::Other,
139            13 => CC::Active,
140            14 => CC::Comment,
141            15 => CC::Invalid,
142            _ => return Err(()),
143        })
144    }
145}
146
147/// A [`CategoryCodeScheme`] assigns a [`CategoryCode`] to each [`Character`].
148pub type CategoryCodeScheme<Char> = <Char as Character>::CharMap<CategoryCode>;
149
150/** The [`CategoryCodeScheme`] where all characters have [`CategoryCode::Other`] (12) except for
151         the space character, which has [`CategoryCode::Space`] (10).
152*/
153pub static OTHER_SCHEME_U8: CategoryCodeScheme<u8> = {
154    let mut catcodes = [CategoryCode::Other; 256];
155    catcodes[32] = CategoryCode::Space;
156    catcodes
157};
158
159/**
160The default [`CategoryCodeScheme`] for TeX, pre latex.ltx.
161
162All characters have [`CategoryCode::Other`] (12) except for:
163
164| Character    | Category Code   |
165|--------------|-----------------
166 | ` `          | [`Space`](CategoryCode::Space)|
167| a-z, A-Z     | [`Letter`](CategoryCode::Letter) |
168| `\`          | [`Escape`](CategoryCode::Other)  |
169| `\r`         | [`EOL`](CategoryCode::EOL)       |
170| `%`          | [`Comment`](CategoryCode::Comment)|
171 */
172pub static STARTING_SCHEME_U8: CategoryCodeScheme<u8> = {
173    let mut catcodes = [CategoryCode::Other; 256];
174    catcodes[92] = CategoryCode::Escape;
175    catcodes[32] = CategoryCode::Space;
176    catcodes[13] = CategoryCode::EOL;
177    catcodes[37] = CategoryCode::Comment;
178    const_for!(i in 65..91 => catcodes[i] = CategoryCode::Letter);
179    const_for!(i in 97..123 => catcodes[i] = CategoryCode::Letter);
180    catcodes[126] = CategoryCode::Active;
181    catcodes
182};
183
184/**
185The default [`CategoryCodeScheme`] used almost everywhere in LaTeX.
186
187All characters have [`CategoryCode::Other`] (12) except for:
188
189| Character    | Category Code   |
190|--------------|-----------------
191 | ` `          | [`Space`](CategoryCode::Space)|
192| a-z, A-Z     | [`Letter`](CategoryCode::Letter) |
193| `\`          | [`Escape`](CategoryCode::Other)  |
194| `\r`         | [`EOL`](CategoryCode::EOL)       |
195| `%`          | [`Comment`](CategoryCode::Comment)|
196| `~`          | [`Active`](CategoryCode::Active)  |
197| `#`          | [`Parameter`](CategoryCode::Parameter)|
198| `^`          | [`Superscript`](CategoryCode::Superscript)|
199| `_`          | [`Subscript`](CategoryCode::Subscript)|
200| `{`          | [`BeginGroup`](CategoryCode::BeginGroup)|
201| `}`          | [`EndGroup`](CategoryCode::EndGroup)|
202| `$`          | [`MathShift`](CategoryCode::MathShift)|
203| `&`          | [`AlignmentTab`](CategoryCode::AlignmentTab)|
204 */
205pub static DEFAULT_SCHEME_U8: CategoryCodeScheme<u8> = {
206    let mut catcodes = [CategoryCode::Other; 256];
207    catcodes[123] = CategoryCode::BeginGroup;
208    catcodes[125] = CategoryCode::EndGroup;
209    catcodes[36] = CategoryCode::MathShift;
210    catcodes[38] = CategoryCode::AlignmentTab;
211    catcodes[35] = CategoryCode::Parameter;
212    catcodes[94] = CategoryCode::Superscript;
213    catcodes[95] = CategoryCode::Subscript;
214    catcodes[126] = CategoryCode::Active;
215    catcodes[92] = CategoryCode::Escape;
216    catcodes[32] = CategoryCode::Space;
217    catcodes[13] = CategoryCode::EOL;
218    catcodes[37] = CategoryCode::Comment;
219    const_for!(i in 65..91 => catcodes[i] = CategoryCode::Letter);
220    const_for!(i in 97..123 => catcodes[i] = CategoryCode::Letter);
221    catcodes
222};
223
224/// Like [`DEFAULT_SCHEME_U8`](static@DEFAULT_SCHEME_U8), but with `@` as a letter.
225/// (i.e. as after `\makeatletter`)
226pub static AT_LETTER_SCHEME: CategoryCodeScheme<u8> = {
227    let mut catcodes = [CategoryCode::Other; 256];
228    catcodes[123] = CategoryCode::BeginGroup;
229    catcodes[125] = CategoryCode::EndGroup;
230    catcodes[36] = CategoryCode::MathShift;
231    catcodes[38] = CategoryCode::AlignmentTab;
232    catcodes[35] = CategoryCode::Parameter;
233    catcodes[94] = CategoryCode::Superscript;
234    catcodes[95] = CategoryCode::Subscript;
235    catcodes[126] = CategoryCode::Active;
236    catcodes[92] = CategoryCode::Escape;
237    catcodes[32] = CategoryCode::Space;
238    catcodes[13] = CategoryCode::EOL;
239    catcodes[37] = CategoryCode::Comment;
240    const_for!(i in 64..91 => catcodes[i] = CategoryCode::Letter);
241    const_for!(i in 97..123 => catcodes[i] = CategoryCode::Letter);
242    catcodes
243};
244
/// The code carried by a [`Token`](super::tokens::Token) once a file has been scanned.
///
/// At that point [`CategoryCode`]s such as [`EOL`](CategoryCode::EOL),
/// [`Comment`](CategoryCode::Comment) or [`Invalid`](CategoryCode::Invalid)
/// can no longer occur. A token can instead represent e.g. a *numbered parameter*
/// (such as `#1` in a macro expansion), an end-of-file, a `\noexpand` marker,
/// a marker for the end of an alignment cell, etc.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CommandCode {
    /// Code 0: escape character, usually `\`.
    Escape = 0,
    /// Code 1: begin-group character, usually `{`.
    BeginGroup = 1,
    /// Code 2: end-group character, usually `}`.
    EndGroup = 2,
    /// Code 3: math shift character, usually `$`.
    MathShift = 3,
    /// Code 4: alignment tab character, usually `&`.
    AlignmentTab = 4,
    /// Code 5: end-of-file marker.
    EOF = 5,
    /// Code 6: macro parameter character, usually `#`.
    Parameter = 6,
    /// Code 7: superscript character, usually `^`.
    Superscript = 7,
    /// Code 8: subscript character, usually `_`.
    Subscript = 8,
    /// Code 9: marker for a primitive command.
    Primitive = 9,
    /// Code 10: space character, usually ` `.
    Space = 10,
    /// Code 11: letter, usually a-z and A-Z.
    Letter = 11,
    /// Code 12: any other character, e.g. `@`, `!`, `?`.
    Other = 12,
    /// Code 13: active character, usually `~`.
    Active = 13,
    /// Code 14: argument marker.
    Argument = 14,
}
283impl CommandCode {
284    pub fn meaning<C: Character, CS: CSName<C>, W: CharWrite<C, CS>>(
285        &self,
286        c: C,
287        mut f: W,
288    ) -> std::fmt::Result {
289        match self {
290            CommandCode::BeginGroup => write!(f, "begin-group character "),
291            CommandCode::EndGroup => write!(f, "end-group character "),
292            CommandCode::MathShift => write!(f, "math shift character "),
293            CommandCode::Parameter => write!(f, "macro parameter character "),
294            CommandCode::Superscript => write!(f, "superscript character "),
295            CommandCode::Subscript => write!(f, "subscript character "),
296            CommandCode::Space => {
297                write!(f, "blank space  ")?;
298                return Ok(());
299            }
300            CommandCode::Letter => write!(f, "the letter "),
301            _ => write!(f, "the character "),
302        }?;
303        f.push_char(c);
304        Ok(())
305    }
306    pub const fn as_byte(self) -> u8 {
307        use CommandCode::*;
308        match self {
309            Escape => 0,
310            BeginGroup => 1,
311            EndGroup => 2,
312            MathShift => 3,
313            AlignmentTab => 4,
314            EOF => 5,
315            Parameter => 6,
316            Superscript => 7,
317            Subscript => 8,
318            Primitive => 9,
319            Space => 10,
320            Letter => 11,
321            Other => 12,
322            Active => 13,
323            Argument => 14,
324        }
325    }
326}
327
328impl From<CategoryCode> for CommandCode {
329    fn from(value: CategoryCode) -> Self {
330        match value {
331            CategoryCode::Escape => CommandCode::Escape,
332            CategoryCode::BeginGroup => CommandCode::BeginGroup,
333            CategoryCode::EndGroup => CommandCode::EndGroup,
334            CategoryCode::MathShift => CommandCode::MathShift,
335            CategoryCode::AlignmentTab => CommandCode::AlignmentTab,
336            CategoryCode::EOL => CommandCode::EOF,
337            CategoryCode::Parameter => CommandCode::Parameter,
338            CategoryCode::Superscript => CommandCode::Superscript,
339            CategoryCode::Subscript => CommandCode::Subscript,
340            CategoryCode::Space => CommandCode::Space,
341            CategoryCode::Letter => CommandCode::Letter,
342            CategoryCode::Other => CommandCode::Other,
343            CategoryCode::Active => CommandCode::Active,
344            _ => panic!("Invalid category code for command code: {:?}\n This is an implementation error and should not happen",value)
345        }
346    }
347}
348
349impl TryFrom<u8> for CommandCode {
350    type Error = ();
351    fn try_from(value: u8) -> Result<Self, Self::Error> {
352        use CommandCode::*;
353        Ok(match value {
354            0 => Escape,
355            1 => BeginGroup,
356            2 => EndGroup,
357            3 => MathShift,
358            4 => AlignmentTab,
359            5 => EOF,
360            6 => Parameter,
361            7 => Superscript,
362            8 => Subscript,
363            9 => Primitive,
364            10 => Space,
365            11 => Letter,
366            12 => Other,
367            13 => Active,
368            14 => Argument,
369            _ => return Err(()),
370        })
371    }
372}