Skip to main content

tex_glyphs/
glyphs.rs

1/*! [`Glyph`] and [`GlyphList`] types.*/
2use std::fmt::{Debug, Display, Write};
3
4/// A glyph is a character in some font.
5#[derive(Clone, PartialEq, Eq, Hash)]
6pub struct Glyph(pub(crate) GlyphI);
7
/// A Combinator (e.g. negation slash, umlaut-dots etc.); can be applied to yield a new
/// unicode character.
///
/// Wraps the single combining character that is attached to a base character.
/// The type is a plain `char` newtype, so it is cheap to copy and compare.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Combinator(char);
11impl Combinator {
12    /// Apply the combinator to a char
13    pub fn apply_char(&self, c: char) -> String {
14        use unicode_normalization::char::compose;
15        if let Some(c) = compose(c, self.0) {
16            c.to_string()
17        } else if self.0 == '\u{0338}' && c == ' ' {
18            "/".to_string()
19        } else {
20            let mut r = String::with_capacity(c.len_utf8() + self.0.len_utf8());
21            r.push(c);
22            r.push(self.0);
23            r
24        }
25    }
26
27    /// Apply the combinator to a string slice
28    pub fn apply_str(&self, s: &str) -> String {
29        let mut chars = s.chars();
30        let mut ret = if let Some(n) = chars.next() {
31            self.apply_char(n)
32        } else {
33            self.apply_char(' ')
34        };
35        for c in chars {
36            ret.push(c);
37        }
38        ret
39    }
40
41    /// Apply the combinator to a glyph
42    // TODO optimize
43    pub fn apply_glyph(&self, g: &Glyph) -> String {
44        self.apply_str(&g.to_string())
45    }
46}
47
48impl Glyph {
49    /// Get the name of this glyph as a [`GlyphName`], e.g.
50    /// ```
51    /// # use tex_glyphs::glyphs::Glyph;
52    /// assert_eq!(&Glyph::get("Gamma").to_string(),"Γ");
53    /// ```
54    #[must_use]
55    pub const fn name(&self) -> GlyphName {
56        GlyphName(&self.0)
57    }
58    /// Get the undefined glyph (i.e. the glyph with name `.undefined`).
59    #[must_use]
60    pub const fn undefined() -> Self {
61        Self(GlyphI::S(0))
62    }
63    /// Whether this glyph is defined.
64    #[must_use]
65    pub const fn is_defined(&self) -> bool {
66        match self.0 {
67            GlyphI::S(i) => i != 0,
68            GlyphI::Undefined(_) => false,
69            _ => true,
70        }
71    }
72
73    /// Convert this glyph to a [`Combinator`]
74    #[inline]
75    pub fn as_combinator(&self) -> Option<Combinator> {
76        self.0.as_combinator()
77    }
78
79    /// Lookup a glyph by *value*, i.e. `Glyph::lookup("Γ")` returns the glyph with name `Gamma`.
80    /// ```
81    /// # use tex_glyphs::glyphs::Glyph;
82    /// assert_eq!(Glyph::lookup("Γ").unwrap(),Glyph::get("Gamma"));
83    /// ```
84    #[must_use]
85    pub fn lookup(s: &str) -> Option<Self> {
86        crate::GLYPH_LOOKUP.get(s).map(|g| Self(GlyphI::S(*g)))
87    }
88
89    /// Returns the glyph with the given name or the undefined glyph if no such glyph exists.
90    pub fn get<S: AsRef<str>>(s: S) -> Self {
91        let s = s.as_ref();
92        get_i(s).map_or_else(|| Self(GlyphI::Undefined(s.into())), |g| g)
93    }
94}
95impl Display for Glyph {
96    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97        Display::fmt(&self.0, f)
98    }
99}
100impl Debug for Glyph {
101    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102        write!(f, "Glyph({})=\'{}\'", self.name(), self)
103    }
104}
105
106/// A list of glyphs in some font.
107#[derive(Clone, PartialEq, Eq, Hash, Debug)]
108pub struct GlyphList(pub(crate) [Glyph; 256]);
109impl GlyphList {
110    /// Get the glyph at the given index.
111    #[must_use]
112    pub fn get(&self, c: u8) -> Glyph {
113        self.0[c as usize].clone()
114    }
115    /// Returns false if this is the undefined glyph list, where every glyph is undefined.
116    #[must_use]
117    pub fn is_defined(&self) -> bool {
118        *self != UNDEFINED_LIST
119    }
120}
121
122/// Utility struct for displaying the name of a [`Glyph`] (e.g. `uni0041`, `A` or `Gamma`).
123pub struct GlyphName<'a>(&'a GlyphI);
124impl Display for GlyphName<'_> {
125    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
126        match &self.0 {
127            GlyphI::S(i) => f.write_str(crate::GLYPH_NAMES[*i as usize]),
128            GlyphI::Unicode(c) => write!(f, "uni{:04X}", *c as u32),
129            GlyphI::Ls(ls) => {
130                for g in ls {
131                    Display::fmt(&GlyphName(g), f)?;
132                }
133                Ok(())
134            }
135            GlyphI::Undefined(s) => Display::fmt(s, f),
136        }
137    }
138}
139
/// Crate-internal representation of a glyph.
#[derive(Clone, Eq, PartialEq, Hash)]
pub(crate) enum GlyphI {
    /// Index into the crate's static glyph tables; index `0` is the undefined glyph.
    S(u16),
    /// A single Unicode character.
    Unicode(char),
    /// A sequence of glyphs that are rendered one after another.
    Ls(Box<[GlyphI]>),
    /// A glyph name that could not be resolved; the name is kept for display.
    Undefined(Box<str>),
}
147impl GlyphI {
148    fn as_combinator(&self) -> Option<Combinator> {
149        use unicode_normalization::UnicodeNormalization;
150        match self {
151            Self::S(i) => {
152                let mut it = crate::GLYPH_LIST[*i as usize].nfd();
153                if it.next() == Some('◌') {
154                    it.next().and_then(|c| {
155                        if it.next().is_none() {
156                            Some(Combinator(c))
157                        } else {
158                            None
159                        }
160                    })
161                } else {
162                    None
163                }
164            }
165            Self::Unicode(c) => {
166                let s = c.to_string();
167                let mut it = s.nfd();
168                if it.next() == Some('◌') {
169                    it.next().and_then(|c| {
170                        if it.next().is_none() {
171                            Some(Combinator(c))
172                        } else {
173                            None
174                        }
175                    })
176                } else {
177                    None
178                }
179            }
180            _ => None,
181        }
182    }
183}
184impl Display for GlyphI {
185    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
186        match self {
187            Self::S(i) => Display::fmt(&crate::GLYPH_LIST[*i as usize], f),
188            Self::Unicode(c) => f.write_char(*c),
189            Self::Ls(ls) => {
190                for g in ls {
191                    Display::fmt(g, f)?;
192                }
193                Ok(())
194            }
195            Self::Undefined(_) => f.write_str("???"),
196        }
197    }
198}
199
200pub(crate) const UNDEFINED: Glyph = Glyph(GlyphI::S(0));
201pub(crate) const UNDEFINED_LIST: GlyphList = GlyphList([UNDEFINED; 256]);
202
203#[allow(clippy::cognitive_complexity)]
204fn get_i(s: &str) -> Option<Glyph> {
205    match crate::GLYPH_MAP.get(s) {
206        Some(g) => Some(Glyph(GlyphI::S(*g))),
207        None if s.starts_with('.') => get_i(&s[1..]),
208        None if s.contains('.') => {
209            let Some(i) = s.find('.') else { unreachable!() };
210            get_i(&s[..i])
211        }
212        None if s.ends_with("_SC") => {
213            get_i(&s[..s.len() - 3]) // TODO - remove or convert to smallcaps?
214        }
215        None if s.ends_with("_os") => {
216            get_i(&s[..s.len() - 3]) // TODO - what does "osf"/"os" signify?
217        }
218        None if s.ends_with("_sub")
219            || s.ends_with("_sup")
220            || s.ends_with("_SUB")
221            || s.ends_with("_SUP") =>
222        {
223            get_i(&s[..s.len() - 4])
224        } // TODO unicode does not have subscript/superscript letters (for the most part)
225        None if s.ends_with("superior") || s.ends_with("inferior") => get_i(&s[..s.len() - 8]),
226        None if s.ends_with("_swash") || s.ends_with("_short") => get_i(&s[..s.len() - 6]),
227        None if s.ends_with("_swash1") => get_i(&s[..s.len() - 7]),
228        None if s.ends_with("jmn") => get_i(&s[..s.len() - 3]),
229        None if s.ends_with("_alt") => get_i(&s[..s.len() - 4]),
230        None if s == "alt" => None, //UNDEFINED.clone(),
231        None if s.ends_with("alt") => get_i(&s[..s.len() - 3]),
232        None if s.ends_with("Greek") || s.ends_with("greek") => get_i(&s[..s.len() - 5]),
233        None if s == "text" => None, //UNDEFINED.clone(),
234        None if s.ends_with("text") => get_i(&s[..s.len() - 4]),
235        None if s.ends_with("display") => get_i(&s[..s.len() - 7]),
236        None if s.ends_with("disp") => get_i(&s[..s.len() - 4]),
237        None if s.starts_with('_') && s.ends_with('_') => get_i(&s[1..s.len() - 1]),
238        None if s.contains('_') => {
239            let rets = s
240                .split('_')
241                .filter(|v| !v.is_empty())
242                .map(get_i)
243                .collect::<Vec<_>>();
244            if rets.iter().any(Option::is_none) {
245                return None;
246            }
247            Some(Glyph(GlyphI::Ls(
248                rets.into_iter()
249                    .map(|o| o.unwrap_or_else(|| unreachable!()).0)
250                    .collect(),
251            )))
252        }
253        None if s.starts_with("uni") => {
254            match parse_unicode(&s[3..]) {
255                Some(Ok(c)) => Some(Glyph(GlyphI::Unicode(c))),
256                Some(Err(ls)) => Some(Glyph(GlyphI::Ls(ls))),
257                None => None, //UNDEFINED.clone()
258            }
259        }
260        None if s.starts_with('u') => {
261            match parse_unicode(&s[1..]) {
262                Some(Ok(c)) => Some(Glyph(GlyphI::Unicode(c))),
263                Some(Err(ls)) => Some(Glyph(GlyphI::Ls(ls))),
264                None => None, //UNDEFINED.clone()
265            }
266        }
267        None if s.ends_with('1') || s.ends_with('2') || s.ends_with('3') || s.ends_with('4') => {
268            get_i(&s[..s.len() - 1])
269        }
270        None if s == "SSsmall" => get_i("germandbls"),
271        None if s.starts_with("aux") => get_i(&s[3..]),
272        _ => None,
273    }
274}
275
/// Parses `s` as a hexadecimal code point and converts it to a `char`.
///
/// Returns `None` if `s` is not valid hexadecimal for a `u32` or if the value
/// is not a Unicode scalar value (e.g. a surrogate).
fn parse_one(s: &str) -> Option<char> {
    let code_point = u32::from_str_radix(s, 16).ok()?;
    char::from_u32(code_point)
}
281fn parse_unicode(s: &str) -> Option<Result<char, Box<[GlyphI]>>> {
282    let mut s = s.trim_start();
283    if s.contains(' ') {
284        let r = s.split(' ').map(parse_one).collect::<Option<Vec<_>>>();
285        return r.map(|v| {
286            Err(v
287                .into_iter()
288                .map(GlyphI::Unicode)
289                .collect::<Vec<_>>()
290                .into())
291        });
292    }
293    if s.len() == 4 {
294        return parse_one(s).map(Ok);
295    }
296    if s.len() % 4 == 0 {
297        let mut v = Vec::new();
298        while !s.is_empty() {
299            match parse_one(&s[..4]) {
300                Some(c) => {
301                    s = s[4..].trim_start();
302                    v.push(GlyphI::Unicode(c));
303                }
304                None => return None,
305            }
306        }
307        Some(Err(v.into_boxed_slice()))
308    } else {
309        parse_one(s).map(Ok)
310    }
311}