flams_utils/
parsing.rs

1use crate::sourcerefs::{ByteOffset, SourcePos};
2use std::borrow::Cow;
3use std::fmt::{Debug, Display};
4use std::io::Read;
5
/// Common interface over the two text representations used by the parsers in
/// this file: borrowed `&'a str` slices and owned `String`s.
pub trait StringOrStr<'a>:
    AsRef<str>
    + From<&'a str>
    + Debug
    + Display
    + Eq
    + std::hash::Hash
    + Clone
    + for<'b> PartialEq<&'b str>
{
    /// Removes the prefix `s` from the front of `self`.
    ///
    /// Both implementations in this file additionally trim leading whitespace
    /// from the remainder before returning it.
    ///
    /// # Errors
    ///
    /// Will return `Err` if self does not start with prefix; the `Err` payload
    /// is the original, unmodified value.
    fn strip_prefix(self, s: &str) -> Result<Self, Self>;
    /// Splits `self` into `(head, tail)` at index `n`.
    ///
    /// Both implementations in this file treat `n` as a byte index and rely on
    /// the standard library's slicing / `String::split_off`, which panic when
    /// `n` is out of range or not on a character boundary.
    #[must_use]
    fn split_n(self, n: usize) -> (Self, Self);
    /// Trims whitespace from both ends of `self` in place.
    fn trim_ws(&mut self);
    /// Splits `self` on `split_char`, ignoring any occurrence of `split_char`
    /// that lies between an `OPEN` character and its matching `CLOSE`.
    ///
    /// A `CLOSE` without a preceding `OPEN` is not treated as a bracket.
    fn split_noparens<const OPEN: char, const CLOSE: char>(
        &'a self,
        split_char: char,
    ) -> impl Iterator<Item = &'a str>;
    /// Returns the text as a [`Cow`]: borrowed for `&'a str`, a fresh owned
    /// copy for `String`.
    fn as_cow(&self) -> Cow<'a, str>;
}
29impl<'a> StringOrStr<'a> for &'a str {
30    #[inline]
31    fn strip_prefix(self, s: &str) -> Result<Self, Self> {
32        str::strip_prefix(self, s).map(str::trim_start).ok_or(self)
33    }
34    #[inline]
35    fn split_n(self, n: usize) -> (Self, Self) {
36        (&self[..n], &self[n..])
37    }
38    #[inline]
39    fn trim_ws(&mut self) {
40        *self = self.trim();
41    }
42    fn split_noparens<const OPEN: char, const CLOSE: char>(
43        &'a self,
44        split_char: char,
45    ) -> impl Iterator<Item = &'a str> {
46        let mut depth = 0;
47        self.split(move |c: char| {
48            if c == OPEN {
49                depth += 1;
50                false
51            } else if c == CLOSE && depth > 0 {
52                depth -= 1;
53                false
54            } else if depth > 0 {
55                false
56            } else {
57                c == split_char
58            }
59        })
60    }
61    #[inline]
62    fn as_cow(&self) -> Cow<'a, str> {
63        Cow::Borrowed(self)
64    }
65}
66impl<'a> StringOrStr<'a> for String {
67    #[allow(clippy::option_if_let_else)]
68    fn strip_prefix(self, s: &str) -> Result<Self, Self> {
69        match str::strip_prefix(&self, s) {
70            Some(s) => Ok(s.trim_start().to_string()),
71            None => Err(self),
72        }
73    }
74    #[inline]
75    fn trim_ws(&mut self) {
76        *self = self.trim().to_string();
77    }
78    fn split_n(mut self, n: usize) -> (Self, Self) {
79        let r = self.split_off(n);
80        (self, r)
81    }
82    fn split_noparens<const OPEN: char, const CLOSE: char>(
83        &'a self,
84        split_char: char,
85    ) -> impl Iterator<Item = &'a str> {
86        let mut depth = 0;
87        self.split(move |c: char| {
88            if c == OPEN {
89                depth += 1;
90                false
91            } else if c == CLOSE && depth > 0 {
92                depth -= 1;
93                false
94            } else if depth > 0 {
95                false
96            } else {
97                c == split_char
98            }
99        })
100    }
101    #[inline]
102    fn as_cow(&self) -> Cow<'a, str> {
103        Cow::Owned(self.clone())
104    }
105}
106
/// A character source that tracks its current position while being consumed.
///
/// Two implementations live in this file: `ParseReader` (decodes from a
/// `Read`, yields `String`s) and `ParseStr` (slices an in-memory `&str`).
pub trait ParseSource<'a>: 'a {
    /// Position type that is advanced as input is consumed.
    type Pos: SourcePos;
    /// Text type handed out by the `read_*` methods.
    type Str: StringOrStr<'a>;
    /// Type of the underlying input.
    type Source;
    /// Returns a reference to the underlying input.
    fn source(&self) -> &Self::Source;
    /// Returns the current position.
    fn curr_pos(&self) -> Self::Pos;
    /// Consumes and returns the next character, or `None` at end of input.
    ///
    /// Both implementations in this file report `\r\n` and a lone `\r` as a
    /// single `'\n'`.
    fn pop_head(&mut self) -> Option<char>;
    /// Reads up to the next line terminator and consumes the terminator.
    ///
    /// Returns the line's content (without the terminator) together with the
    /// position reached after that content but before the terminator.
    fn read_until_line_end(&mut self) -> (Self::Str, Self::Pos);
    /// Consumes leading whitespace, including line terminators.
    fn trim_start(&mut self);
    /// Returns whether the next character is `c`, without consuming it.
    fn starts_with(&mut self, c: char) -> bool;
    /// Returns the next character without consuming it.
    fn peek_head(&mut self) -> Option<char>;
    /// Consumes and returns the next `i` units of input.
    ///
    /// NOTE(review): the implementations in this file differ in the unit —
    /// `ParseReader` counts characters, `ParseStr` uses `i` as a byte index.
    fn read_n(&mut self, i: usize) -> Self::Str;
    /// Consumes and returns characters for as long as `pred` returns `true`.
    fn read_while(&mut self, pred: impl FnMut(char) -> bool) -> Self::Str;
    /// Consumes and returns characters up to, but not including, the first
    /// one for which `pred` returns `true`.
    #[inline]
    fn read_until(&mut self, mut pred: impl FnMut(char) -> bool) -> Self::Str {
        self.read_while(|c| !pred(c))
    }
    /// Consumes and returns everything before the first occurrence of `s`;
    /// `s` itself is left unread. If `s` never occurs, the rest of the input
    /// is returned.
    fn read_until_str(&mut self, s: &str) -> Self::Str;
    /// Like [`read_until`](Self::read_until), but `pred` is not consulted for
    /// characters nested inside `OPEN`…`CLOSE` pairs.
    fn read_until_with_brackets<const OPEN: char, const CLOSE: char>(
        &mut self,
        pred: impl FnMut(char) -> bool,
    ) -> Self::Str;
    /// Consumes `i` units of input without returning them (same unit caveat
    /// as [`read_n`](Self::read_n)).
    fn skip(&mut self, i: usize);
}
131
/// A [`ParseSource`] that decodes characters on demand from a [`Read`].
pub struct ParseReader<R: Read, P: SourcePos> {
    /// Underlying byte stream.
    inner: R,
    /// Push-back stack: characters that were read and then returned to the
    /// source. It is popped (last in, first out) before `inner` is read again.
    buf: Vec<char>,
    /// Current position, advanced as characters are consumed.
    pos: P,
}
impl<R: Read, P: SourcePos> ParseReader<R, P> {
    /// Creates a reader over `inner` with an empty push-back buffer and the
    /// position set to `P::default()`.
    pub fn new(inner: R) -> Self {
        Self {
            inner,
            buf: Vec::new(),
            pos: P::default(),
        }
    }
}
146
147impl<'a, R: Read + 'a, P: SourcePos + 'a> ParseSource<'a> for ParseReader<R, P> {
148    type Pos = P;
149    type Str = String;
150    type Source = R;
151    #[inline]
152    fn source(&self) -> &Self::Source {
153        &self.inner
154    }
155    #[inline]
156    fn curr_pos(&self) -> P {
157        self.pos
158    }
159    #[inline]
160    fn skip(&mut self, i: usize) {
161        for _ in 0..i {
162            self.pop_head();
163        }
164    }
165    fn pop_head(&mut self) -> Option<char> {
166        match self.get_char() {
167            Some('\n') => {
168                self.pos.update_newline(false);
169                Some('\n')
170            }
171            Some('\r') => {
172                match self.get_char() {
173                    Some('\n') => {
174                        self.pos.update_newline(true);
175                    }
176                    Some(c) => {
177                        self.pos.update_newline(false);
178                        self.push_char(c);
179                    }
180                    None => {
181                        self.pos.update_newline(false);
182                    }
183                }
184                Some('\n')
185            }
186            Some(c) => {
187                self.pos.update(c);
188                Some(c)
189            }
190            None => None,
191        }
192    }
193    fn read_until_line_end(&mut self) -> (String, P) {
194        let (s, rn) = self.find_line_end();
195        self.pos.update_str_no_newline(&s);
196        let pos = self.pos;
197        if let Some(rn) = rn {
198            self.pos.update_newline(rn);
199        }
200        (s, pos)
201    }
202    fn trim_start(&mut self) {
203        while let Some(c) = self.get_char() {
204            if c == '\n' {
205                self.pos.update_newline(false);
206            } else if c == '\r' {
207                match self.get_char() {
208                    Some('\n') => {
209                        self.pos.update_newline(true);
210                    }
211                    Some(c) => {
212                        self.push_char(c);
213                        self.pos.update_newline(false);
214                    }
215                    None => {
216                        self.pos.update_newline(false);
217                        break;
218                    }
219                }
220            } else if c.is_whitespace() {
221                self.pos.update(c);
222            } else {
223                self.push_char(c);
224                break;
225            }
226        }
227    }
228
229    #[allow(clippy::unnecessary_map_or)]
230    fn starts_with(&mut self, c: char) -> bool {
231        self.get_char().map_or(false, |c2| {
232            self.push_char(c2);
233            c2 == c
234        })
235    }
236    fn read_while(&mut self, mut pred: impl FnMut(char) -> bool) -> Self::Str {
237        let mut ret = String::new();
238        let mut rn = false;
239        while let Some(c) = self.get_char() {
240            if !pred(c) {
241                self.push_char(c);
242                break;
243            }
244            if rn && c == '\n' {
245                self.pos.update_newline(true);
246                rn = false;
247                continue;
248            }
249            if rn {
250                self.pos.update_newline(false);
251                rn = false;
252            } else if c == '\n' {
253                self.pos.update_newline(false);
254                ret.push('\n');
255                continue;
256            } else if c == '\r' {
257                ret.push('\n');
258                rn = true;
259                continue;
260            }
261            self.pos.update(c);
262            ret.push(c);
263        }
264        if rn {
265            self.pos.update_newline(false);
266        }
267        ret
268    }
269    fn read_until_with_brackets<const OPEN: char, const CLOSE: char>(
270        &mut self,
271        mut pred: impl FnMut(char) -> bool,
272    ) -> Self::Str {
273        let mut ret = String::new();
274        let mut depth = 0;
275        let mut rn = false;
276        while let Some(c) = self.get_char() {
277            if c == OPEN {
278                depth += 1;
279                self.pos.update(c);
280                ret.push(c);
281                continue;
282            } else if c == CLOSE && depth > 0 {
283                depth -= 1;
284                self.pos.update(c);
285                ret.push(c);
286                continue;
287            } else if depth > 0 {
288                if rn && c == '\n' {
289                    self.pos.update_newline(true);
290                    rn = false;
291                    continue;
292                }
293                if rn {
294                    self.pos.update_newline(false);
295                    rn = false;
296                } else if c == '\n' {
297                    self.pos.update_newline(false);
298                    ret.push('\n');
299                    continue;
300                } else if c == '\r' {
301                    ret.push('\n');
302                    rn = true;
303                    continue;
304                }
305                self.pos.update(c);
306                ret.push(c);
307                continue;
308            }
309            if pred(c) {
310                self.push_char(c);
311                break;
312            }
313            self.pos.update(c);
314            ret.push(c);
315        }
316        if rn {
317            self.pos.update_newline(false);
318        }
319        ret
320    }
321    fn peek_head(&mut self) -> Option<char> {
322        self.get_char().inspect(|c| {
323            self.push_char(*c);
324        })
325    }
326    fn read_n(&mut self, i: usize) -> Self::Str {
327        let mut ret = String::new();
328        for _ in 0..i {
329            if let Some(c) = self.pop_head() {
330                ret.push(c);
331            } else {
332                break;
333            }
334        }
335        ret
336    }
337    fn read_until_str(&mut self, s: &str) -> Self::Str {
338        let mut ret = String::new();
339        while let Some(c) = self.pop_head() {
340            ret.push(c);
341            if ret.ends_with(s) {
342                for _ in 0..s.len() {
343                    self.push_char(ret.pop().unwrap_or_else(|| unreachable!()));
344                }
345                return ret;
346            }
347        }
348        ret
349    }
350}
351
352impl<R: Read, P: SourcePos> ParseReader<R, P> {
353    fn get_char(&mut self) -> Option<char> {
354        self.buf.pop().or_else(|| self.read_char())
355    }
356    fn read_char(&mut self) -> Option<char> {
357        let mut byte = [0u8];
358        self.inner.read_exact(&mut byte).ok()?;
359        let byte = byte[0];
360        if byte & 224u8 == 192u8 {
361            // a two byte unicode character
362            let mut buf = [byte, 0];
363            self.inner.read_exact(&mut buf[1..]).ok()?;
364            Self::char_from_utf8(&buf)
365        } else if byte & 240u8 == 224u8 {
366            // a three byte unicode character
367            let mut buf = [byte, 0, 0];
368            self.inner.read_exact(&mut buf[1..]).ok()?;
369            Self::char_from_utf8(&buf)
370        } else if byte & 248u8 == 240u8 {
371            // a four byte unicode character
372            let mut buf = [byte, 0, 0, 0];
373            self.inner.read_exact(&mut buf[1..]).ok()?;
374            Self::char_from_utf8(&buf)
375        } else {
376            Some(byte as char)
377        }
378    }
379    fn push_char(&mut self, c: char) {
380        self.buf.push(c);
381    }
382    fn char_from_utf8(buf: &[u8]) -> Option<char> {
383        std::str::from_utf8(buf).ok().and_then(|s| s.chars().next())
384    }
385    fn find_line_end(&mut self) -> (String, Option<bool>) {
386        let mut ret = String::new();
387        while let Some(c) = self.get_char() {
388            if c == '\n' {
389                return (ret, Some(false));
390            }
391            if c == '\r' {
392                match self.get_char() {
393                    Some('\n') => return (ret, Some(true)),
394                    Some(c) => self.push_char(c),
395                    None => (),
396                }
397                return (ret, Some(true));
398            }
399            ret.push(c);
400        }
401        (ret, None)
402    }
403}
404
/// A [`ParseSource`] over an in-memory string; reads hand out sub-slices of
/// the original input.
pub struct ParseStr<'a, P: SourcePos> {
    /// The part of the input that has not been consumed yet.
    input: &'a str,
    /// Current position, advanced as input is consumed.
    pub pos: P,
}
409impl<'a, P: SourcePos> ParseStr<'a, P> {
410    #[must_use]
411    pub fn new(input: &'a str) -> Self {
412        Self {
413            input,
414            pos: P::default(),
415        }
416    }
417    #[inline]
418    pub fn starts_with_str(&self, s: &str) -> bool {
419        self.input.starts_with(s)
420    }
421    #[inline]
422    pub const fn rest(&self) -> &'a str {
423        self.input
424    }
425
426    pub fn read_until_inclusive(&mut self, pred: impl FnMut(char) -> bool) -> &'a str {
427        let i = self.input.find(pred).unwrap_or(self.input.len());
428        let (l, r) = self.input.split_at(i + 1);
429        self.input = r;
430        self.pos.update_str_maybe_newline(l);
431        l
432    }
433    pub fn drop_prefix(&mut self, s: &str) -> bool {
434        self.input.starts_with(s) && {
435            self.input = &self.input[s.len()..];
436            self.pos.update_str_maybe_newline(s);
437            true
438        }
439    }
440
441    pub fn preview_until_with_brackets<const OPEN: char, const CLOSE: char>(
442        &self,
443        mut pred: impl FnMut(char) -> bool,
444    ) -> &'a str {
445        let mut depth = 0;
446        let i = self
447            .input
448            .find(|c| {
449                if c == OPEN {
450                    depth += 1;
451                    false
452                } else if c == CLOSE && depth > 0 {
453                    depth -= 1;
454                    false
455                } else {
456                    depth == 0 && pred(c)
457                }
458            })
459            .unwrap_or(self.input.len());
460        let (l, _r) = self.input.split_at(i);
461        l
462    }
463
464    pub fn read_until_escaped(&mut self, find: char, escape: char) -> &'a str {
465        let mut chars = self.input.chars();
466        let mut i: usize = 0;
467        while let Some(c) = chars.next() {
468            if c == escape {
469                if let Some(c) = chars.next() {
470                    i += c.len_utf8();
471                }
472            } else if c == find {
473                let (l, r) = self.input.split_at(i);
474                self.input = r;
475                self.pos.update_str_maybe_newline(l);
476                return l;
477            }
478            i += c.len_utf8();
479        }
480        let ret = self.input;
481        self.input = "";
482        self.pos.update_str_maybe_newline(ret);
483        ret
484    }
485}
impl ParseStr<'_, ByteOffset> {
    /// Returns mutable access to the parser's `ByteOffset` position.
    #[inline]
    pub const fn offset(&mut self) -> &mut ByteOffset {
        &mut self.pos
    }
}
492
493impl<'a, P: SourcePos + 'a> ParseSource<'a> for ParseStr<'a, P> {
494    type Pos = P;
495    type Str = &'a str;
496    type Source = &'a str;
497    fn source(&self) -> &Self::Source {
498        &self.input
499    }
500    #[inline]
501    fn curr_pos(&self) -> P {
502        self.pos
503    }
504    fn pop_head(&mut self) -> Option<char> {
505        if let Some(c) = self.input.chars().next() {
506            if c == '\n' {
507                self.pos.update_newline(false);
508                self.input = &self.input[1..];
509                Some('\n')
510            } else if c == '\r' {
511                if self.input.chars().nth(1) == Some('\n') {
512                    self.input = &self.input[2..];
513                    self.pos.update_newline(true);
514                } else {
515                    self.input = &self.input[1..];
516                    self.pos.update_newline(false);
517                }
518                Some('\n')
519            } else {
520                self.pos.update(c);
521                self.input = &self.input[c.len_utf8()..];
522                Some(c)
523            }
524        } else {
525            None
526        }
527    }
528    fn read_until_line_end(&mut self) -> (&'a str, P) {
529        if let Some(i) = self.input.find(['\r', '\n']) {
530            if self.input.as_bytes()[i] == b'\r' && self.input.as_bytes().get(i + 1) == Some(&b'\n')
531            {
532                let (l, r) = self.input.split_at(i);
533                self.input = &r[2..];
534                self.pos.update_str_no_newline(l);
535                let pos = self.pos;
536                self.pos.update_newline(true);
537                return (l, pos);
538            }
539            let (l, r) = self.input.split_at(i);
540            self.input = &r[1..];
541            self.pos.update_str_no_newline(l);
542            let pos = self.pos;
543            self.pos.update_newline(false);
544            (l, pos)
545        } else {
546            let ret = self.input;
547            self.pos.update_str_no_newline(ret);
548            self.input = "";
549            (ret, self.pos)
550        }
551    }
552    fn trim_start(&mut self) {
553        while let Some(c) = self.input.chars().next() {
554            if c == '\n' {
555                self.input = &self.input[1..];
556                self.pos.update_newline(false);
557            } else if c == '\r' {
558                self.input = &self.input[1..];
559                if self.input.starts_with('\n') {
560                    self.input = &self.input[1..];
561                    self.pos.update_newline(true);
562                } else {
563                    self.pos.update_newline(false);
564                }
565            } else if c.is_whitespace() {
566                self.input = &self.input[c.len_utf8()..];
567                self.pos.update(c);
568            } else {
569                break;
570            }
571        }
572    }
573    fn starts_with(&mut self, c: char) -> bool {
574        self.input.starts_with(c)
575    }
576    fn read_while(&mut self, mut pred: impl FnMut(char) -> bool) -> Self::Str {
577        let i = self.input.find(|c| !pred(c)).unwrap_or(self.input.len());
578        let (l, r) = self.input.split_at(i);
579        self.input = r;
580        self.pos.update_str_maybe_newline(l);
581        l
582    }
583    fn read_until_with_brackets<const OPEN: char, const CLOSE: char>(
584        &mut self,
585        mut pred: impl FnMut(char) -> bool,
586    ) -> Self::Str {
587        let mut depth = 0;
588        let i = self
589            .input
590            .find(|c| {
591                if c == OPEN {
592                    depth += 1;
593                    false
594                } else if c == CLOSE && depth > 0 {
595                    depth -= 1;
596                    false
597                } else {
598                    depth == 0 && pred(c)
599                }
600            })
601            .unwrap_or(self.input.len());
602        let (l, r) = self.input.split_at(i);
603        self.input = r;
604        self.pos.update_str_maybe_newline(l);
605        l
606    }
607    fn peek_head(&mut self) -> Option<char> {
608        self.input.chars().next()
609    }
610    fn read_n(&mut self, i: usize) -> Self::Str {
611        let (l, mut r) = self.input.split_at(i);
612        if l.ends_with('\r') && r.starts_with('\n') {
613            r = &r[1..];
614        }
615        self.input = r;
616        self.pos.update_str_maybe_newline(l);
617        l
618    }
619    fn read_until_str(&mut self, s: &str) -> Self::Str {
620        if let Some(i) = self.input.find(s) {
621            let (l, r) = self.input.split_at(i);
622            self.input = r;
623            self.pos.update_str_maybe_newline(l);
624            l
625        } else {
626            let ret = self.input;
627            self.input = "";
628            self.pos.update_str_maybe_newline(ret);
629            ret
630        }
631    }
632    fn skip(&mut self, i: usize) {
633        let (a, b) = self.input.split_at(i);
634        self.input = b;
635        self.pos.update_str_maybe_newline(a);
636    }
637}