flams_utils/
parsing.rs

1use crate::sourcerefs::{ByteOffset, SourcePos};
2use std::borrow::Cow;
3use std::fmt::{Debug, Display};
4use std::io::Read;
5
/// Abstraction over string-like values that are either borrowed (`&'a str`)
/// or owned ([`String`]), so that parsing code can be generic over both.
pub trait StringOrStr<'a>:
    Clone
    + Eq
    + std::hash::Hash
    + Debug
    + Display
    + AsRef<str>
    + From<&'a str>
    + for<'b> PartialEq<&'b str>
{
    /// Removes the prefix `s` from the front of `self`.
    ///
    /// The implementations for `&str` and [`String`] in this module also trim
    /// leading whitespace from whatever follows the prefix.
    ///
    /// # Errors
    ///
    /// Will return `Err` if self does not start with prefix.
    fn strip_prefix(self, s: &str) -> Result<Self, Self>;

    /// Splits `self` at index `n` and returns the part before and the part
    /// from `n` onwards.
    #[must_use]
    fn split_n(self, n: usize) -> (Self, Self);

    /// Removes leading and trailing whitespace in place.
    fn trim_ws(&mut self);

    /// Splits at every `split_char` that is not nested inside an
    /// `OPEN` ... `CLOSE` pair.
    fn split_noparens<const OPEN: char, const CLOSE: char>(
        &'a self,
        split_char: char,
    ) -> impl Iterator<Item = &'a str>;

    /// Returns the text as a [`Cow`].
    fn as_cow(&self) -> Cow<'a, str>;
}
29impl<'a> StringOrStr<'a> for &'a str {
30    #[inline]
31    fn strip_prefix(self, s: &str) -> Result<Self, Self> {
32        str::strip_prefix(self, s).map(str::trim_start).ok_or(self)
33    }
34    #[inline]
35    fn split_n(self, n: usize) -> (Self,Self) {
36        (&self[..n],&self[n..])
37    }
38    #[inline]
39    fn trim_ws(&mut self) {
40        *self = self.trim();
41    }
42    fn split_noparens<const OPEN: char, const CLOSE: char>(
43        &'a self,
44        split_char: char,
45    ) -> impl Iterator<Item = &'a str> {
46        let mut depth = 0;
47        self.split(move |c: char| {
48            if c == OPEN {
49                depth += 1;
50                false
51            } else if c == CLOSE && depth > 0 {
52                depth -= 1;
53                false
54            } else if depth > 0 {
55                false
56            } else {
57                c == split_char
58            }
59        })
60    }
61    #[inline]
62    fn as_cow(&self) -> Cow<'a,str> {
63        Cow::Borrowed(self)
64    }
65}
66impl<'a> StringOrStr<'a> for String {
67    #[allow(clippy::option_if_let_else)]
68    fn strip_prefix(self, s: &str) -> Result<Self, Self> {
69        match str::strip_prefix(&self, s) {
70            Some(s) => Ok(s.trim_start().to_string()),
71            None => Err(self),
72        }
73    }
74    #[inline]
75    fn trim_ws(&mut self) {
76        *self = self.trim().to_string();
77    }
78    fn split_n(mut self, n:usize) -> (Self,Self) {
79        let r = self.split_off(n);
80        (self,r)
81    }
82    fn split_noparens<const OPEN: char, const CLOSE: char>(
83        &'a self,
84        split_char: char,
85    ) -> impl Iterator<Item = &'a str> {
86        let mut depth = 0;
87        self.split(move |c: char| {
88            if c == OPEN {
89                depth += 1;
90                false
91            } else if c == CLOSE && depth > 0 {
92                depth -= 1;
93                false
94            } else if depth > 0 {
95                false
96            } else {
97                c == split_char
98            }
99        })
100    }
101    #[inline]
102    fn as_cow(&self) -> Cow<'a,str> {
103        Cow::Owned(self.clone())
104    }
105}
106
107pub trait ParseSource<'a>: 'a {
108    type Pos: SourcePos;
109    type Str: StringOrStr<'a>;
110    type Source;
111    fn source(&self) -> &Self::Source;
112    fn curr_pos(&self) -> Self::Pos;
113    fn pop_head(&mut self) -> Option<char>;
114    fn read_until_line_end(&mut self) -> (Self::Str, Self::Pos);
115    fn trim_start(&mut self);
116    fn starts_with(&mut self, c: char) -> bool;
117    fn peek_head(&mut self) -> Option<char>;
118    fn read_n(&mut self, i: usize) -> Self::Str;
119    fn read_while(&mut self, pred: impl FnMut(char) -> bool) -> Self::Str;
120    #[inline]
121    fn read_until(&mut self, mut pred: impl FnMut(char) -> bool) -> Self::Str {
122        self.read_while(|c| !pred(c))
123    }
124    fn read_until_str(&mut self, s: &str) -> Self::Str;
125    fn read_until_with_brackets<const OPEN: char, const CLOSE: char>(
126        &mut self,
127        pred: impl FnMut(char) -> bool,
128    ) -> Self::Str;
129    fn skip(&mut self, i: usize);
130}
131
/// A [`ParseSource`] that decodes characters on demand from a [`Read`]
/// implementation.
pub struct ParseReader<R: Read, P: SourcePos> {
    /// The underlying byte source.
    inner: R,
    /// Characters that were read and then pushed back. Used as a stack: the
    /// last element is the next character to be returned.
    buf: Vec<char>,
    /// Current position, advanced as characters are consumed.
    pos: P,
}
137impl<R: Read, P: SourcePos> ParseReader<R, P> {
138    pub fn new(inner: R) -> Self {
139        Self {
140            inner,
141            buf: Vec::new(),
142            pos: P::default(),
143        }
144    }
145}
146
147impl<'a, R: Read + 'a, P: SourcePos + 'a> ParseSource<'a> for ParseReader<R, P> {
148    type Pos = P;
149    type Str = String;
150    type Source = R;
151    #[inline]
152    fn source(&self) -> &Self::Source {
153        &self.inner
154    }
155    #[inline]
156    fn curr_pos(&self) -> P {
157        self.pos
158    }
159    #[inline]
160    fn skip(&mut self, i: usize) {
161        for _ in 0..i {
162            self.pop_head();
163        }
164    }
165    fn pop_head(&mut self) -> Option<char> {
166        match self.get_char() {
167            Some('\n') => {
168                self.pos.update_newline(false);
169                Some('\n')
170            }
171            Some('\r') => { 
172                match self.get_char() {
173                    Some('\n') => {
174                        self.pos.update_newline(true);
175                    }
176                    Some(c) => {
177                        self.pos.update_newline(false);
178                        self.push_char(c);
179                    }
180                    None => {
181                        self.pos.update_newline(false);
182                    }
183                }
184                Some('\n')
185            }
186            Some(c) => {
187                self.pos.update(c);
188                Some(c)
189            }
190            None => None,
191        }
192    }
193    fn read_until_line_end(&mut self) -> (String, P) {
194        let (s, rn) = self.find_line_end();
195        self.pos.update_str_no_newline(&s);
196        let pos = self.pos;
197        if let Some(rn) = rn {
198            self.pos.update_newline(rn);
199        }
200        (s, pos)
201    }
202    fn trim_start(&mut self) {
203        while let Some(c) = self.get_char() {
204            if c == '\n' {
205                self.pos.update_newline(false);
206            } else if c == '\r' {
207                match self.get_char() {
208                    Some('\n') => {
209                        self.pos.update_newline(true);
210                    }
211                    Some(c) => {
212                        self.push_char(c);
213                        self.pos.update_newline(false);
214                    }
215                    None => {
216                        self.pos.update_newline(false);
217                        break;
218                    }
219                }
220            } else if c.is_whitespace() {
221                self.pos.update(c);
222            } else {
223                self.push_char(c);
224                break;
225            }
226        }
227    }
228    
229    #[allow(clippy::unnecessary_map_or)]
230    fn starts_with(&mut self, c: char) -> bool {
231        self.get_char().map_or(false, |c2| {
232            self.push_char(c2);
233            c2 == c
234        })
235    }
236    fn read_while(&mut self, mut pred: impl FnMut(char) -> bool) -> Self::Str {
237        let mut ret = String::new();
238        let mut rn = false;
239        while let Some(c) = self.get_char() {
240            if !pred(c) {
241                self.push_char(c);
242                break;
243            }
244            if rn && c == '\n' {
245                self.pos.update_newline(true);
246                rn = false;
247                continue
248            }
249            if rn {
250                self.pos.update_newline(false);
251                rn = false;
252            } else if c == '\n' {
253                self.pos.update_newline(false);
254                ret.push('\n');
255                continue
256            } else if c == '\r' {
257                ret.push('\n');
258                rn = true;
259                continue
260            }
261            self.pos.update(c);
262            ret.push(c);
263        }
264        if rn {self.pos.update_newline(false);}
265        ret
266    }
267    fn read_until_with_brackets<const OPEN: char, const CLOSE: char>(
268        &mut self,
269        mut pred: impl FnMut(char) -> bool,
270    ) -> Self::Str {
271        let mut ret = String::new();
272        let mut depth = 0;
273        let mut rn = false;
274        while let Some(c) = self.get_char() {
275            if c == OPEN {
276                depth += 1;
277                self.pos.update(c);
278                ret.push(c);
279                continue;
280            } else if c == CLOSE && depth > 0 {
281                depth -= 1;
282                self.pos.update(c);
283                ret.push(c);
284                continue;
285            } else if depth > 0 {
286                if rn && c == '\n' {
287                    self.pos.update_newline(true);
288                    rn = false;
289                    continue
290                }
291                if rn {
292                    self.pos.update_newline(false);
293                    rn = false;
294                } else if c == '\n' {
295                    self.pos.update_newline(false);
296                    ret.push('\n');
297                    continue
298                } else if c == '\r' {
299                    ret.push('\n');
300                    rn = true;
301                    continue
302                }
303                self.pos.update(c);
304                ret.push(c);
305                continue;
306            }
307            if pred(c) {
308                self.push_char(c);
309                break;
310            }
311            self.pos.update(c);
312            ret.push(c);
313        }
314        if rn {self.pos.update_newline(false);}
315        ret
316    }
317    fn peek_head(&mut self) -> Option<char> {
318        self.get_char().inspect(|c| {
319            self.push_char(*c);
320        })
321    }
322    fn read_n(&mut self, i: usize) -> Self::Str {
323        let mut ret = String::new();
324        for _ in 0..i {
325            if let Some(c) = self.pop_head() {
326                ret.push(c);
327            } else {
328                break;
329            }
330        }
331        ret
332    }
333    fn read_until_str(&mut self, s: &str) -> Self::Str {
334        let mut ret = String::new();
335        while let Some(c) = self.pop_head() {
336            ret.push(c);
337            if ret.ends_with(s) {
338                for _ in 0..s.len() {
339                    self.push_char(ret.pop().unwrap_or_else(|| unreachable!()));
340                }
341                return ret;
342            }
343        }
344        ret
345    }
346}
347
348impl<R: Read, P: SourcePos> ParseReader<R, P> {
349    fn get_char(&mut self) -> Option<char> {
350        self.buf.pop().or_else(|| self.read_char())
351    }
352    fn read_char(&mut self) -> Option<char> {
353        let mut byte = [0u8];
354        self.inner.read_exact(&mut byte).ok()?;
355        let byte = byte[0];
356        if byte & 224u8 == 192u8 {
357            // a two byte unicode character
358            let mut buf = [byte, 0];
359            self.inner.read_exact(&mut buf[1..]).ok()?;
360            Self::char_from_utf8(&buf)
361        } else if byte & 240u8 == 224u8 {
362            // a three byte unicode character
363            let mut buf = [byte, 0, 0];
364            self.inner.read_exact(&mut buf[1..]).ok()?;
365            Self::char_from_utf8(&buf)
366        } else if byte & 248u8 == 240u8 {
367            // a four byte unicode character
368            let mut buf = [byte, 0, 0, 0];
369            self.inner.read_exact(&mut buf[1..]).ok()?;
370            Self::char_from_utf8(&buf)
371        } else {
372            Some(byte as char)
373        }
374    }
375    fn push_char(&mut self, c: char) {
376        self.buf.push(c);
377    }
378    fn char_from_utf8(buf: &[u8]) -> Option<char> {
379        std::str::from_utf8(buf).ok().and_then(|s| s.chars().next())
380    }
381    fn find_line_end(&mut self) -> (String, Option<bool>) {
382        let mut ret = String::new();
383        while let Some(c) = self.get_char() {
384            if c == '\n' {
385                return (ret, Some(false));
386            }
387            if c == '\r' {
388                match self.get_char() {
389                    Some('\n') => return (ret, Some(true)),
390                    Some(c) => self.push_char(c),
391                    None => (),
392                }
393                return (ret, Some(true));
394            }
395            ret.push(c);
396        }
397        (ret, None)
398    }
399}
400
/// A [`ParseSource`] over an in-memory string slice; everything it hands out
/// borrows from the original input.
pub struct ParseStr<'a, P: SourcePos> {
    /// The part of the input that has not been consumed yet.
    input: &'a str,
    /// Current position, advanced as input is consumed.
    pub pos: P,
}
405impl<'a, P: SourcePos> ParseStr<'a, P> {
406    #[must_use]
407    pub fn new(input: &'a str) -> Self {
408        Self {
409            input,
410            pos: P::default(),
411        }
412    }
413    #[inline]
414    pub fn starts_with_str(&self, s: &str) -> bool {
415        self.input.starts_with(s)
416    }
417    #[inline]
418    pub const fn rest(&self) -> &'a str {
419        self.input
420    }
421
422    pub fn read_until_inclusive(&mut self, pred: impl FnMut(char) -> bool) -> &'a str {
423        let i = self.input.find(pred).unwrap_or(self.input.len());
424        let (l, r) = self.input.split_at(i + 1);
425        self.input = r;
426        self.pos.update_str_maybe_newline(l);
427        l
428    }
429    pub fn drop_prefix(&mut self, s: &str) -> bool {
430        self.input.starts_with(s) && {
431            self.input = &self.input[s.len()..];
432            self.pos.update_str_maybe_newline(s);
433            true
434        }
435    }
436
437    pub fn preview_until_with_brackets<const OPEN: char, const CLOSE: char>(
438        &self,
439        mut pred: impl FnMut(char) -> bool,
440    ) -> &'a str {
441        let mut depth = 0;
442        let i = self
443            .input
444            .find(|c| {
445                if c == OPEN {
446                    depth += 1;
447                    false
448                } else if c == CLOSE && depth > 0 {
449                    depth -= 1;
450                    false
451                } else {
452                    depth == 0 && pred(c)
453                }
454            })
455            .unwrap_or(self.input.len());
456        let (l, _r) = self.input.split_at(i);
457        l
458    }
459
460    pub fn read_until_escaped(&mut self, find: char, escape: char) -> &'a str {
461        let mut chars = self.input.chars();
462        let mut i: usize = 0;
463        while let Some(c) = chars.next() {
464            if c == escape {
465                if let Some(c) = chars.next() {
466                    i += c.len_utf8();
467                }
468            } else if c == find {
469                let (l, r) = self.input.split_at(i);
470                self.input = r;
471                self.pos.update_str_maybe_newline(l);
472                return l;
473            }
474            i += c.len_utf8();
475        }
476        let ret = self.input;
477        self.input = "";
478        self.pos.update_str_maybe_newline(ret);
479        ret
480    }
481}
impl ParseStr<'_, ByteOffset> {
    /// Gives mutable access to the current position of a source that tracks
    /// its position as a [`ByteOffset`].
    #[inline]
    pub fn offset(&mut self) -> &mut ByteOffset {
        &mut self.pos
    }
}
488
489impl<'a, P: SourcePos + 'a> ParseSource<'a> for ParseStr<'a, P> {
490    type Pos = P;
491    type Str = &'a str;
492    type Source = &'a str;
493    fn source(&self) -> &Self::Source {
494        &self.input
495    }
496    #[inline]
497    fn curr_pos(&self) -> P {
498        self.pos
499    }
500    fn pop_head(&mut self) -> Option<char> {
501        if let Some(c) = self.input.chars().next() {
502            if c == '\n' {
503                self.pos.update_newline(false);
504                self.input = &self.input[1..];
505                Some('\n')
506            } else if c == '\r' {
507                if self.input.chars().nth(1) == Some('\n') {
508                    self.input = &self.input[2..];
509                    self.pos.update_newline(true);
510                } else {
511                    self.input = &self.input[1..];
512                    self.pos.update_newline(false);
513                }
514                Some('\n')
515            } else {
516                self.pos.update(c);
517                self.input = &self.input[c.len_utf8()..];
518                Some(c)
519            }
520        } else {
521            None
522        }
523    }
524    fn read_until_line_end(&mut self) -> (&'a str, P) {
525        if let Some(i) = self.input.find(['\r', '\n']) {
526            if self.input.as_bytes()[i] == b'\r' && self.input.as_bytes().get(i + 1) == Some(&b'\n')
527            {
528                let (l, r) = self.input.split_at(i);
529                self.input = &r[2..];
530                self.pos.update_str_no_newline(l);
531                let pos = self.pos;
532                self.pos.update_newline(true);
533                return (l, pos);
534            }
535            let (l, r) = self.input.split_at(i);
536            self.input = &r[1..];
537            self.pos.update_str_no_newline(l);
538            let pos = self.pos;
539            self.pos.update_newline(false);
540            (l, pos)
541        } else {
542            let ret = self.input;
543            self.pos.update_str_no_newline(ret);
544            self.input = "";
545            (ret, self.pos)
546        }
547    }
548    fn trim_start(&mut self) {
549        while let Some(c) = self.input.chars().next() {
550            if c == '\n' {
551                self.input = &self.input[1..];
552                self.pos.update_newline(false);
553            } else if c == '\r' {
554                self.input = &self.input[1..];
555                if self.input.starts_with('\n') {
556                    self.input = &self.input[1..];
557                    self.pos.update_newline(true);
558                } else {
559                    self.pos.update_newline(false);
560                }
561            } else if c.is_whitespace() {
562                self.input = &self.input[c.len_utf8()..];
563                self.pos.update(c);
564            } else {
565                break;
566            }
567        }
568    }
569    fn starts_with(&mut self, c: char) -> bool {
570        self.input.starts_with(c)
571    }
572    fn read_while(&mut self, mut pred: impl FnMut(char) -> bool) -> Self::Str {
573        let i = self.input.find(|c| !pred(c)).unwrap_or(self.input.len());
574        let (l, r) = self.input.split_at(i);
575        self.input = r;
576        self.pos.update_str_maybe_newline(l);
577        l
578    }
579    fn read_until_with_brackets<const OPEN: char, const CLOSE: char>(
580        &mut self,
581        mut pred: impl FnMut(char) -> bool,
582    ) -> Self::Str {
583        let mut depth = 0;
584        let i = self
585            .input
586            .find(|c| {
587                if c == OPEN {
588                    depth += 1;
589                    false
590                } else if c == CLOSE && depth > 0 {
591                    depth -= 1;
592                    false
593                } else {
594                    depth == 0 && pred(c)
595                }
596            })
597            .unwrap_or(self.input.len());
598        let (l, r) = self.input.split_at(i);
599        self.input = r;
600        self.pos.update_str_maybe_newline(l);
601        l
602    }
603    fn peek_head(&mut self) -> Option<char> {
604        self.input.chars().next()
605    }
606    fn read_n(&mut self, i: usize) -> Self::Str {
607        let (l, mut r) = self.input.split_at(i);
608        if l.ends_with('\r') && r.starts_with('\n') {
609            r = &r[1..];
610        }
611        self.input = r;
612        self.pos.update_str_maybe_newline(l);
613        l
614    }
615    fn read_until_str(&mut self, s: &str) -> Self::Str {
616        if let Some(i) = self.input.find(s) {
617            let (l, r) = self.input.split_at(i);
618            self.input = r;
619            self.pos.update_str_maybe_newline(l);
620            l
621        } else {
622            let ret = self.input;
623            self.input = "";
624            self.pos.update_str_maybe_newline(ret);
625            ret
626        }
627    }
628    fn skip(&mut self, i: usize) {
629        let (a, b) = self.input.split_at(i);
630        self.input = b;
631        self.pos.update_str_maybe_newline(a);
632    }
633}