compile_regex/parse/
mod.rs

1//! Regex parsing logic. Heavily inspired by AST parsing in the `regex-syntax` crate:
2//!
3//! <https://github.com/rust-lang/regex/blob/master/regex-syntax/src/ast/parse.rs>
4
5use core::{fmt, ops};
6
7use crate::{
8    ast::{CountedRepetition, GroupName, Node, Span, Spanned, Syntax},
9    utils::{
10        ceil_char_boundary, is_char_boundary, is_escapable_char, is_meta_char, split_first_char,
11        Stack,
12    },
13    Error, ErrorKind,
14};
15
16#[cfg(test)]
17mod tests;
18
19/// Result of [validating](RegexOptions::validate()) a regular expression.
20#[derive(Debug, Clone)]
21#[non_exhaustive]
22pub struct ValidationOutput {
23    /// Number of syntax [`Node`]s in the expression.
24    pub node_count: usize,
25}
26
27/// Regular expression parsing options.
28///
29/// # Examples
30///
31/// Initializing a regex with the "ignore whitespace" flag enabled:
32///
33/// ```
34/// use compile_regex::{ast::{Node, Syntax}, RegexOptions};
35///
36/// const SYNTAX: Syntax = RegexOptions::DEFAULT
37///     .ignore_whitespace(true)
38///     .parse(r"(?<digits> # This is a comment
39///         [ \d - ]+ # since `-` is a last non-whitespace char in a set,
40///                   ## it's a literal '-'
41///     )");
42///
43/// let comment_count = SYNTAX
44///     .iter()
45///     .filter(|spanned| matches!(spanned.node, Node::Comment))
46///     .count();
47/// // Sequential comments are glued together, so the number of comment nodes is 2
48/// // rather than 3.
49/// assert_eq!(comment_count, 2);
50/// ```
51#[derive(Debug, Default)]
52pub struct RegexOptions {
53    ignore_whitespace: bool,
54}
55
56impl RegexOptions {
57    /// Default options.
58    pub const DEFAULT: Self = Self {
59        ignore_whitespace: false,
60    };
61
62    /// Sets whether whitespace should be ignored when parsing. This is equivalent to setting `(?x)` flag
63    /// at the start of the regex.
64    #[must_use]
65    pub const fn ignore_whitespace(mut self, yes: bool) -> Self {
66        self.ignore_whitespace = yes;
67        self
68    }
69
70    /// Tries to validate the provided regular expression.
71    ///
72    /// # Errors
73    ///
74    /// Returns an error if the provided `regex` is not a valid regular expression.
75    pub const fn try_validate(&self, regex: &str) -> Result<ValidationOutput, Error> {
76        let mut state = <ParseState>::new(regex, self);
77        loop {
78            match state.step() {
79                Err(err) => return Err(err),
80                Ok(ops::ControlFlow::Break(())) => break,
81                Ok(ops::ControlFlow::Continue(())) => { /* continue */ }
82            }
83        }
84
85        Ok(ValidationOutput {
86            node_count: state.syntax.len(),
87        })
88    }
89
90    /// Validates the provided regular expression, panicking on errors. This is a shortcut for
91    /// [`Self::try_validate()`]`.unwrap()`.
92    ///
93    /// # Panics
94    ///
95    /// Panics if the provided `regex` is not a valid regular expression.
96    #[track_caller]
97    pub const fn validate(&self, regex: &str) -> ValidationOutput {
98        match self.try_validate(regex) {
99            Err(err) => err.compile_panic(regex),
100            Ok(output) => output,
101        }
102    }
103
104    /// Tries to parse the provided regular expression.
105    ///
106    /// The `CAP` constant specifies the capacity of the produced [`Syntax`]. If it is exceeded,
107    /// the method will return [`AstOverflow`](ErrorKind::AstOverflow). Use the [`parse!`](crate::parse!) macro
108    /// to determine the capacity automatically.
109    ///
110    /// # Errors
111    ///
112    /// - Returns an error if the provided `regex` is not a valid regular expression.
113    /// - Errors if one of internal limits is hit (e.g., the number of [syntax spans](Spanned)
114    ///   or the number of named captures).
115    pub const fn try_parse<const CAP: usize>(&self, regex: &str) -> Result<Syntax<CAP>, Error> {
116        let mut state = ParseState::custom(regex, self, true);
117        loop {
118            match state.step() {
119                Err(err) => return Err(err),
120                Ok(ops::ControlFlow::Break(())) => return Ok(state.into_syntax()),
121                Ok(ops::ControlFlow::Continue(())) => { /* continue */ }
122            }
123        }
124    }
125
126    /// Parses the provided regular expression, panicking on errors. This is a shortcut for
127    /// [`Self::try_parse()`]`.unwrap()`.
128    ///
129    /// # Panics
130    ///
131    /// Panics in the same situations in which [`Self::try_parse()`] returns an error.
132    #[track_caller]
133    pub const fn parse<const CAP: usize>(&self, regex: &str) -> Syntax<CAP> {
134        match self.try_parse(regex) {
135            Ok(spans) => spans,
136            Err(err) => err.compile_panic(regex),
137        }
138    }
139
140    /// Parses `regex` without restricting the number of produced [spans](Spanned).
141    ///
142    /// # Errors
143    ///
144    /// Returns an error if the provided `regex` is not a valid regular expression.
145    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
146    #[cfg(feature = "alloc")]
147    pub fn try_parse_to_vec(&self, regex: &str) -> Result<crate::alloc::Vec<Spanned>, Error> {
148        use core::mem;
149
150        use crate::alloc::Vec;
151
152        /// Max number of AST spans added during a single parsing step. This is roughly
153        /// 2x the max number of "main" / non-comment spans that can be added on a single step
154        /// (note that we glue line comments together).
155        const STEP_CAP: usize = 8;
156
157        let mut state = ParseState::<STEP_CAP>::custom(regex, self, true);
158        let mut syntax = Vec::new();
159        loop {
160            let step_result = state.step()?;
161            // Empty all captured spans to `syntax`. Since we never read from spans in the parser,
162            // this doesn't influence subsequent steps.
163            #[allow(clippy::missing_panics_doc)] // `syntax` is always set by design
164            let SyntaxOrLen::Syntax(new_syntax) = &mut state.syntax
165            else {
166                panic!("cannot happen");
167            };
168            let new_syntax = mem::replace(new_syntax, Syntax::new(Spanned::DUMMY));
169            syntax.extend_from_slice(new_syntax.as_slice());
170
171            if step_result.is_break() {
172                return Ok(syntax);
173            }
174        }
175    }
176}
177
178#[derive(Debug)]
179enum PrimitiveKind {
180    Literal(char),
181    PerlClass,
182    Other,
183}
184
185impl PrimitiveKind {
186    const fn is_valid_set_member(&self) -> bool {
187        matches!(self, Self::Literal(_) | Self::PerlClass)
188    }
189}
190
191#[derive(Debug, Clone, Copy)]
192struct Flags {
193    is_empty: bool,
194    ignore_whitespace: Option<bool>,
195}
196
197/// Parser state backup.
198#[derive(Debug)]
199struct Backup {
200    pos: usize,
201    span_count: usize,
202}
203
204/// Maximum supported group depth.
205const GROUP_DEPTH: usize = 16;
206/// Maximum supported number of named groups.
207const MAX_NAMED_GROUPS: usize = 64;
208
209#[derive(Debug, Clone, Copy)]
210struct GroupFrame {
211    /// `ignore_whitespace` value when the group was pushed to the stack (= one that should be set
212    /// when the group is popped from the stack).
213    prev_ignore_whitespace: bool,
214}
215
216impl GroupFrame {
217    const DUMMY: Self = Self {
218        prev_ignore_whitespace: false,
219    };
220}
221
222struct GroupNames<const CAP: usize> {
223    items: [Span; CAP],
224    len: usize,
225}
226
227impl<const CAP: usize> fmt::Debug for GroupNames<CAP> {
228    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
229        let (items, _) = self.items.split_at(self.len);
230        formatter.debug_list().entries(items).finish()
231    }
232}
233
234impl<const CAP: usize> GroupNames<CAP> {
235    const fn new() -> Self {
236        Self {
237            items: [Span::new(0, 0); CAP],
238            len: 0,
239        }
240    }
241
242    const fn insert(&mut self, item: Span, regex_bytes: &[u8]) -> Result<(), Error> {
243        const fn bytes_eq(bytes: &[u8], x: Span, y: Span) -> bool {
244            let mut i = 0;
245            while i < x.len() {
246                if bytes[x.start + i] != bytes[y.start + i] {
247                    return false;
248                }
249                i += 1;
250            }
251            true
252        }
253
254        if self.len == CAP {
255            return Err(ErrorKind::NamedGroupOverflow.with_position(item.start..item.end));
256        }
257
258        let item_len = item.len();
259        let mut i = 0;
260        while i < self.len {
261            let prev_pos = self.items[i];
262            if prev_pos.len() == item_len && bytes_eq(regex_bytes, prev_pos, item) {
263                let err = ErrorKind::DuplicateCaptureName {
264                    prev_pos: prev_pos.start..prev_pos.end,
265                };
266                return Err(err.with_position(item.start..item.end));
267            }
268            i += 1;
269        }
270
271        self.items[self.len] = item;
272        self.len += 1;
273        Ok(())
274    }
275}
276
277#[derive(Debug)]
278enum SyntaxOrLen<const CAP: usize> {
279    Syntax(Syntax<CAP>),
280    Len(usize),
281}
282
283impl<const CAP: usize> SyntaxOrLen<CAP> {
284    const fn new(with_syntax: bool) -> Self {
285        if with_syntax {
286            Self::Syntax(Syntax::new(Spanned::DUMMY))
287        } else {
288            Self::Len(0)
289        }
290    }
291
292    const fn len(&self) -> usize {
293        match self {
294            Self::Syntax(syntax) => syntax.len(),
295            Self::Len(len) => *len,
296        }
297    }
298
299    const fn trim(&mut self, new_len: usize) {
300        debug_assert!(self.len() >= new_len);
301        match self {
302            Self::Syntax(syntax) => syntax.trim(new_len),
303            Self::Len(len) => *len = new_len,
304        }
305    }
306}
307
308#[derive(Debug)]
309pub(crate) struct ParseState<'a, const CAP: usize = 0> {
310    /// Always a valid UTF-8 string.
311    regex_bytes: &'a [u8],
312    /// Points to the next char to be parsed in `regex_bytes`.
313    pos: usize,
314    groups: Stack<GroupFrame, GROUP_DEPTH>,
315    group_names: GroupNames<MAX_NAMED_GROUPS>,
316    set_depth: usize,
317    is_empty_last_item: bool,
318    ignore_whitespace: bool,
319    syntax: SyntaxOrLen<CAP>,
320}
321
322impl<'a> ParseState<'a> {
323    pub(crate) const fn new(regex: &'a str, options: &RegexOptions) -> Self {
324        Self::custom(regex, options, false)
325    }
326}
327
328impl<'a, const CAP: usize> ParseState<'a, CAP> {
329    pub(crate) const fn custom(regex: &'a str, options: &RegexOptions, with_syntax: bool) -> Self {
330        Self {
331            regex_bytes: regex.as_bytes(),
332            pos: 0,
333            groups: Stack::new(GroupFrame::DUMMY),
334            group_names: GroupNames::new(),
335            set_depth: 0,
336            is_empty_last_item: true,
337            ignore_whitespace: options.ignore_whitespace,
338            syntax: SyntaxOrLen::new(with_syntax),
339        }
340    }
341
342    const fn backup(&self) -> Backup {
343        Backup {
344            pos: self.pos,
345            span_count: self.syntax.len(),
346        }
347    }
348
349    const fn restore(&mut self, backup: &Backup) {
350        debug_assert!(self.pos >= backup.pos);
351
352        self.pos = backup.pos;
353        self.syntax.trim(backup.span_count);
354    }
355
356    const fn ascii_char_at(&self, pos: usize) -> Option<u8> {
357        if pos >= self.regex_bytes.len() {
358            None
359        } else {
360            let ch = self.regex_bytes[pos];
361            if ch <= 0x7f {
362                Some(ch)
363            } else {
364                None
365            }
366        }
367    }
368
369    const fn ascii_char(&self) -> Option<u8> {
370        self.ascii_char_at(self.pos)
371    }
372
373    const fn error(&self, start: usize, kind: ErrorKind) -> Error {
374        debug_assert!(start <= self.pos);
375        debug_assert!(start <= self.regex_bytes.len());
376        if start < self.regex_bytes.len() {
377            debug_assert!(is_char_boundary(self.regex_bytes[start]));
378        }
379
380        let end = if self.pos <= self.regex_bytes.len() {
381            ceil_char_boundary(self.regex_bytes, self.pos)
382        } else {
383            self.regex_bytes.len()
384        };
385        kind.with_position(start..end)
386    }
387
388    const fn push_ast(&mut self, start_pos: usize, node: Node) -> Result<(), Error> {
389        self.push_custom_ast(Span::new(start_pos, self.pos), node)
390    }
391
392    const fn push_custom_ast(&mut self, range: Span, node: Node) -> Result<(), Error> {
393        match &mut self.syntax {
394            SyntaxOrLen::Syntax(syntax) => {
395                let span = Spanned { node, span: range };
396                if syntax.push(span).is_err() {
397                    return Err(self.error(range.start, ErrorKind::AstOverflow));
398                }
399            }
400            SyntaxOrLen::Len(len) => *len += 1,
401        }
402        Ok(())
403    }
404
405    /// Gobbles the provided bytes if they are next. Otherwise, doesn't change the state.
406    const fn gobble(&mut self, bytes: &[u8]) -> bool {
407        let mut i = 0;
408        while i < bytes.len()
409            && self.pos + i < self.regex_bytes.len()
410            && bytes[i] == self.regex_bytes[self.pos + i]
411        {
412            i += 1;
413        }
414        if i == bytes.len() {
415            // All bytes have matched
416            self.pos += bytes.len();
417            true
418        } else {
419            false
420        }
421    }
422
423    const fn gobble_any(&mut self, needles: &[&[u8]]) -> bool {
424        let mut i = 0;
425        while i < needles.len() {
426            let matched = self.gobble(needles[i]);
427            if matched {
428                return true;
429            }
430            i += 1;
431        }
432        false
433    }
434
435    const fn matches(&self, range: ops::Range<usize>, needle: &[u8]) -> bool {
436        if range.end - range.start != needle.len() {
437            return false;
438        }
439        if range.end > self.regex_bytes.len() {
440            return false;
441        }
442
443        let mut i = 0;
444        while i < needle.len() && self.regex_bytes[range.start + i] == needle[i] {
445            i += 1;
446        }
447        i == needle.len()
448    }
449
450    const fn matches_any(&self, range: ops::Range<usize>, needles: &[&[u8]]) -> bool {
451        let mut i = 0;
452        while i < needles.len() {
453            if self.matches(range.start..range.end, needles[i]) {
454                return true;
455            }
456            i += 1;
457        }
458        false
459    }
460
461    const fn gobble_whitespace_and_comments(&mut self) -> Result<(), Error> {
462        if !self.ignore_whitespace {
463            return Ok(());
464        }
465
466        // Start of the very first comment.
467        let mut comment_range = None::<Span>;
468        let mut is_in_comment = false;
469        while !self.is_eof() {
470            let Some((ch, next_pos)) = split_first_char(self.regex_bytes, self.pos) else {
471                break; // reached EOF
472            };
473
474            if is_in_comment {
475                if ch == '\n' {
476                    is_in_comment = false;
477                    if let Some(range) = &mut comment_range {
478                        range.end = self.pos;
479                    }
480                }
481                self.pos = next_pos;
482            } else if ch.is_ascii_whitespace() {
483                // TODO: support non-ASCII whitespace? `char::is_whitespace` is const only since Rust 1.87
484                self.pos = next_pos;
485            } else if ch == '#' {
486                is_in_comment = true;
487                if comment_range.is_none() {
488                    comment_range = Some(Span::new(self.pos, self.pos + 1));
489                }
490                self.pos = next_pos;
491            } else {
492                break;
493            }
494        }
495
496        if let Some(mut range) = comment_range {
497            if is_in_comment {
498                // Reached EOF without a closing '\n', so we need to update the comment span here.
499                range.end = self.pos;
500            }
501            const_try!(self.push_custom_ast(range, Node::Comment));
502        }
503        Ok(())
504    }
505
506    /// Fails if the parser is currently at ASCII whitespace and whitespace is ignored.
507    const fn disallowed_error(&self, pos: usize) -> Error {
508        debug_assert!(self.ignore_whitespace);
509
510        let err = if self.regex_bytes[pos] == b'#' {
511            ErrorKind::DisallowedComment
512        } else {
513            debug_assert!(self.regex_bytes[pos].is_ascii_whitespace());
514            ErrorKind::DisallowedWhitespace
515        };
516        err.with_position(pos..pos + 1)
517    }
518
519    /// Peeks the first non-whitespace / comment char after the specified position.
520    const fn peek_whitespace(&self, mut pos: usize) -> Option<char> {
521        let mut is_in_comment = false;
522        while !self.is_eof() {
523            let Some((ch, next_pos)) = split_first_char(self.regex_bytes, pos) else {
524                break; // reached EOF
525            };
526
527            if is_in_comment {
528                if ch == '\n' {
529                    is_in_comment = false;
530                }
531                pos = next_pos;
532            } else if self.ignore_whitespace && ch.is_ascii_whitespace() {
533                // TODO: support non-ASCII whitespace? `char::is_whitespace` is const only since Rust 1.87
534                pos = next_pos;
535            } else if self.ignore_whitespace && ch == '#' {
536                is_in_comment = true;
537                pos = next_pos;
538            } else {
539                return Some(ch);
540            }
541        }
542        None
543    }
544
545    /// Parses uncounted repetition operation, e.g. `*` or `+?`.
546    const fn parse_uncounted_repetition(&mut self) -> Result<(), Error> {
547        let start_pos = self.pos;
548        // `pos` is currently at one of `?`, `*` or `+` (all ASCII chars)
549        self.pos += 1;
550        if self.is_empty_last_item {
551            return Err(self.error(self.pos - 1, ErrorKind::MissingRepetition));
552        }
553        // Parse optional non-greedy marker `?`
554        self.gobble(b"?");
555
556        const_try!(self.push_ast(start_pos, Node::UncountedRepetition));
557        Ok(())
558    }
559
560    /// Parses counted repetition, e.g., `{2}`, `{2,}` or `{2,5}`.
561    const fn parse_counted_repetition(&mut self) -> Result<(), Error> {
562        let start_pos = self.pos;
563        debug_assert!(self.regex_bytes[self.pos] == b'{');
564        self.pos += 1; // gobble '{'
565
566        if self.is_empty_last_item {
567            return Err(self.error(start_pos, ErrorKind::MissingRepetition));
568        }
569        const_try!(self.gobble_whitespace_and_comments());
570
571        // Minimum or exact count
572        let (min_count, min_count_span) = const_try!(self.parse_decimal());
573        let Some(min_count) = min_count else {
574            return Err(
575                ErrorKind::EmptyDecimal.with_position(min_count_span.start..min_count_span.end)
576            );
577        };
578        const_try!(self.gobble_whitespace_and_comments());
579
580        let Some(current_char) = self.ascii_char() else {
581            return Err(self.error(start_pos, ErrorKind::UnfinishedRepetition));
582        };
583        let max_count_span = if current_char == b',' {
584            self.pos += 1;
585            const_try!(self.gobble_whitespace_and_comments());
586
587            let max_count_start = self.pos;
588            // Maximum count
589            let (max_count, max_count_span) = const_try!(self.parse_decimal());
590            if let Some(count) = max_count {
591                if count < min_count {
592                    return Err(self.error(start_pos, ErrorKind::InvalidRepetitionRange));
593                }
594            } else if max_count_start != self.pos {
595                // `regex-syntax` quirk: if there's whitespace after `,`, but no digits, then the repetition is invalid.
596                // I.e., `{2,}` is valid, but `{2, }` is not.
597                return Err(self.error(max_count_start, ErrorKind::EmptyDecimal));
598            }
599            Some(max_count_span)
600        } else {
601            None
602        };
603
604        const_try!(self.gobble_whitespace_and_comments());
605        if matches!(self.ascii_char(), Some(b'}')) {
606            self.pos += 1;
607            const_try!(self.gobble_whitespace_and_comments());
608
609            // Parse optional non-greedy marker `?`
610            self.gobble(b"?");
611            const_try!(self.push_ast(
612                start_pos,
613                Node::CountedRepetition(match max_count_span {
614                    Some(span) if span.is_empty() => CountedRepetition::AtLeast(min_count_span),
615                    Some(span) => CountedRepetition::Between(min_count_span, span),
616                    None => CountedRepetition::Exactly(min_count_span),
617                })
618            ));
619            Ok(())
620        } else {
621            Err(self.error(start_pos, ErrorKind::UnfinishedRepetition))
622        }
623    }
624
625    /// This will trim leading and trailing whitespace regardless of the whitespace handling mode.
626    const fn parse_decimal(&mut self) -> Result<(Option<u32>, Span), Error> {
627        while !self.is_eof() && self.regex_bytes[self.pos].is_ascii_whitespace() {
628            self.pos += 1;
629        }
630
631        let start_pos = self.pos;
632        let mut pos = self.pos;
633        let mut decimal = 0_u32;
634        while pos < self.regex_bytes.len() && self.regex_bytes[pos].is_ascii_digit() {
635            let Some(new_decimal) = decimal.checked_mul(10) else {
636                return Err(ErrorKind::InvalidDecimal.with_position(start_pos..pos + 1));
637            };
638            decimal = match new_decimal.checked_add((self.regex_bytes[pos] - b'0') as u32) {
639                Some(dec) => dec,
640                None => return Err(ErrorKind::InvalidDecimal.with_position(start_pos..pos + 1)),
641            };
642            pos += 1;
643        }
644
645        let decimal = if pos == self.pos {
646            None
647        } else {
648            self.pos = pos;
649            Some(decimal)
650        };
651
652        while !self.is_eof() && self.regex_bytes[self.pos].is_ascii_whitespace() {
653            self.pos += 1;
654        }
655
656        if self.ignore_whitespace {
657            // Look ahead and check whether the next char ignoring whitespace is a digit.
658            // If so, the decimal would be valid as per regex-syntax, but it looks confusing, so we
659            // raise an error.
660            if let Some(ch) = self.peek_whitespace(pos) {
661                if ch.is_ascii_digit() {
662                    return Err(self.disallowed_error(pos));
663                }
664            }
665        }
666
667        Ok((decimal, Span::new(start_pos, pos)))
668    }
669
670    const fn parse_primitive(&mut self, ch: char, next_pos: usize) -> Result<(), Error> {
671        match ch {
672            '\\' => {
673                const_try!(self.parse_escape());
674                Ok(())
675            }
676            '.' => {
677                self.pos += 1;
678                const_try!(self.push_ast(self.pos - 1, Node::Dot));
679                Ok(())
680            }
681            '^' | '$' => {
682                self.pos += 1;
683                const_try!(self.push_ast(self.pos - 1, Node::LineAssertion));
684                Ok(())
685            }
686            _ => {
687                self.pos = next_pos;
688                Ok(())
689            }
690        }
691    }
692
693    const fn parse_escape(&mut self) -> Result<PrimitiveKind, Error> {
694        let start_pos = self.pos;
695        debug_assert!(self.regex_bytes[self.pos] == b'\\');
696        self.pos += 1;
697
698        let Some(current_char) = self.ascii_char() else {
699            return Err(self.error(start_pos, ErrorKind::UnfinishedEscape));
700        };
701        // Gobble the escaped single ASCII char
702        self.pos += 1;
703
704        match current_char {
705            b'0'..=b'9' => Err(self.error(start_pos, ErrorKind::UnsupportedBackref)),
706            b'x' | b'u' | b'U' => {
707                let ch = const_try!(self.parse_hex_escape(start_pos, current_char));
708                const_try!(self.push_ast(start_pos, Node::HexEscape));
709                Ok(PrimitiveKind::Literal(ch))
710            }
711            b'p' | b'P' => Err(self.error(start_pos, ErrorKind::UnicodeClassesNotSupported)),
712            b'd' | b's' | b'w' | b'D' | b'S' | b'W' => {
713                const_try!(self.push_ast(start_pos, Node::PerlClass));
714                Ok(PrimitiveKind::PerlClass)
715            }
716
717            b'n' => {
718                const_try!(self.push_ast(start_pos, Node::EscapedLiteral));
719                Ok(PrimitiveKind::Literal('\n'))
720            }
721            b't' => {
722                const_try!(self.push_ast(start_pos, Node::EscapedLiteral));
723                Ok(PrimitiveKind::Literal('\t'))
724            }
725            b'r' => {
726                const_try!(self.push_ast(start_pos, Node::EscapedLiteral));
727                Ok(PrimitiveKind::Literal('\r'))
728            }
729            b'a' => {
730                const_try!(self.push_ast(start_pos, Node::EscapedLiteral));
731                Ok(PrimitiveKind::Literal('\x07'))
732            }
733            b'f' => {
734                const_try!(self.push_ast(start_pos, Node::EscapedLiteral));
735                Ok(PrimitiveKind::Literal('\x0C'))
736            }
737            b'v' => {
738                const_try!(self.push_ast(start_pos, Node::EscapedLiteral));
739                Ok(PrimitiveKind::Literal('\x0B'))
740            }
741
742            b'A' | b'z' | b'B' | b'<' | b'>' => {
743                const_try!(self.push_ast(start_pos, Node::StdAssertion));
744                Ok(PrimitiveKind::Other)
745            }
746            b'b' => {
747                if !matches!(self.ascii_char(), Some(b'{'))
748                    || !const_try!(self.try_parse_word_boundary(start_pos))
749                {
750                    const_try!(self.push_ast(start_pos, Node::StdAssertion));
751                }
752                Ok(PrimitiveKind::Other)
753            }
754            ch if is_meta_char(ch) => {
755                const_try!(self.push_ast(start_pos, Node::EscapedChar { meta: true }));
756                Ok(PrimitiveKind::Literal(ch as char))
757            }
758            ch if is_escapable_char(ch) => {
759                const_try!(self.push_ast(start_pos, Node::EscapedChar { meta: false }));
760                Ok(PrimitiveKind::Literal(ch as char))
761            }
762            _ => Err(self.error(start_pos, ErrorKind::UnsupportedEscape)),
763        }
764    }
765
766    const fn try_parse_word_boundary(&mut self, escape_start: usize) -> Result<bool, Error> {
767        const fn is_valid_char(ch: u8) -> bool {
768            matches!(ch, b'A'..=b'Z' | b'a'..=b'z' | b'-')
769        }
770
771        const KNOWN_BOUNDARIES: &[&[u8]] = &[b"start", b"end", b"start-half", b"end-half"];
772
773        let backup = self.backup();
774        debug_assert!(self.regex_bytes[self.pos] == b'{');
775        self.pos += 1;
776
777        const_try!(self.gobble_whitespace_and_comments());
778        let start_pos = self.pos;
779        if self.is_eof() {
780            return Err(self.error(start_pos, ErrorKind::UnfinishedWordBoundary));
781        }
782
783        if !is_valid_char(self.regex_bytes[self.pos]) {
784            self.restore(&backup);
785            return Ok(false); // not a word boundary specifier
786        }
787
788        while !self.is_eof() && is_valid_char(self.regex_bytes[self.pos]) {
789            self.pos += 1;
790        }
791        let end_pos = self.pos;
792        const_try!(self.gobble_whitespace_and_comments());
793
794        if self.ignore_whitespace {
795            // `regex-syntax` allows whitespace / comments *inside* the specifier, which looks weird, so we don't allow it.
796            if let Some(ch) = self.peek_whitespace(self.pos) {
797                if ch.is_ascii() && is_valid_char(ch as u8) {
798                    return Err(self.disallowed_error(end_pos));
799                }
800            }
801        }
802
803        if self.is_eof() || self.regex_bytes[self.pos] != b'}' {
804            return Err(self.error(start_pos, ErrorKind::UnfinishedWordBoundary));
805        }
806
807        // Check whether the boundary specification is known.
808        if !self.matches_any(start_pos..end_pos, KNOWN_BOUNDARIES) {
809            return Err(ErrorKind::UnknownWordBoundary.with_position(start_pos..end_pos));
810        }
811
812        self.pos += 1; // gobble '}'
813        const_try!(self.push_ast(escape_start, Node::StdAssertion));
814        Ok(true)
815    }
816
817    /// Parses a hex-escaped char. The parser position is after the marker ('x', 'u' or 'U').
818    ///
819    /// Unlike `regex-syntax`, we don't allow any whitespace / comments *inside* the escape, so it's closer to Rust syntax,
820    /// and doesn't lead to terribly looking regexes:
821    ///
822    /// ```text
823    /// \u # can you guess
824    ///   0 # this is
825    /// 1 2 3 # a hex escape?
826    /// ```
827    const fn parse_hex_escape(&mut self, start_pos: usize, marker_ch: u8) -> Result<char, Error> {
828        let Some(current_char) = self.ascii_char() else {
829            return Err(self.error(start_pos, ErrorKind::UnfinishedEscape));
830        };
831        if current_char == b'{' {
832            // escape with braces, e.g., \u{123}
833            self.parse_hex_brace(start_pos)
834        } else {
835            let expected_digits = match marker_ch {
836                b'x' => 2,
837                b'u' => 4,
838                b'U' => 8,
839                _ => unreachable!(),
840            };
841            self.parse_hex_digits(start_pos, expected_digits)
842        }
843    }
844
845    const fn parse_hex_brace(&mut self, start_pos: usize) -> Result<char, Error> {
846        self.pos += 1; // gobble '{'
847
848        let first_digit_pos = self.pos;
849        let mut hex = 0_u32;
850        while self.pos < self.regex_bytes.len() && self.regex_bytes[self.pos] != b'}' {
851            let digit = self.regex_bytes[self.pos];
852            self.pos += 1;
853            let digit = match digit {
854                b'0'..=b'9' => digit - b'0',
855                b'a'..=b'f' => digit - b'a' + 10,
856                b'A'..=b'F' => digit - b'A' + 10,
857                _ if self.ignore_whitespace && digit.is_ascii_whitespace() => {
858                    // Return a more precise error
859                    return Err(self.error(self.pos - 1, ErrorKind::DisallowedWhitespace));
860                }
861                b'#' if self.ignore_whitespace => {
862                    // Return a more precise error
863                    return Err(self.error(self.pos - 1, ErrorKind::DisallowedComment));
864                }
865                _ => return Err(self.error(start_pos, ErrorKind::InvalidHex)),
866            };
867            if self.pos >= first_digit_pos + 8 {
868                return Err(self.error(start_pos, ErrorKind::NonUnicodeHex));
869            }
870            // No overflow can happen due to the check above
871            hex = hex * 16 + digit as u32;
872        }
873
874        if self.is_eof() {
875            return Err(self.error(start_pos, ErrorKind::UnfinishedEscape));
876        }
877
878        // Gobble the terminating '}'
879        debug_assert!(self.regex_bytes[self.pos] == b'}');
880        self.pos += 1;
881
882        if self.pos == first_digit_pos + 1 {
883            Err(self.error(start_pos, ErrorKind::EmptyHex))
884        } else {
885            match char::from_u32(hex) {
886                Some(ch) => Ok(ch),
887                None => Err(self.error(start_pos, ErrorKind::NonUnicodeHex)),
888            }
889        }
890    }
891
892    const fn parse_hex_digits(
893        &mut self,
894        start_pos: usize,
895        expected_digits: usize,
896    ) -> Result<char, Error> {
897        let first_digit_pos = self.pos;
898        let mut hex = 0_u32;
899        while self.pos - first_digit_pos < expected_digits {
900            if self.is_eof() {
901                return Err(self.error(start_pos, ErrorKind::UnfinishedEscape));
902            }
903            let digit = self.regex_bytes[self.pos];
904            self.pos += 1; // Advance immediately to get the correct error span
905
906            let digit = match digit {
907                b'0'..=b'9' => digit - b'0',
908                b'a'..=b'f' => digit - b'a' + 10,
909                b'A'..=b'F' => digit - b'A' + 10,
910                _ if self.ignore_whitespace && digit.is_ascii_whitespace() => {
911                    // Return a more precise error
912                    return Err(self.error(self.pos - 1, ErrorKind::DisallowedWhitespace));
913                }
914                b'#' if self.ignore_whitespace => {
915                    // Return a more precise error
916                    return Err(self.error(self.pos - 1, ErrorKind::DisallowedComment));
917                }
918                _ => return Err(self.error(start_pos, ErrorKind::InvalidHex)),
919            };
920            // No overflow can happen.
921            hex = hex * 16 + digit as u32;
922        }
923
924        match char::from_u32(hex) {
925            Some(ch) => Ok(ch),
926            None => Err(self.error(start_pos, ErrorKind::NonUnicodeHex)),
927        }
928    }
929
930    const fn is_eof(&self) -> bool {
931        self.pos == self.regex_bytes.len()
932    }
933
934    const fn parse_group_start_or_flags(&mut self) -> Result<(), Error> {
935        const LOOKAROUND_PREFIXES: &[&[u8]] = &[b"?=", b"?!", b"?<=", b"?<!"];
936        const NAMED_PREFIXES: &[&[u8]] = &[b"?P<", b"?<"];
937
938        // Gobble the opening '('
939        let start_pos = self.pos;
940        debug_assert!(self.regex_bytes[self.pos] == b'(');
941        self.pos += 1;
942
943        let start_ast_idx = self.syntax.len();
944        const_try!(self.push_ast(
945            start_pos,
946            Node::GroupStart {
947                name: None,
948                flags: None,
949            }
950        ));
951        const_try!(self.gobble_whitespace_and_comments());
952
953        if self.is_eof() {
954            return Err(self.error(start_pos, ErrorKind::UnfinishedGroup));
955        }
956
957        let is_lookaround = self.gobble_any(LOOKAROUND_PREFIXES);
958        if is_lookaround {
959            return Err(self.error(start_pos, ErrorKind::LookaroundNotSupported));
960        }
961
962        let name_start = self.pos;
963        let is_named = self.gobble_any(NAMED_PREFIXES);
964        if is_named {
965            let start = Span::new(name_start, self.pos);
966            let name_ast = const_try!(self.parse_capture_name(start));
967            if let SyntaxOrLen::Syntax(syntax) = &mut self.syntax {
968                if let Node::GroupStart { name, .. } = &mut syntax.index_mut(start_ast_idx).node {
969                    *name = Some(name_ast);
970                }
971            }
972
973            const_try!(self.group_names.insert(name_ast.name, self.regex_bytes));
974        }
975
976        let mut spanned_flags = None;
977        let mut is_standalone_flags = false;
978        if !is_named && self.regex_bytes[self.pos] == b'?' {
979            let flags_start = self.pos;
980            let flags = const_try!(self.parse_flags());
981            if !flags.is_empty {
982                spanned_flags = Some((flags, Span::new(flags_start, self.pos)));
983            }
984
985            let ch = self.regex_bytes[self.pos];
986            debug_assert!(ch == b':' || ch == b')');
987
988            if ch == b')' {
989                // Flags for the current group.
990                if flags.is_empty {
991                    // Include the closing `)` in the error span
992                    self.pos += 1;
993                    // Treat `(?)` as missing repetition, same as in `regex-parser`
994                    return Err(self.error(start_pos, ErrorKind::MissingRepetition));
995                }
996                // Do not advance the pos, so that `)` is parsed as the group end and decrements the group depth.
997                is_standalone_flags = true;
998            } else {
999                self.pos += 1;
1000            }
1001        }
1002
1003        if let SyntaxOrLen::Syntax(syntax) = &mut self.syntax {
1004            if let (Node::GroupStart { flags, .. }, Some((_, span))) =
1005                (&mut syntax.index_mut(start_ast_idx).node, &spanned_flags)
1006            {
1007                *flags = Some(*span);
1008            }
1009        }
1010
1011        let new_ignore_whitespace = if let Some((
1012            Flags {
1013                ignore_whitespace: Some(ws),
1014                ..
1015            },
1016            _,
1017        )) = spanned_flags
1018        {
1019            ws
1020        } else {
1021            self.ignore_whitespace
1022        };
1023
1024        let push_result = self.groups.push(GroupFrame {
1025            prev_ignore_whitespace: if is_standalone_flags {
1026                // Will set `ignore_whitespace` in the *surrounding* group once the flags pseudo-group
1027                // is popped below.
1028                new_ignore_whitespace
1029            } else {
1030                let prev = self.ignore_whitespace;
1031                self.ignore_whitespace = new_ignore_whitespace;
1032                prev
1033            },
1034        });
1035        if push_result.is_err() {
1036            return Err(self.error(start_pos, ErrorKind::GroupDepthOverflow));
1037        }
1038
1039        // This works fine with the standalone flags like `(?m-u)` because we immediately close the pseudo-group.
1040        if is_standalone_flags {
1041            const_try!(self.end_group());
1042        }
1043        Ok(())
1044    }
1045
1046    /// Parses a capture name. The parser position is after the opening `<`.
1047    const fn parse_capture_name(&mut self, start: Span) -> Result<GroupName, Error> {
1048        const fn is_capture_char(ch: u8, is_first: bool) -> bool {
1049            if is_first {
1050                ch == b'_' || ch.is_ascii_alphabetic()
1051            } else {
1052                ch == b'_' || ch == b'.' || ch == b'[' || ch == b']' || ch.is_ascii_alphanumeric()
1053            }
1054        }
1055
1056        let start_pos = self.pos;
1057        while self.pos < self.regex_bytes.len() && self.regex_bytes[self.pos] != b'>' {
1058            let ch = self.regex_bytes[self.pos];
1059            if ch > 0x7f {
1060                return Err(self.error(start_pos, ErrorKind::NonAsciiCaptureName));
1061            }
1062
1063            let is_first = start_pos == self.pos;
1064            self.pos += 1;
1065            if !is_capture_char(ch, is_first) {
1066                return Err(self.error(start_pos, ErrorKind::InvalidCaptureName));
1067            }
1068        }
1069
1070        if self.is_eof() {
1071            return Err(self.error(start_pos, ErrorKind::UnfinishedCaptureName));
1072        }
1073        if start_pos == self.pos {
1074            return Err(self.error(start_pos, ErrorKind::EmptyCaptureName));
1075        }
1076
1077        debug_assert!(self.regex_bytes[self.pos] == b'>');
1078        self.pos += 1;
1079
1080        Ok(GroupName {
1081            start,
1082            name: Span::new(start_pos, self.pos - 1),
1083            end: Span::new(self.pos - 1, self.pos),
1084        })
1085    }
1086
1087    /// Parses flags. The parser position is at the opening `?`.
1088    const fn parse_flags(&mut self) -> Result<Flags, Error> {
1089        const KNOWN_FLAGS_COUNT: usize = 7;
1090        const KNOWN_FLAGS: [u8; KNOWN_FLAGS_COUNT] = *b"ximsUuR";
1091
1092        let start_pos = self.pos;
1093        debug_assert!(self.regex_bytes[self.pos] == b'?');
1094        self.pos += 1;
1095
1096        let mut negation = false;
1097        let mut flag_values = [None::<bool>; KNOWN_FLAGS_COUNT];
1098        let mut is_empty = true;
1099        let mut is_empty_negation = true;
1100        while !self.is_eof() {
1101            let ch = self.regex_bytes[self.pos];
1102            if ch == b':' || ch == b')' {
1103                break;
1104            }
1105            self.pos += 1;
1106
1107            if ch == b'-' {
1108                if negation {
1109                    return Err(self.error(self.pos - 1, ErrorKind::RepeatedFlagNegation));
1110                }
1111                negation = true;
1112                continue;
1113            }
1114
1115            let mut i = 0;
1116            while i < KNOWN_FLAGS_COUNT {
1117                if ch == KNOWN_FLAGS[i] {
1118                    break;
1119                }
1120                i += 1;
1121            }
1122            if i == KNOWN_FLAGS_COUNT {
1123                return Err(self.error(self.pos - 1, ErrorKind::UnsupportedFlag));
1124            }
1125
1126            if let Some(prev_value) = flag_values[i] {
1127                let err = ErrorKind::RepeatedFlag {
1128                    contradicting: prev_value == negation,
1129                };
1130                return Err(self.error(self.pos - 1, err));
1131            }
1132            flag_values[i] = Some(!negation);
1133            is_empty = false;
1134            is_empty_negation = !negation;
1135        }
1136
1137        if self.is_eof() {
1138            return Err(self.error(start_pos, ErrorKind::UnfinishedFlags));
1139        }
1140        if negation && is_empty_negation {
1141            return Err(self.error(self.pos - 1, ErrorKind::UnfinishedFlagsNegation));
1142        }
1143
1144        Ok(Flags {
1145            is_empty,
1146            ignore_whitespace: flag_values[0],
1147        })
1148    }
1149
1150    const fn end_group(&mut self) -> Result<(), Error> {
1151        let start_pos = self.pos;
1152        debug_assert!(self.regex_bytes[self.pos] == b')');
1153        self.pos += 1;
1154
1155        if let Some(popped) = self.groups.pop() {
1156            self.ignore_whitespace = popped.prev_ignore_whitespace;
1157        } else {
1158            return Err(self.error(start_pos, ErrorKind::NonMatchingGroupEnd));
1159        }
1160        self.push_ast(start_pos, Node::GroupEnd)
1161    }
1162
1163    /// Parses the start of the set class, including the opening `[` and any specially handled chars
1164    /// (`^`, `-` and `]`).
1165    const fn parse_set_class_start(&mut self) -> Result<(), Error> {
1166        let start_pos = self.pos;
1167        debug_assert!(self.regex_bytes[self.pos] == b'[');
1168        self.pos += 1;
1169
1170        let ast_idx = self.syntax.len();
1171        const_try!(self.push_ast(start_pos, Node::SetStart { negation: None }));
1172
1173        const_try!(self.gobble_whitespace_and_comments());
1174        if self.is_eof() {
1175            return Err(self.error(start_pos, ErrorKind::UnfinishedSet));
1176        }
1177
1178        if self.regex_bytes[self.pos] == b'^' {
1179            self.pos += 1;
1180            let negation_range = Span::new(self.pos - 1, self.pos);
1181            const_try!(self.gobble_whitespace_and_comments());
1182            if self.is_eof() {
1183                return Err(self.error(start_pos, ErrorKind::UnfinishedSet));
1184            }
1185
1186            if let SyntaxOrLen::Syntax(syntax) = &mut self.syntax {
1187                if let Node::SetStart { negation } = &mut syntax.index_mut(ast_idx).node {
1188                    *negation = Some(negation_range);
1189                }
1190            }
1191        }
1192
1193        if self.regex_bytes[self.pos] == b']' {
1194            // Literal ']'
1195            self.pos += 1;
1196            const_try!(self.gobble_whitespace_and_comments());
1197        } else {
1198            // Any amount of literal '-'
1199            while !self.is_eof() && self.regex_bytes[self.pos] == b'-' {
1200                self.pos += 1;
1201                const_try!(self.gobble_whitespace_and_comments());
1202            }
1203        }
1204        Ok(())
1205    }
1206
1207    const fn set_step(&mut self) -> Result<(), Error> {
1208        debug_assert!(self.set_depth > 0);
1209
1210        const_try!(self.gobble_whitespace_and_comments());
1211        if self.is_eof() {
1212            return Err(self.error(self.pos, ErrorKind::UnfinishedSet));
1213        }
1214
1215        let op_start = self.pos;
1216        if self.gobble_any(&[b"&&", b"--", b"~~"]) {
1217            const_try!(self.push_ast(op_start, Node::SetOp));
1218            return Ok(());
1219        }
1220
1221        let ch = self.regex_bytes[self.pos];
1222        match ch {
1223            b'[' => {
1224                // Try parse the ASCII char class first. If that fails, treat it as an embedded class.
1225                if self.try_parse_ascii_class() {
1226                    const_try!(self.push_ast(op_start, Node::AsciiClass));
1227                } else {
1228                    self.pos = op_start; // Restore the parser position
1229                    const_try!(self.parse_set_class_start());
1230                    self.set_depth += 1;
1231                }
1232            }
1233            b']' => {
1234                self.pos += 1;
1235                const_try!(self.push_ast(op_start, Node::SetEnd));
1236                self.set_depth -= 1;
1237            }
1238            _ => const_try!(self.parse_set_class_range()),
1239        }
1240        Ok(())
1241    }
1242
1243    /// Parses an ASCII char class, e.g., `[:alnum:]`. If successful, advances the parser beyond the closing `]`.
1244    ///
1245    /// **Important.** The caller is responsible for rewinding the parser position if `Ok(false)` is returned.
1246    const fn try_parse_ascii_class(&mut self) -> bool {
1247        const CLASSES: &[&[u8]] = &[
1248            b"alnum", b"alpha", b"ascii", b"blank", b"cntrl", b"digit", b"graph", b"lower",
1249            b"print", b"punct", b"space", b"upper", b"word", b"xdigit",
1250        ];
1251
1252        debug_assert!(self.regex_bytes[self.pos] == b'[');
1253        self.pos += 1;
1254
1255        if self.is_eof() || self.regex_bytes[self.pos] != b':' {
1256            return false;
1257        }
1258
1259        self.pos += 1;
1260        if self.is_eof() {
1261            return false;
1262        }
1263        if self.regex_bytes[self.pos] == b'^' {
1264            self.pos += 1;
1265            if self.is_eof() {
1266                return false;
1267            }
1268        }
1269
1270        let is_known_class = self.gobble_any(CLASSES);
1271        // ^ Immediately rewind the parse position so we don't forget about it.
1272        if !is_known_class {
1273            return false;
1274        }
1275
1276        !self.is_eof() && self.gobble(b":]")
1277    }
1278
1279    const fn parse_set_class_range(&mut self) -> Result<(), Error> {
1280        let start_pos = self.pos;
1281        let start_item = const_try!(self.parse_set_class_item());
1282        const_try!(self.gobble_whitespace_and_comments());
1283        if self.is_eof() {
1284            return Err(self.error(self.pos, ErrorKind::UnfinishedSet));
1285        }
1286
1287        let ch = self.regex_bytes[self.pos];
1288        if ch != b'-' || {
1289            let next_ch = self.peek_whitespace(self.pos + 1);
1290            matches!(next_ch, Some(']' | '-'))
1291        } {
1292            // Not a range.
1293            if !start_item.is_valid_set_member() {
1294                return Err(self.error(start_pos, ErrorKind::InvalidEscapeInSet));
1295            }
1296            return Ok(());
1297        }
1298
1299        let PrimitiveKind::Literal(range_start) = start_item else {
1300            return Err(self.error(start_pos, ErrorKind::InvalidRangeStart));
1301        };
1302
1303        debug_assert!(ch == b'-');
1304        self.pos += 1;
1305        const_try!(self.push_ast(self.pos - 1, Node::SetRange));
1306        const_try!(self.gobble_whitespace_and_comments());
1307
1308        let end_start_pos = self.pos;
1309        let PrimitiveKind::Literal(range_end) = const_try!(self.parse_set_class_item()) else {
1310            return Err(self.error(end_start_pos, ErrorKind::InvalidRangeEnd));
1311        };
1312
1313        if range_start > range_end {
1314            return Err(self.error(start_pos, ErrorKind::InvalidRange));
1315        }
1316        Ok(())
1317    }
1318
1319    const fn parse_set_class_item(&mut self) -> Result<PrimitiveKind, Error> {
1320        let Some((ch, next_pos)) = split_first_char(self.regex_bytes, self.pos) else {
1321            return Err(self.error(self.pos, ErrorKind::UnfinishedSet));
1322        };
1323        if ch == '\\' {
1324            self.parse_escape()
1325        } else {
1326            self.pos = next_pos;
1327            Ok(PrimitiveKind::Literal(ch))
1328        }
1329    }
1330
1331    pub(crate) const fn step(&mut self) -> Result<ops::ControlFlow<()>, Error> {
1332        if self.set_depth > 0 {
1333            const_try!(self.set_step());
1334            return Ok(ops::ControlFlow::Continue(()));
1335        }
1336
1337        const_try!(self.gobble_whitespace_and_comments());
1338
1339        let Some((current_ch, next_pos)) = split_first_char(self.regex_bytes, self.pos) else {
1340            if !self.groups.is_empty() {
1341                return Err(self.error(self.regex_bytes.len(), ErrorKind::UnfinishedGroup));
1342            }
1343            return Ok(ops::ControlFlow::Break(()));
1344        };
1345        match current_ch {
1346            '(' => {
1347                const_try!(self.parse_group_start_or_flags());
1348                // This works with standalone flags (e.g., `(?-x)`) as well; it's invalid to have a repetition after these.
1349                self.is_empty_last_item = true;
1350            }
1351            ')' => {
1352                const_try!(self.end_group());
1353                self.is_empty_last_item = false;
1354            }
1355            '|' => {
1356                // Gobble the alteration.
1357                self.pos += 1;
1358                const_try!(self.push_ast(self.pos - 1, Node::Alteration));
1359                self.is_empty_last_item = true;
1360            }
1361            '[' => {
1362                const_try!(self.parse_set_class_start());
1363                self.set_depth = 1;
1364                self.is_empty_last_item = false;
1365            }
1366            '?' | '*' | '+' => {
1367                const_try!(self.parse_uncounted_repetition());
1368                self.is_empty_last_item = false;
1369            }
1370            '{' => {
1371                const_try!(self.parse_counted_repetition());
1372                self.is_empty_last_item = false;
1373            }
1374            _ => {
1375                const_try!(self.parse_primitive(current_ch, next_pos));
1376                self.is_empty_last_item = false;
1377            }
1378        }
1379        Ok(ops::ControlFlow::Continue(()))
1380    }
1381
1382    pub(crate) const fn into_syntax(self) -> Syntax<CAP> {
1383        match self.syntax {
1384            SyntaxOrLen::Syntax(syntax) => syntax,
1385            SyntaxOrLen::Len(_) => Syntax::new(Spanned::DUMMY),
1386        }
1387    }
1388}