compile_regex/
ast.rs

1//! AST definitions. Because of `const`-ness, we support only very primitive AST spanning.
2
3use core::{fmt, ops};
4
5use crate::utils::Stack;
6
7/// Range of chars. Similar to `Range<usize>`, but implements `Copy`.
8#[derive(Clone, Copy, PartialEq)]
9pub struct Span {
10    /// Start of the range, inclusive.
11    pub start: usize,
12    /// End of the range, exclusive.
13    pub end: usize,
14}
15
16impl fmt::Debug for Span {
17    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
18        write!(formatter, "{}..{}", self.start, self.end)
19    }
20}
21
22impl From<Span> for ops::Range<usize> {
23    fn from(range: Span) -> Self {
24        range.start..range.end
25    }
26}
27
28impl ops::Index<Span> for str {
29    type Output = str;
30
31    fn index(&self, index: Span) -> &Self::Output {
32        &self[ops::Range::from(index)]
33    }
34}
35
36impl Span {
37    #[doc(hidden)] // used in fuzz tests; logically private
38    pub const fn new(start: usize, end: usize) -> Self {
39        assert!(start <= end);
40        Self { start, end }
41    }
42
43    pub(crate) const fn is_empty(&self) -> bool {
44        self.end == self.start
45    }
46
47    pub(crate) const fn len(&self) -> usize {
48        self.end - self.start
49    }
50}
51
52/// Information about a counted repetition, e.g. `{2}`, `{2,3}`, or `{2,}`.
53#[derive(Debug, Clone, Copy, PartialEq)]
54pub enum CountedRepetition {
55    /// Repetition with an exact count, e.g. `{2}`.
56    Exactly(Span),
57    /// Repetition with a minimum count, e.g. `{2,}`.
58    AtLeast(Span),
59    /// Repetition with a range of counts, e.g. `{2,3}`.
60    Between(Span, Span),
61}
62
63/// Syntax node of a regular expression.
64#[derive(Debug, Clone, Copy, PartialEq)]
65#[non_exhaustive]
66pub enum Node {
67    /// Any char: `.`
68    Dot,
69    /// Zero-length line assertion: `^`, `$`.
70    LineAssertion,
71    /// Perl character class: `\d`, `\s`, `\w` etc.
72    PerlClass,
73    /// Escaped literal with special meaning, such as `\n` or `\t`.
74    EscapedLiteral,
75    /// Escaped char without special meaning, such as `\*`.
76    EscapedChar {
77        /// Must the char be escaped?
78        meta: bool,
79    },
80    /// Standard assertion like `\A` (beginning of the haystack) or `\<` (start-of-word boundary).
81    StdAssertion,
82    /// Uncounted repetition, like `*`, `+` or `?`, with an optional non-greedy marker.
83    UncountedRepetition,
84    /// Counted repetition, like `{3}` or `{3,5}`, with an optional non-greedy marker.
85    CountedRepetition(CountedRepetition),
86    /// Hexadecimal escape, like `\x0C` or `\u{123}`.
87    HexEscape,
88
89    /// Alteration char (`|`).
90    Alteration,
91    /// Group start `(` together with optional flags and naming.
92    GroupStart {
93        /// Group name.
94        name: Option<GroupName>,
95        /// Flags for the current group, e.g. `?x-m` in `(?x-m)` or in `(?x-m:.*)`.
96        /// By design, this is mutually exclusive with `name`.
97        flags: Option<Span>,
98    },
99    /// Group end `)`.
100    GroupEnd,
101    /// Set start `[`.
102    SetStart {
103        /// Negation `^`.
104        negation: Option<Span>,
105    },
106    /// Set end `]`.
107    SetEnd,
108    /// Set operation: `&&`, `--` or `~~`.
109    SetOp,
110    /// Set range char: `-`.
111    SetRange,
112    /// ASCII char class, e.g., `[:alnum:]`.
113    AsciiClass,
114    /// Comment, e.g., `# Test`. May span over multiple lines, where comments may be preceded by whitespace
115    /// (but no AST nodes).
116    Comment,
117}
118
119/// Information about a group / capture name.
120#[derive(Debug, Clone, Copy, PartialEq)]
121pub struct GroupName {
122    /// Span of the start marker, i.e., `?<` or `?P<`.
123    pub start: Span,
124    /// Span of the name.
125    pub name: Span,
126    /// Position of the end marker, i.e. `>`.
127    pub end: Span,
128}
129
130/// Spanned syntax node.
131#[derive(Debug, Clone, Copy, PartialEq)]
132pub struct Spanned {
133    /// Syntax node.
134    pub node: Node,
135    /// Byte span of the regex string that maps to the node.
136    pub span: Span,
137}
138
139impl Spanned {
140    pub(crate) const DUMMY: Self = Self {
141        node: Node::Dot,
142        span: Span { start: 0, end: 0 },
143    };
144}
145
146/// Linearized syntax tree.
147pub type Syntax<const LEN: usize = 128> = Stack<Spanned, LEN>;