compile_regex/ast.rs
1//! AST definitions. Because of `const`-ness, we support only very primitive AST spanning.
2
3use core::{fmt, ops};
4
5use crate::utils::Stack;
6
7/// Range of chars. Similar to `Range<usize>`, but implements `Copy`.
8#[derive(Clone, Copy, PartialEq)]
9pub struct Span {
10 /// Start of the range, inclusive.
11 pub start: usize,
12 /// End of the range, exclusive.
13 pub end: usize,
14}
15
16impl fmt::Debug for Span {
17 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
18 write!(formatter, "{}..{}", self.start, self.end)
19 }
20}
21
22impl From<Span> for ops::Range<usize> {
23 fn from(range: Span) -> Self {
24 range.start..range.end
25 }
26}
27
28impl ops::Index<Span> for str {
29 type Output = str;
30
31 fn index(&self, index: Span) -> &Self::Output {
32 &self[ops::Range::from(index)]
33 }
34}
35
36impl Span {
37 #[doc(hidden)] // used in fuzz tests; logically private
38 pub const fn new(start: usize, end: usize) -> Self {
39 assert!(start <= end);
40 Self { start, end }
41 }
42
43 pub(crate) const fn is_empty(&self) -> bool {
44 self.end == self.start
45 }
46
47 pub(crate) const fn len(&self) -> usize {
48 self.end - self.start
49 }
50}
51
52/// Information about a counted repetition, e.g. `{2}`, `{2,3}`, or `{2,}`.
53#[derive(Debug, Clone, Copy, PartialEq)]
54pub enum CountedRepetition {
55 /// Repetition with an exact count, e.g. `{2}`.
56 Exactly(Span),
57 /// Repetition with a minimum count, e.g. `{2,}`.
58 AtLeast(Span),
59 /// Repetition with a range of counts, e.g. `{2,3}`.
60 Between(Span, Span),
61}
62
63/// Syntax node of a regular expression.
64#[derive(Debug, Clone, Copy, PartialEq)]
65#[non_exhaustive]
66pub enum Node {
67 /// Any char: `.`
68 Dot,
69 /// Zero-length line assertion: `^`, `$`.
70 LineAssertion,
71 /// Perl character class: `\d`, `\s`, `\w` etc.
72 PerlClass,
73 /// Escaped literal with special meaning, such as `\n` or `\t`.
74 EscapedLiteral,
75 /// Escaped char without special meaning, such as `\*`.
76 EscapedChar {
77 /// Must the char be escaped?
78 meta: bool,
79 },
80 /// Standard assertion like `\A` (beginning of the haystack) or `\<` (start-of-word boundary).
81 StdAssertion,
82 /// Uncounted repetition, like `*`, `+` or `?`, with an optional non-greedy marker.
83 UncountedRepetition,
84 /// Counted repetition, like `{3}` or `{3,5}`, with an optional non-greedy marker.
85 CountedRepetition(CountedRepetition),
86 /// Hexadecimal escape, like `\x0C` or `\u{123}`.
87 HexEscape,
88
89 /// Alteration char (`|`).
90 Alteration,
91 /// Group start `(` together with optional flags and naming.
92 GroupStart {
93 /// Group name.
94 name: Option<GroupName>,
95 /// Flags for the current group, e.g. `?x-m` in `(?x-m)` or in `(?x-m:.*)`.
96 /// By design, this is mutually exclusive with `name`.
97 flags: Option<Span>,
98 },
99 /// Group end `)`.
100 GroupEnd,
101 /// Set start `[`.
102 SetStart {
103 /// Negation `^`.
104 negation: Option<Span>,
105 },
106 /// Set end `]`.
107 SetEnd,
108 /// Set operation: `&&`, `--` or `~~`.
109 SetOp,
110 /// Set range char: `-`.
111 SetRange,
112 /// ASCII char class, e.g., `[:alnum:]`.
113 AsciiClass,
114 /// Comment, e.g., `# Test`. May span over multiple lines, where comments may be preceded by whitespace
115 /// (but no AST nodes).
116 Comment,
117}
118
119/// Information about a group / capture name.
120#[derive(Debug, Clone, Copy, PartialEq)]
121pub struct GroupName {
122 /// Span of the start marker, i.e., `?<` or `?P<`.
123 pub start: Span,
124 /// Span of the name.
125 pub name: Span,
126 /// Position of the end marker, i.e. `>`.
127 pub end: Span,
128}
129
130/// Spanned syntax node.
131#[derive(Debug, Clone, Copy, PartialEq)]
132pub struct Spanned {
133 /// Syntax node.
134 pub node: Node,
135 /// Byte span of the regex string that maps to the node.
136 pub span: Span,
137}
138
139impl Spanned {
140 pub(crate) const DUMMY: Self = Self {
141 node: Node::Dot,
142 span: Span { start: 0, end: 0 },
143 };
144}
145
146/// Linearized syntax tree.
147pub type Syntax<const LEN: usize = 128> = Stack<Spanned, LEN>;