compile_regex/
errors.rs

1//! Error types.
2
3use core::{fmt, ops, str};
4
5use compile_fmt::{compile_panic, Ascii};
6
/// Error when parsing / validating regular expressions.
///
/// Combines the [`ErrorKind`] describing what went wrong with the byte range of the regex
/// string that the problem relates to.
#[derive(Debug)]
pub struct Error {
    /// Byte offsets in the regex string that correspond to this error.
    pos: ops::Range<usize>,
    /// Kind of the problem found at `pos`.
    kind: ErrorKind,
}
13
14impl fmt::Display for Error {
15    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
16        write!(formatter, "invalid regex at {:?}: {}", self.pos, self.kind)
17    }
18}
19
// Compiled only when the `std` feature is enabled, since the trait is referenced via its `std` path.
// The impl body is empty, so every trait method keeps its default implementation.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
22
23impl Error {
24    /// Returns the kind of this error.
25    pub fn kind(&self) -> &ErrorKind {
26        &self.kind
27    }
28
29    /// Returns byte offsets in the regex string that correspond to this error.
30    pub fn pos(&self) -> ops::Range<usize> {
31        self.pos.clone()
32    }
33
34    #[track_caller]
35    pub(crate) const fn compile_panic(self, regex: &str) -> ! {
36        let (_, hl) = regex.as_bytes().split_at(self.pos.start);
37        let (hl, _) = hl.split_at(self.pos.end - self.pos.start);
38        let Ok(hl) = str::from_utf8(hl) else {
39            panic!("internal error: invalid error range");
40        };
41
42        compile_panic!(
43            "invalid regex at ",
44            self.pos.start => compile_fmt::fmt::<usize>(), "..", self.pos.end => compile_fmt::fmt::<usize>(),
45            " ('", hl => compile_fmt::clip(64, "…"),
46            "'): ", self.kind.as_ascii_str() => compile_fmt::clip_ascii(32, "")
47        );
48    }
49}
50
/// Kind of a regex validation [`Error`].
///
/// The enum is marked as `#[non_exhaustive]`, so matching on it outside of this crate requires
/// a wildcard arm.
#[derive(Debug)]
#[non_exhaustive]
pub enum ErrorKind {
    /// Missing node for repetition, e.g. in `*`.
    MissingRepetition,
    /// Unfinished repetition, e.g. in `.{`.
    UnfinishedRepetition,
    /// Empty decimal in a counted repetition, e.g. in `.{}`.
    EmptyDecimal,
    /// Invalid decimal in a counted repetition, e.g. in `.{2x}`.
    InvalidDecimal,
    /// Empty hexadecimal escape, e.g. `\x{}`.
    EmptyHex,
    /// Invalid hexadecimal escape, e.g. `\u{what}`.
    InvalidHex,
    /// Hexadecimal escape does not map to a Unicode char, e.g. `\U99999999`.
    NonUnicodeHex,
    /// Invalid counted repetition range, e.g. in `.{3,2}`.
    InvalidRepetitionRange,
    /// Unfinished escape, e.g. `\u1`.
    UnfinishedEscape,
    /// Backreferences, e.g. `\1`, are not supported (same as in the `regex` crate).
    UnsupportedBackref,
    /// Unsupported escape, e.g. `\Y`.
    UnsupportedEscape,
    /// Unfinished word boundary, e.g. `\b{start`.
    UnfinishedWordBoundary,
    /// Unknown word boundary, e.g. `\b{what}`.
    UnknownWordBoundary,
    /// Unicode classes like `\pN` or `\p{Digit}` are not supported.
    UnicodeClassesNotSupported,
    /// Lookaround groups are not supported (same as in the `regex` crate).
    LookaroundNotSupported,
    /// Unfinished capture name, e.g. in `(?<what`.
    UnfinishedCaptureName,
    /// Empty capture name, e.g. in `(?P<>.)`.
    EmptyCaptureName,
    /// Invalid capture name, e.g. in `(?< what >.)`.
    InvalidCaptureName,
    /// Non-ASCII chars in the capture name.
    NonAsciiCaptureName,
    /// Duplicate capture name, e.g. in `(?<test>.)(?<test>.)`.
    DuplicateCaptureName {
        /// Byte range of the previous capture name definition.
        prev_pos: ops::Range<usize>,
    },
    /// Unfinished group, e.g. in `(.`.
    UnfinishedGroup,
    /// Non-matching group end, e.g. in `(.))`.
    NonMatchingGroupEnd,
    /// Unfinished set, e.g. in `[0-9`.
    UnfinishedSet,
    /// Invalid set range start, e.g. in `[\d-9]` (`\d` doesn't correspond to a single char).
    InvalidRangeStart,
    /// Invalid set range end, e.g. in `[0-\D]` (`\D` doesn't correspond to a single char).
    InvalidRangeEnd,
    /// Invalid range, e.g. in `[9-0]`.
    InvalidRange,
    /// Invalid escape encountered in a character set, e.g. in `[0\b]` (`\b` is an *assertion*;
    /// it doesn't map to a char or a set of chars).
    InvalidEscapeInSet,
    /// Unfinished flags, e.g. `(?x`.
    UnfinishedFlags,
    /// Unfinished negation in flags, e.g. `(?-)`.
    UnfinishedFlagsNegation,
    /// Repeated negation in flags, e.g. `(?--x)`.
    RepeatedFlagNegation,
    /// Unsupported flag, e.g. in `(?Y)`.
    UnsupportedFlag,
    /// Repeated flag, e.g. in `(?xx)`.
    RepeatedFlag {
        /// Do the flag mentions contradict each other?
        contradicting: bool,
    },

    /// Disallowed whitespace, e.g. in `\u{1 2 3}`. This is technically supported by `regex`,
    /// but makes literals harder to read.
    DisallowedWhitespace,
    /// Disallowed comment, e.g.
    ///
    /// ```text
    /// \U{1# one!
    /// 23}
    /// ```
    ///
    /// This is technically supported by `regex`, but makes literals harder to read.
    DisallowedComment,

    /// Regex contains too many spans for the capacity specified in
    /// [`RegexOptions::parse()`](crate::RegexOptions::parse()) etc.
    AstOverflow,
    /// Regex contains too deeply nested groups.
    GroupDepthOverflow,
    /// Regex contains too many named captures / groups.
    NamedGroupOverflow,
}
147
148impl fmt::Display for ErrorKind {
149    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
150        formatter.write_str(self.as_str())
151    }
152}
153
154impl ErrorKind {
155    pub(crate) const fn with_position(self, pos: ops::Range<usize>) -> Error {
156        Error { pos, kind: self }
157    }
158
159    const fn as_str(&self) -> &'static str {
160        match self {
161            Self::MissingRepetition => "missing repetition",
162            Self::UnfinishedRepetition => "unfinished repetition",
163            Self::EmptyDecimal => "empty decimal number",
164            Self::InvalidDecimal => "invalid decimal number",
165            Self::EmptyHex => "empty hex escape",
166            Self::InvalidHex => "invalid hex escape",
167            Self::NonUnicodeHex => "non-Unicode hex escape",
168            Self::InvalidRepetitionRange => "invalid repetition range",
169            Self::UnfinishedEscape => "unfinished escape",
170            Self::UnsupportedBackref => "backreferences (e.g., \\1) are not supported",
171            Self::UnsupportedEscape => "escape is not supported",
172            Self::UnfinishedWordBoundary => "unfinished word boundary",
173            Self::UnknownWordBoundary => "unknown word boundary",
174            Self::UnicodeClassesNotSupported => "Unicode classes are not supported",
175            Self::LookaroundNotSupported => "lookaround groups are not supported",
176            Self::UnfinishedCaptureName => "unfinished capture name",
177            Self::EmptyCaptureName => "empty capture name",
178            Self::InvalidCaptureName => "invalid capture name",
179            Self::NonAsciiCaptureName => "non-ASCII capture names are not supported",
180            Self::DuplicateCaptureName { .. } => "duplicate capture name",
181            Self::UnfinishedGroup => "unfinished group",
182            Self::NonMatchingGroupEnd => "non-matching group end",
183            Self::GroupDepthOverflow => "too deeply nested group",
184            Self::UnfinishedSet => "unfinished set",
185            Self::InvalidEscapeInSet => "invalid escape in set [..]",
186            Self::InvalidRangeStart => "invalid range start",
187            Self::InvalidRangeEnd => "invalid range end",
188            Self::InvalidRange => "invalid range",
189            Self::UnfinishedFlags => "unfinished flags",
190            Self::UnfinishedFlagsNegation => "unfinished flags negation",
191            Self::RepeatedFlagNegation => "repeated flag negation",
192            Self::UnsupportedFlag => "unsupported flag",
193            Self::RepeatedFlag {
194                contradicting: true,
195            } => "contradicting flag value",
196            Self::RepeatedFlag {
197                contradicting: false,
198            } => "repeated flag value",
199            Self::DisallowedWhitespace => "disallowed whitespace (e.g., inside a hex escape)",
200            Self::DisallowedComment => "disallowed comment (e.g., inside a hex escape)",
201            Self::AstOverflow => "too many AST nodes",
202            Self::NamedGroupOverflow => "too many named groups",
203        }
204    }
205
206    const fn as_ascii_str(&self) -> Ascii<'static> {
207        Ascii::new(self.as_str())
208    }
209}