term_transcript/test/parser/
mod.rs

1//! SVG parsing logic.
2
3use std::{
4    borrow::Cow,
5    error::Error as StdError,
6    fmt,
7    io::{self, BufRead},
8    mem,
9    num::ParseIntError,
10    ops,
11    str::{self, Utf8Error},
12};
13
14use quick_xml::{
15    encoding::EncodingError,
16    events::{attributes::Attributes, Event},
17    Reader as XmlReader,
18};
19use termcolor::WriteColor;
20
21use self::text::TextReadingState;
22use crate::{
23    test::color_diff::ColorSpan, ExitStatus, Interaction, TermOutput, Transcript, UserInput,
24};
25
26#[cfg(test)]
27mod tests;
28mod text;
29
30fn map_utf8_error(err: Utf8Error) -> quick_xml::Error {
31    quick_xml::Error::Encoding(EncodingError::Utf8(err))
32}
33
/// Parsed terminal output.
///
/// Bundles the text of a parsed fragment with the styling spans that are applied to it
/// when the fragment is written out in color.
#[derive(Debug, Clone, Default)]
pub struct Parsed {
    /// Text of the fragment; exposed via [`Self::plaintext()`].
    pub(crate) plaintext: String,
    /// Styling spans passed to `ColorSpan::write_colorized()` together with `plaintext`.
    pub(crate) color_spans: Vec<ColorSpan>,
}
40
41impl Parsed {
42    const DEFAULT: Self = Self {
43        plaintext: String::new(),
44        color_spans: Vec::new(),
45    };
46
47    /// Returns the parsed plaintext.
48    pub fn plaintext(&self) -> &str {
49        &self.plaintext
50    }
51
52    /// Writes the parsed text with coloring / styles applied.
53    ///
54    /// # Errors
55    ///
56    /// - Returns an I/O error should it occur when writing to `out`.
57    #[doc(hidden)] // makes `termcolor` dependency public, which we want to avoid so far
58    pub fn write_colorized(&self, out: &mut impl WriteColor) -> io::Result<()> {
59        ColorSpan::write_colorized(&self.color_spans, out, &self.plaintext)
60    }
61
62    /// Converts this parsed fragment into text for `UserInput`. This takes into account
63    /// that while the first space after prompt is inserted automatically, the further whitespace
64    /// may be significant.
65    fn into_input_text(self) -> String {
66        let mut text = if self.plaintext.starts_with(' ') {
67            self.plaintext[1..].to_owned()
68        } else {
69            self.plaintext
70        };
71
72        if text.ends_with('\n') {
73            text.pop();
74        }
75        text
76    }
77
78    fn trim_ending_newline(&mut self) {
79        if self.plaintext.ends_with('\n') {
80            self.plaintext.pop();
81            if let Some(last_span) = self.color_spans.last_mut() {
82                last_span.len -= 1;
83            }
84        }
85    }
86}
87
// Marker impl: the body is empty, so `Parsed` only opts into the `TermOutput` trait here.
impl TermOutput for Parsed {}
89
90impl Transcript<Parsed> {
91    /// Parses a transcript from the provided `reader`, which should point to an SVG XML tree
92    /// produced by [`Template::render()`] (possibly within a larger document).
93    ///
94    /// # Errors
95    ///
96    /// - Returns an error if the input cannot be parsed, usually because it was not produced
97    ///   by `Template::render()`.
98    ///
99    /// [`Template::render()`]: crate::svg::Template::render()
100    #[cfg_attr(feature = "tracing", tracing::instrument(skip_all, err))]
101    pub fn from_svg<R: BufRead>(reader: R) -> Result<Self, LocatedParseError> {
102        let mut reader = XmlReader::from_reader(reader);
103        let mut buffer = vec![];
104        let mut state = ParserState::Initialized;
105        let mut transcript = Self::new();
106        let mut open_tags = 0;
107
108        #[allow(clippy::cast_possible_truncation)] // Truncation shouldn't happen in practice
109        loop {
110            let prev_position = reader.buffer_position() as usize;
111            let event = reader
112                .read_event_into(&mut buffer)
113                .map_err(|err| LocatedParseError::new(err.into(), prev_position..prev_position))?;
114            let event_position = prev_position..reader.buffer_position() as usize;
115            match &event {
116                Event::Start(_) => {
117                    open_tags += 1;
118                }
119                Event::End(_) => {
120                    open_tags -= 1;
121                    if open_tags == 0 {
122                        break;
123                    }
124                }
125                Event::Eof => break,
126                _ => { /* Do nothing. */ }
127            }
128
129            let maybe_interaction = state
130                .process(event, event_position.clone())
131                .map_err(|err| LocatedParseError::new(err, event_position))?;
132            if let Some(interaction) = maybe_interaction {
133                #[cfg(feature = "tracing")]
134                tracing::debug!(
135                    ?interaction.input,
136                    interaction.output = ?interaction.output.plaintext,
137                    ?interaction.exit_status,
138                    "parsed interaction"
139                );
140                transcript.interactions.push(interaction);
141            }
142        }
143
144        match state {
145            ParserState::EncounteredContainer => Ok(transcript),
146            ParserState::EncounteredUserInput(interaction) => {
147                transcript.interactions.push(interaction);
148                Ok(transcript)
149            }
150            #[allow(clippy::cast_possible_truncation)] // Shouldn't happen in practice
151            _ => {
152                let pos = reader.buffer_position() as usize;
153                Err(LocatedParseError::new(ParseError::UnexpectedEof, pos..pos))
154            }
155        }
156    }
157}
158
159fn parse_classes(attributes: Attributes<'_>) -> Result<Cow<'_, [u8]>, ParseError> {
160    let mut class = None;
161    for attr in attributes {
162        let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
163        if attr.key.as_ref() == b"class" {
164            class = Some(attr.value);
165        }
166    }
167    Ok(class.unwrap_or(Cow::Borrowed(b"")))
168}
169
/// Returns the first class from a space-separated class list, i.e., all bytes preceding
/// the first space (or the entire input if it contains no spaces).
fn extract_base_class(classes: &[u8]) -> &[u8] {
    // `split()` always yields at least one (possibly empty) chunk, so the fallback is
    // never actually taken; it just avoids an `unwrap()`.
    classes
        .split(|&byte| byte == b' ')
        .next()
        .unwrap_or(classes)
}
174
175fn parse_exit_status(attributes: Attributes<'_>) -> Result<Option<ExitStatus>, ParseError> {
176    let mut exit_status = None;
177    for attr in attributes {
178        let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
179        if attr.key.as_ref() == b"data-exit-status" {
180            let status =
181                str::from_utf8(&attr.value).map_err(|err| ParseError::Xml(map_utf8_error(err)))?;
182            let status = status.parse().map_err(ParseError::InvalidExitStatus)?;
183            exit_status = Some(ExitStatus(status));
184        }
185    }
186    Ok(exit_status)
187}
188
/// Errors that can occur during parsing SVG transcripts.
///
/// The enum is `#[non_exhaustive]`; new variants may be added in the future.
#[derive(Debug)]
#[non_exhaustive]
pub enum ParseError {
    /// Unexpected root XML tag; must be `<svg>`. Contains the (lossily decoded) name
    /// of the tag that was encountered instead.
    UnexpectedRoot(String),
    /// Invalid transcript container: the `<div>` following `<svg>` lacks the XHTML `xmlns`
    /// attribute or the `class="container"` attribute, or one of them has another value.
    InvalidContainer,
    /// Invalid recorded exit status of an executed command, i.e., the `data-exit-status`
    /// attribute value cannot be parsed as an integer.
    InvalidExitStatus(ParseIntError),
    /// Unexpected end of file: the input ended before the transcript container was found,
    /// or in the middle of a user input / terminal output.
    UnexpectedEof,
    /// Invalid hard line break.
    // NOTE(review): not produced in this module; presumably raised by the `text` submodule.
    InvalidHardBreak,
    /// Error parsing XML. Also wraps I/O and UTF-8 decoding errors.
    Xml(quick_xml::Error),
}
206
207impl From<quick_xml::Error> for ParseError {
208    fn from(err: quick_xml::Error) -> Self {
209        Self::Xml(err)
210    }
211}
212
213impl From<io::Error> for ParseError {
214    fn from(err: io::Error) -> Self {
215        Self::Xml(err.into())
216    }
217}
218
219impl fmt::Display for ParseError {
220    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
221        match self {
222            Self::UnexpectedRoot(tag_name) => write!(
223                formatter,
224                "unexpected root XML tag: <{tag_name}>; expected <svg>"
225            ),
226            Self::InvalidContainer => formatter.write_str("invalid transcript container"),
227            Self::InvalidExitStatus(err) => write!(formatter, "invalid exit status: {err}"),
228            Self::UnexpectedEof => formatter.write_str("unexpected EOF"),
229            Self::InvalidHardBreak => formatter.write_str("invalid hard line break"),
230            Self::Xml(err) => write!(formatter, "error parsing XML: {err}"),
231        }
232    }
233}
234
235impl StdError for ParseError {
236    fn source(&self) -> Option<&(dyn StdError + 'static)> {
237        match self {
238            Self::Xml(err) => Some(err),
239            Self::InvalidExitStatus(err) => Some(err),
240            _ => None,
241        }
242    }
243}
244
/// [`ParseError`] together with its location in the XML input.
#[derive(Debug)]
pub struct LocatedParseError {
    /// The underlying error.
    inner: ParseError,
    /// Starting and ending byte offsets in the input that the error is attributed to.
    /// The range is empty for errors raised by the XML reader itself and for unexpected EOF.
    location: ops::Range<usize>,
}
251
252impl LocatedParseError {
253    fn new(inner: ParseError, location: ops::Range<usize>) -> Self {
254        Self { inner, location }
255    }
256
257    /// Returns a reference to the contained [`ParseError`].
258    pub fn inner(&self) -> &ParseError {
259        &self.inner
260    }
261
262    /// Returns the error location as the starting and ending byte offsets in the input.
263    pub fn location(&self) -> ops::Range<usize> {
264        self.location.clone()
265    }
266
267    /// Unwraps the contained parse error.
268    pub fn into_inner(self) -> ParseError {
269        self.inner
270    }
271}
272
273impl fmt::Display for LocatedParseError {
274    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
275        let Self { inner, location } = self;
276        write!(formatter, "at {}-{}: {inner}", location.start, location.end)
277    }
278}
279
280impl StdError for LocatedParseError {
281    fn source(&self) -> Option<&(dyn StdError + 'static)> {
282        self.inner.source()
283    }
284}
285
/// State of reading a single user input (the contents of an input element).
#[derive(Debug)]
struct UserInputState {
    /// Exit status read from the `data-exit-status` attribute of the input element, if any.
    exit_status: Option<ExitStatus>,
    /// Whether the input element has the `input-hidden` class.
    is_hidden: bool,
    /// Reader accumulating the text of the input.
    text: TextReadingState,
    /// Prompt text; set once the prompt element has been read to its end.
    prompt: Option<Cow<'static, str>>,
    /// Value of `text.open_tags()` at the moment the prompt start tag was encountered;
    /// `None` if no prompt start has been seen.
    prompt_open_tags: Option<usize>,
}
294
295impl UserInputState {
296    fn new(exit_status: Option<ExitStatus>, is_hidden: bool) -> Self {
297        Self {
298            exit_status,
299            is_hidden,
300            text: TextReadingState::default(),
301            prompt: None,
302            prompt_open_tags: None,
303        }
304    }
305}
306
impl UserInputState {
    /// Can prompt reading be started now?
    ///
    /// This is the case only if no text has been read so far, no prompt has been stored,
    /// and no prompt start has been recorded.
    fn can_start_prompt(&self) -> bool {
        self.text.is_empty() && self.prompt.is_none() && self.prompt_open_tags.is_none()
    }

    /// Can prompt reading be ended by the closing tag that is about to be processed?
    ///
    /// This requires that a prompt start was recorded, the prompt has not been stored yet,
    /// and the current number of open tags is exactly one more than at the prompt start.
    // NOTE(review): presumably `open_tags()` has already counted the prompt tag itself,
    // so `tags + 1` identifies the closing tag matching the prompt start; confirm in `text`.
    fn can_end_prompt(&self) -> bool {
        self.prompt.is_none()
            && self
                .prompt_open_tags
                .is_some_and(|tags| tags + 1 == self.text.open_tags())
    }

    /// Processes a single XML event inside the user input element.
    ///
    /// Returns `Ok(Some(_))` once the text reader reports that reading is complete.
    /// The returned interaction has the parsed input and a default (empty) output.
    ///
    /// # Errors
    ///
    /// - Propagates errors from reading tag attributes and from the text reader.
    fn process(
        &mut self,
        event: Event<'_>,
        position: ops::Range<usize>,
    ) -> Result<Option<Interaction<Parsed>>, ParseError> {
        // Prompt boundaries must be detected *before* the event is passed to the text reader,
        // since the checks compare against the reader's state prior to the event.
        let mut is_prompt_end = false;
        if let Event::Start(tag) = &event {
            // Note that the entire `class` value must be equal to `prompt`.
            if self.can_start_prompt() && parse_classes(tag.attributes())?.as_ref() == b"prompt" {
                // Got prompt start.
                self.prompt_open_tags = Some(self.text.open_tags());
            }
        } else if let Event::End(_) = &event {
            if self.can_end_prompt() {
                is_prompt_end = true;
            }
        }

        let maybe_parsed = self.text.process(event, position)?;
        if is_prompt_end {
            if let Some(parsed) = maybe_parsed {
                // Special case: user input consists of the prompt only.
                let input = UserInput {
                    text: String::new(),
                    prompt: Some(UserInput::intern_prompt(parsed.plaintext)),
                    hidden: self.is_hidden,
                };
                return Ok(Some(Interaction {
                    input,
                    output: Parsed::default(),
                    exit_status: self.exit_status,
                }));
            }
            // Everything read so far is the prompt; move it out of the text buffer so that
            // the buffer only accumulates the input text from now on.
            let text = mem::take(&mut self.text.plaintext_buffer);
            self.prompt = Some(UserInput::intern_prompt(text));
        }

        // General case: the prompt (if any) was stored earlier; the parsed text is the input.
        Ok(maybe_parsed.map(|parsed| {
            let input = UserInput {
                text: parsed.into_input_text(),
                prompt: self.prompt.take(),
                hidden: self.is_hidden,
            };
            Interaction {
                input,
                output: Parsed::default(),
                exit_status: self.exit_status,
            }
        }))
    }
}
370
/// States of the FSM for parsing SVGs.
///
/// Class names mentioned below refer to the first class in the `class` attribute;
/// `user-input` and `term-output` are accepted as alternatives to `input` and `output`.
#[derive(Debug)]
enum ParserState {
    /// Initial state; the first opening tag must be `<svg>`.
    Initialized,
    /// Encountered `<svg>` tag; searching for `<div class="container">`
    /// (or a `<text>` element with the `container` class).
    EncounteredSvgTag,
    /// Encountered `<div class="container">`; searching for `<div class="input">`.
    EncounteredContainer,
    /// Reading user input (`<div class="input">` contents).
    ReadingUserInput(UserInputState),
    /// Finished reading user input; searching for `<div class="output">`.
    /// Another input element may follow instead, in which case the contained interaction
    /// is emitted with an empty output.
    EncounteredUserInput(Interaction<Parsed>),
    /// Reading terminal output (`<div class="output">` contents).
    ReadingTermOutput(Interaction<Parsed>, TextReadingState),
}
387
388impl ParserState {
389    const DUMMY_INTERACTION: Interaction<Parsed> = Interaction {
390        input: UserInput {
391            text: String::new(),
392            prompt: None,
393            hidden: false,
394        },
395        output: Parsed::DEFAULT,
396        exit_status: None,
397    };
398
399    #[cfg_attr(feature = "tracing", tracing::instrument(level = "debug"))]
400    fn set_state(&mut self, new_state: Self) {
401        *self = new_state;
402    }
403
404    #[cfg_attr(feature = "tracing", tracing::instrument(level = "trace", err))]
405    fn process(
406        &mut self,
407        event: Event<'_>,
408        position: ops::Range<usize>,
409    ) -> Result<Option<Interaction<Parsed>>, ParseError> {
410        match self {
411            Self::Initialized => {
412                if let Event::Start(tag) = event {
413                    if tag.name().as_ref() == b"svg" {
414                        *self = Self::EncounteredSvgTag;
415                    } else {
416                        let tag_name = String::from_utf8_lossy(tag.name().as_ref()).into_owned();
417                        return Err(ParseError::UnexpectedRoot(tag_name));
418                    }
419                }
420            }
421
422            Self::EncounteredSvgTag => {
423                if let Event::Start(tag) = event {
424                    if tag.name().as_ref() == b"div" {
425                        Self::verify_container_attrs(tag.attributes())?;
426                        self.set_state(Self::EncounteredContainer);
427                    } else if tag.name().as_ref() == b"text"
428                        && Self::is_text_container(tag.attributes())?
429                    {
430                        self.set_state(Self::EncounteredContainer);
431                    }
432                }
433            }
434
435            Self::EncounteredContainer => {
436                if let Event::Start(tag) = event {
437                    let classes = parse_classes(tag.attributes())?;
438                    if Self::is_input_class(extract_base_class(&classes)) {
439                        let is_hidden = classes
440                            .split(|byte| *byte == b' ')
441                            .any(|chunk| chunk == b"input-hidden");
442                        let exit_status = parse_exit_status(tag.attributes())?;
443                        self.set_state(Self::ReadingUserInput(UserInputState::new(
444                            exit_status,
445                            is_hidden,
446                        )));
447                    }
448                }
449            }
450
451            Self::ReadingUserInput(state) => {
452                if let Some(interaction) = state.process(event, position)? {
453                    self.set_state(Self::EncounteredUserInput(interaction));
454                }
455            }
456
457            Self::EncounteredUserInput(interaction) => {
458                if let Event::Start(tag) = event {
459                    let classes = parse_classes(tag.attributes())?;
460                    let base_class = extract_base_class(&classes);
461
462                    if Self::is_output_class(base_class) {
463                        let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
464                        self.set_state(Self::ReadingTermOutput(
465                            interaction,
466                            TextReadingState::default(),
467                        ));
468                    } else if Self::is_input_class(base_class) {
469                        let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
470                        let exit_status = parse_exit_status(tag.attributes())?;
471                        let is_hidden = classes
472                            .split(|byte| *byte == b' ')
473                            .any(|chunk| chunk == b"input-hidden");
474                        self.set_state(Self::ReadingUserInput(UserInputState::new(
475                            exit_status,
476                            is_hidden,
477                        )));
478                        return Ok(Some(interaction));
479                    }
480                }
481            }
482
483            Self::ReadingTermOutput(interaction, text_state) => {
484                if let Some(term_output) = text_state.process(event, position)? {
485                    let mut interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
486                    interaction.output = term_output;
487                    self.set_state(Self::EncounteredContainer);
488                    return Ok(Some(interaction));
489                }
490            }
491        }
492        Ok(None)
493    }
494
495    fn is_input_class(class_name: &[u8]) -> bool {
496        class_name == b"input" || class_name == b"user-input"
497    }
498
499    fn is_output_class(class_name: &[u8]) -> bool {
500        class_name == b"output" || class_name == b"term-output"
501    }
502
503    #[cfg_attr(
504        feature = "tracing",
505        tracing::instrument(level = "debug", skip_all, err)
506    )]
507    fn verify_container_attrs(attributes: Attributes<'_>) -> Result<(), ParseError> {
508        const HTML_NS: &[u8] = b"http://www.w3.org/1999/xhtml";
509
510        let mut has_ns_attribute = false;
511        let mut has_class_attribute = false;
512
513        for attr in attributes {
514            let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
515            match attr.key.as_ref() {
516                b"xmlns" => {
517                    if attr.value.as_ref() != HTML_NS {
518                        return Err(ParseError::InvalidContainer);
519                    }
520                    has_ns_attribute = true;
521                }
522                b"class" => {
523                    if attr.value.as_ref() != b"container" {
524                        return Err(ParseError::InvalidContainer);
525                    }
526                    has_class_attribute = true;
527                }
528                _ => { /* Do nothing. */ }
529            }
530        }
531
532        if has_ns_attribute && has_class_attribute {
533            Ok(())
534        } else {
535            Err(ParseError::InvalidContainer)
536        }
537    }
538
539    fn is_text_container(attributes: Attributes<'_>) -> Result<bool, ParseError> {
540        let classes = parse_classes(attributes)?;
541        Ok(extract_base_class(&classes) == b"container")
542    }
543}