term_transcript/test/parser/
mod.rs

1//! SVG parsing logic.
2
3use std::{
4    borrow::Cow,
5    error::Error as StdError,
6    fmt,
7    io::{self, BufRead},
8    mem,
9    num::ParseIntError,
10    ops,
11    str::{self, Utf8Error},
12};
13
14use quick_xml::{
15    encoding::EncodingError,
16    events::{attributes::Attributes, Event},
17    Reader as XmlReader,
18};
19
20use self::text::TextReadingState;
21use crate::{style::StyledSpan, ExitStatus, Interaction, TermOutput, Transcript, UserInput};
22
23#[cfg(test)]
24mod tests;
25mod text;
26
27fn map_utf8_error(err: Utf8Error) -> quick_xml::Error {
28    quick_xml::Error::Encoding(EncodingError::Utf8(err))
29}
30
/// Parsed terminal output.
///
/// Values of this type are used as the output of interactions produced by
/// `Transcript::from_svg()`.
#[derive(Debug, Clone, Default)]
pub struct Parsed {
    /// Plain text of the parsed fragment, as returned by `plaintext()`.
    pub(crate) plaintext: String,
    /// Styled spans that `write_colorized()` applies on top of `plaintext`.
    pub(crate) styled_spans: Vec<StyledSpan<usize>>,
}
37
38impl Parsed {
39    const DEFAULT: Self = Self {
40        plaintext: String::new(),
41        styled_spans: Vec::new(),
42    };
43
44    /// Returns the parsed plaintext.
45    pub fn plaintext(&self) -> &str {
46        &self.plaintext
47    }
48
49    /// Writes the parsed text with coloring / styles applied.
50    ///
51    /// # Errors
52    ///
53    /// - Returns an I/O error should it occur when writing to `out`.
54    #[doc(hidden)]
55    pub fn write_colorized(&self, out: &mut impl io::Write) -> io::Result<()> {
56        StyledSpan::write_colorized(&self.styled_spans, out, &self.plaintext)
57    }
58
59    /// Converts this parsed fragment into text for `UserInput`. This takes into account
60    /// that while the first space after prompt is inserted automatically, the further whitespace
61    /// may be significant.
62    fn into_input_text(self) -> String {
63        let mut text = if self.plaintext.starts_with(' ') {
64            self.plaintext[1..].to_owned()
65        } else {
66            self.plaintext
67        };
68
69        if text.ends_with('\n') {
70            text.pop();
71        }
72        text
73    }
74
75    fn trim_ending_newline(&mut self) {
76        if self.plaintext.ends_with('\n') {
77            self.plaintext.pop();
78            if let Some(last_span) = self.styled_spans.last_mut() {
79                last_span.text -= 1;
80            }
81        }
82    }
83}
84
// Empty impl: `TermOutput` requires no items from `Parsed`. Presumably this is the bound that
// lets `Parsed` act as the output type of `Transcript` / `Interaction` (confirm against the
// trait definition).
impl TermOutput for Parsed {}
86
87impl Transcript<Parsed> {
88    /// Parses a transcript from the provided `reader`, which should point to an SVG XML tree
89    /// produced by [`Template::render()`] (possibly within a larger document).
90    ///
91    /// # Errors
92    ///
93    /// - Returns an error if the input cannot be parsed, usually because it was not produced
94    ///   by `Template::render()`.
95    ///
96    /// [`Template::render()`]: crate::svg::Template::render()
97    #[cfg_attr(feature = "tracing", tracing::instrument(skip_all, err))]
98    pub fn from_svg<R: BufRead>(reader: R) -> Result<Self, LocatedParseError> {
99        let mut reader = XmlReader::from_reader(reader);
100        let mut buffer = vec![];
101        let mut state = ParserState::Initialized;
102        let mut transcript = Self::new();
103        let mut open_tags = 0;
104
105        #[allow(clippy::cast_possible_truncation)] // Truncation shouldn't happen in practice
106        loop {
107            let prev_position = reader.buffer_position() as usize;
108            let event = reader
109                .read_event_into(&mut buffer)
110                .map_err(|err| LocatedParseError::new(err.into(), prev_position..prev_position))?;
111            let event_position = prev_position..reader.buffer_position() as usize;
112            match &event {
113                Event::Start(_) => {
114                    open_tags += 1;
115                }
116                Event::End(_) => {
117                    open_tags -= 1;
118                    if open_tags == 0 {
119                        break;
120                    }
121                }
122                Event::Eof => break,
123                _ => { /* Do nothing. */ }
124            }
125
126            let maybe_interaction = state
127                .process(event, event_position.clone())
128                .map_err(|err| LocatedParseError::new(err, event_position))?;
129            if let Some(interaction) = maybe_interaction {
130                #[cfg(feature = "tracing")]
131                tracing::debug!(
132                    ?interaction.input,
133                    interaction.output = ?interaction.output.plaintext,
134                    ?interaction.exit_status,
135                    "parsed interaction"
136                );
137                transcript.interactions.push(interaction);
138            }
139        }
140
141        match state {
142            ParserState::EncounteredContainer => Ok(transcript),
143            ParserState::EncounteredUserInput(interaction) => {
144                transcript.interactions.push(interaction);
145                Ok(transcript)
146            }
147            #[allow(clippy::cast_possible_truncation)] // Shouldn't happen in practice
148            _ => {
149                let pos = reader.buffer_position() as usize;
150                Err(LocatedParseError::new(ParseError::UnexpectedEof, pos..pos))
151            }
152        }
153    }
154}
155
156fn parse_classes(attributes: Attributes<'_>) -> Result<Cow<'_, [u8]>, ParseError> {
157    let mut class = None;
158    for attr in attributes {
159        let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
160        if attr.key.as_ref() == b"class" {
161            class = Some(attr.value);
162        }
163    }
164    Ok(class.unwrap_or(Cow::Borrowed(b"")))
165}
166
/// Returns the part of `classes` preceding the first space (i.e., the first listed class),
/// or the entire input if it contains no spaces.
fn extract_base_class(classes: &[u8]) -> &[u8] {
    // Splitting a slice always yields at least one (possibly empty) chunk.
    classes
        .split(|&byte| byte == b' ')
        .next()
        .unwrap_or(classes)
}
171
172fn parse_exit_status(attributes: Attributes<'_>) -> Result<Option<ExitStatus>, ParseError> {
173    let mut exit_status = None;
174    for attr in attributes {
175        let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
176        if attr.key.as_ref() == b"data-exit-status" {
177            let status =
178                str::from_utf8(&attr.value).map_err(|err| ParseError::Xml(map_utf8_error(err)))?;
179            let status = status.parse().map_err(ParseError::InvalidExitStatus)?;
180            exit_status = Some(ExitStatus(status));
181        }
182    }
183    Ok(exit_status)
184}
185
/// Errors that can occur during parsing SVG transcripts.
#[derive(Debug)]
#[non_exhaustive]
pub enum ParseError {
    /// Unexpected root XML tag; must be `<svg>`. Contains the (lossily decoded) name
    /// of the encountered tag.
    UnexpectedRoot(String),
    /// Invalid transcript container: the top-level `<div>` lacks the XHTML `xmlns` attribute
    /// or the `class` attribute, or one of these attributes has an unexpected value.
    InvalidContainer,
    /// Invalid recorded exit status of an executed command (the value of the `data-exit-status`
    /// attribute cannot be parsed as an integer).
    InvalidExitStatus(ParseIntError),
    /// Unexpected end of file.
    UnexpectedEof,
    /// Invalid hard line break.
    // NOTE(review): not produced in this module; presumably raised by the `text` submodule.
    InvalidHardBreak,
    /// Error parsing XML. I/O and UTF-8 decoding errors are reported via this variant as well.
    Xml(quick_xml::Error),
}
203
204impl From<quick_xml::Error> for ParseError {
205    fn from(err: quick_xml::Error) -> Self {
206        Self::Xml(err)
207    }
208}
209
210impl From<io::Error> for ParseError {
211    fn from(err: io::Error) -> Self {
212        Self::Xml(err.into())
213    }
214}
215
216impl fmt::Display for ParseError {
217    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
218        match self {
219            Self::UnexpectedRoot(tag_name) => write!(
220                formatter,
221                "unexpected root XML tag: <{tag_name}>; expected <svg>"
222            ),
223            Self::InvalidContainer => formatter.write_str("invalid transcript container"),
224            Self::InvalidExitStatus(err) => write!(formatter, "invalid exit status: {err}"),
225            Self::UnexpectedEof => formatter.write_str("unexpected EOF"),
226            Self::InvalidHardBreak => formatter.write_str("invalid hard line break"),
227            Self::Xml(err) => write!(formatter, "error parsing XML: {err}"),
228        }
229    }
230}
231
232impl StdError for ParseError {
233    fn source(&self) -> Option<&(dyn StdError + 'static)> {
234        match self {
235            Self::Xml(err) => Some(err),
236            Self::InvalidExitStatus(err) => Some(err),
237            _ => None,
238        }
239    }
240}
241
/// [`ParseError`] together with its location in the XML input.
#[derive(Debug)]
pub struct LocatedParseError {
    /// The underlying error.
    inner: ParseError,
    /// Starting and ending byte offsets in the input that the error is attributed to.
    location: ops::Range<usize>,
}
248
249impl LocatedParseError {
250    fn new(inner: ParseError, location: ops::Range<usize>) -> Self {
251        Self { inner, location }
252    }
253
254    /// Returns a reference to the contained [`ParseError`].
255    pub fn inner(&self) -> &ParseError {
256        &self.inner
257    }
258
259    /// Returns the error location as the starting and ending byte offsets in the input.
260    pub fn location(&self) -> ops::Range<usize> {
261        self.location.clone()
262    }
263
264    /// Unwraps the contained parse error.
265    pub fn into_inner(self) -> ParseError {
266        self.inner
267    }
268}
269
270impl fmt::Display for LocatedParseError {
271    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
272        let Self { inner, location } = self;
273        write!(formatter, "at {}-{}: {inner}", location.start, location.end)
274    }
275}
276
277impl StdError for LocatedParseError {
278    fn source(&self) -> Option<&(dyn StdError + 'static)> {
279        self.inner.source()
280    }
281}
282
/// State maintained while reading a user input element.
#[derive(Debug)]
struct UserInputState {
    /// Exit status read from the attributes of the input element, if any.
    exit_status: Option<ExitStatus>,
    /// Whether the input element is marked with the `input-hidden` class.
    is_hidden: bool,
    /// Reader processing the XML events inside the input element.
    text: TextReadingState,
    /// Prompt text; set once the prompt element has been closed.
    prompt: Option<Cow<'static, str>>,
    /// Number of open tags reported by `text` at the moment the prompt element was opened;
    /// `None` until a prompt element is encountered.
    prompt_open_tags: Option<usize>,
}
291
292impl UserInputState {
293    fn new(exit_status: Option<ExitStatus>, is_hidden: bool) -> Self {
294        Self {
295            exit_status,
296            is_hidden,
297            text: TextReadingState::default(),
298            prompt: None,
299            prompt_open_tags: None,
300        }
301    }
302}
303
304impl UserInputState {
305    /// Can prompt reading be started now?
306    fn can_start_prompt(&self) -> bool {
307        self.text.is_empty() && self.prompt.is_none() && self.prompt_open_tags.is_none()
308    }
309
310    fn can_end_prompt(&self) -> bool {
311        self.prompt.is_none()
312            && self
313                .prompt_open_tags
314                .is_some_and(|tags| tags + 1 == self.text.open_tags())
315    }
316
317    fn process(
318        &mut self,
319        event: Event<'_>,
320        position: ops::Range<usize>,
321    ) -> Result<Option<Interaction<Parsed>>, ParseError> {
322        let mut is_prompt_end = false;
323        if let Event::Start(tag) = &event {
324            if self.can_start_prompt() && parse_classes(tag.attributes())?.as_ref() == b"prompt" {
325                // Got prompt start.
326                self.prompt_open_tags = Some(self.text.open_tags());
327            }
328        } else if let Event::End(_) = &event {
329            if self.can_end_prompt() {
330                is_prompt_end = true;
331            }
332        }
333
334        let maybe_parsed = self.text.process(event, position)?;
335        if is_prompt_end {
336            if let Some(parsed) = maybe_parsed {
337                // Special case: user input consists of the prompt only.
338                let input = UserInput {
339                    text: String::new(),
340                    prompt: Some(UserInput::intern_prompt(parsed.plaintext)),
341                    hidden: self.is_hidden,
342                };
343                return Ok(Some(Interaction {
344                    input,
345                    output: Parsed::default(),
346                    exit_status: self.exit_status,
347                }));
348            }
349            let text = mem::take(&mut self.text.plaintext_buffer);
350            self.prompt = Some(UserInput::intern_prompt(text));
351        }
352
353        Ok(maybe_parsed.map(|parsed| {
354            let input = UserInput {
355                text: parsed.into_input_text(),
356                prompt: self.prompt.take(),
357                hidden: self.is_hidden,
358            };
359            Interaction {
360                input,
361                output: Parsed::default(),
362                exit_status: self.exit_status,
363            }
364        }))
365    }
366}
367
/// States of the FSM for parsing SVGs.
#[derive(Debug)]
enum ParserState {
    /// Initial state; searching for the root `<svg>` tag.
    Initialized,
    /// Encountered `<svg>` tag; searching for the transcript container. The container is either
    /// a `<div>` with the XHTML namespace and the `viewport` / `container` class, or a `<g>`
    /// with the `container` base class.
    EncounteredSvgTag,
    /// Encountered the transcript container; searching for an element with the `input`
    /// (or `user-input`) base class.
    EncounteredContainer,
    /// Reading user input (contents of the input element).
    ReadingUserInput(UserInputState),
    /// Finished reading user input; searching for an element with the `output`
    /// (or `term-output`) base class, or for the next input element.
    EncounteredUserInput(Interaction<Parsed>),
    /// Reading terminal output (contents of the output element).
    ReadingTermOutput(Interaction<Parsed>, TextReadingState),
}
384
385impl ParserState {
386    const DUMMY_INTERACTION: Interaction<Parsed> = Interaction {
387        input: UserInput {
388            text: String::new(),
389            prompt: None,
390            hidden: false,
391        },
392        output: Parsed::DEFAULT,
393        exit_status: None,
394    };
395
396    #[cfg_attr(feature = "tracing", tracing::instrument(level = "debug"))]
397    fn set_state(&mut self, new_state: Self) {
398        *self = new_state;
399    }
400
401    #[cfg_attr(feature = "tracing", tracing::instrument(level = "trace", err))]
402    fn process(
403        &mut self,
404        event: Event<'_>,
405        position: ops::Range<usize>,
406    ) -> Result<Option<Interaction<Parsed>>, ParseError> {
407        match self {
408            Self::Initialized => {
409                if let Event::Start(tag) = event {
410                    if tag.name().as_ref() == b"svg" {
411                        *self = Self::EncounteredSvgTag;
412                    } else {
413                        let tag_name = String::from_utf8_lossy(tag.name().as_ref()).into_owned();
414                        return Err(ParseError::UnexpectedRoot(tag_name));
415                    }
416                }
417            }
418
419            Self::EncounteredSvgTag => {
420                if let Event::Start(tag) = event {
421                    if tag.name().as_ref() == b"div" {
422                        Self::verify_viewport_attrs(tag.attributes())?;
423                        self.set_state(Self::EncounteredContainer);
424                    } else if tag.name().as_ref() == b"g"
425                        && Self::is_svg_container(tag.attributes())?
426                    {
427                        self.set_state(Self::EncounteredContainer);
428                    }
429                }
430            }
431
432            Self::EncounteredContainer => {
433                if let Event::Start(tag) = event {
434                    let classes = parse_classes(tag.attributes())?;
435                    if Self::is_input_class(extract_base_class(&classes)) {
436                        let is_hidden = classes
437                            .split(|byte| *byte == b' ')
438                            .any(|chunk| chunk == b"input-hidden");
439                        let exit_status = parse_exit_status(tag.attributes())?;
440                        self.set_state(Self::ReadingUserInput(UserInputState::new(
441                            exit_status,
442                            is_hidden,
443                        )));
444                    }
445                }
446            }
447
448            Self::ReadingUserInput(state) => {
449                if let Some(interaction) = state.process(event, position)? {
450                    self.set_state(Self::EncounteredUserInput(interaction));
451                }
452            }
453
454            Self::EncounteredUserInput(interaction) => {
455                if let Event::Start(tag) = event {
456                    let classes = parse_classes(tag.attributes())?;
457                    let base_class = extract_base_class(&classes);
458
459                    if Self::is_output_class(base_class) {
460                        let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
461                        self.set_state(Self::ReadingTermOutput(
462                            interaction,
463                            TextReadingState::default(),
464                        ));
465                    } else if Self::is_input_class(base_class) {
466                        let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
467                        let exit_status = parse_exit_status(tag.attributes())?;
468                        let is_hidden = classes
469                            .split(|byte| *byte == b' ')
470                            .any(|chunk| chunk == b"input-hidden");
471                        self.set_state(Self::ReadingUserInput(UserInputState::new(
472                            exit_status,
473                            is_hidden,
474                        )));
475                        return Ok(Some(interaction));
476                    }
477                }
478            }
479
480            Self::ReadingTermOutput(interaction, text_state) => {
481                if let Some(term_output) = text_state.process(event, position)? {
482                    let mut interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
483                    interaction.output = term_output;
484                    self.set_state(Self::EncounteredContainer);
485                    return Ok(Some(interaction));
486                }
487            }
488        }
489        Ok(None)
490    }
491
492    fn is_input_class(class_name: &[u8]) -> bool {
493        class_name == b"input" || class_name == b"user-input"
494    }
495
496    fn is_output_class(class_name: &[u8]) -> bool {
497        class_name == b"output" || class_name == b"term-output"
498    }
499
500    #[cfg_attr(
501        feature = "tracing",
502        tracing::instrument(level = "debug", skip_all, err)
503    )]
504    fn verify_viewport_attrs(attributes: Attributes<'_>) -> Result<(), ParseError> {
505        const HTML_NS: &[u8] = b"http://www.w3.org/1999/xhtml";
506
507        let mut has_ns_attribute = false;
508        let mut has_class_attribute = false;
509
510        for attr in attributes {
511            let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
512            match attr.key.as_ref() {
513                b"xmlns" => {
514                    if attr.value.as_ref() != HTML_NS {
515                        return Err(ParseError::InvalidContainer);
516                    }
517                    has_ns_attribute = true;
518                }
519                b"class" => {
520                    // Older versions had `div.container` as the top-level HTML element; now, it's wrapped
521                    // in `div.viewport`.
522                    if ![b"viewport" as &[u8], b"container"].contains(&attr.value.as_ref()) {
523                        return Err(ParseError::InvalidContainer);
524                    }
525                    has_class_attribute = true;
526                }
527                _ => { /* Do nothing. */ }
528            }
529        }
530
531        if has_ns_attribute && has_class_attribute {
532            Ok(())
533        } else {
534            Err(ParseError::InvalidContainer)
535        }
536    }
537
538    fn is_svg_container(attributes: Attributes<'_>) -> Result<bool, ParseError> {
539        let classes = parse_classes(attributes)?;
540        Ok(extract_base_class(&classes) == b"container")
541    }
542}