term_transcript/test/parser/
mod.rs

1//! SVG parsing logic.
2
3use std::{
4    borrow::Cow,
5    error::Error as StdError,
6    fmt,
7    io::{self, BufRead},
8    mem,
9    num::ParseIntError,
10    ops,
11    str::{self, Utf8Error},
12};
13
14use quick_xml::{
15    Reader as XmlReader,
16    encoding::EncodingError,
17    events::{Event, attributes::Attributes},
18};
19use styled_str::StyledString;
20
21use self::text::TextReadingState;
22use crate::{ExitStatus, Interaction, Transcript, UserInput};
23
24#[cfg(test)]
25mod tests;
26mod text;
27
28fn map_utf8_error(err: Utf8Error) -> quick_xml::Error {
29    quick_xml::Error::Encoding(EncodingError::Utf8(err))
30}
31
/// Turns the text of a parsed fragment into the text stored in `UserInput`.
///
/// At most one leading space is dropped (it is the separator inserted automatically after
/// the prompt; any further leading whitespace may be significant and is retained), and
/// at most one trailing newline is dropped.
fn into_input_text(text: String) -> String {
    let without_separator = text.strip_prefix(' ').unwrap_or(text.as_str());
    let without_newline = without_separator
        .strip_suffix('\n')
        .unwrap_or(without_separator);
    without_newline.to_owned()
}
47
48impl Transcript {
49    /// Parses a transcript from the provided `reader`, which should point to an SVG XML tree
50    /// produced by [`Template::render()`] (possibly within a larger document).
51    ///
52    /// # Errors
53    ///
54    /// - Returns an error if the input cannot be parsed, usually because it was not produced
55    ///   by `Template::render()`.
56    ///
57    /// [`Template::render()`]: crate::svg::Template::render()
58    #[cfg_attr(feature = "tracing", tracing::instrument(skip_all, err))]
59    pub fn from_svg<R: BufRead>(reader: R) -> Result<Self, LocatedParseError> {
60        let mut reader = XmlReader::from_reader(reader);
61        let mut buffer = vec![];
62        let mut state = ParserState::Initialized;
63        let mut transcript = Self::new();
64        let mut open_tags = 0;
65
66        #[allow(clippy::cast_possible_truncation)] // Truncation shouldn't happen in practice
67        loop {
68            let prev_position = reader.buffer_position() as usize;
69            let event = reader
70                .read_event_into(&mut buffer)
71                .map_err(|err| LocatedParseError::new(err.into(), prev_position..prev_position))?;
72            let event_position = prev_position..reader.buffer_position() as usize;
73            match &event {
74                Event::Start(_) => {
75                    open_tags += 1;
76                }
77                Event::End(_) => {
78                    open_tags -= 1;
79                    if open_tags == 0 {
80                        break;
81                    }
82                }
83                Event::Eof => break,
84                _ => { /* Do nothing. */ }
85            }
86
87            let maybe_interaction = state
88                .process(event, event_position.clone())
89                .map_err(|err| LocatedParseError::new(err, event_position))?;
90            if let Some(interaction) = maybe_interaction {
91                #[cfg(feature = "tracing")]
92                tracing::debug!(
93                    input = ?interaction.input(),
94                    output = interaction.output().text(),
95                    exit_status = ?interaction.exit_status(),
96                    "parsed interaction"
97                );
98                transcript.add_existing_interaction(interaction);
99            }
100        }
101
102        match state {
103            ParserState::EncounteredContainer => Ok(transcript),
104            ParserState::EncounteredUserInput(interaction) => {
105                transcript.add_existing_interaction(interaction.with_empty_output());
106                Ok(transcript)
107            }
108            #[allow(clippy::cast_possible_truncation)] // Shouldn't happen in practice
109            _ => {
110                let pos = reader.buffer_position() as usize;
111                Err(LocatedParseError::new(ParseError::UnexpectedEof, pos..pos))
112            }
113        }
114    }
115}
116
117fn parse_classes(attributes: Attributes<'_>) -> Result<Cow<'_, [u8]>, ParseError> {
118    let mut class = None;
119    for attr in attributes {
120        let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
121        if attr.key.as_ref() == b"class" {
122            class = Some(attr.value);
123        }
124    }
125    Ok(class.unwrap_or(Cow::Borrowed(b"")))
126}
127
/// Returns the first class from a space-separated class list, i.e. all bytes
/// preceding the first space (or the entire input if it contains no spaces).
fn extract_base_class(classes: &[u8]) -> &[u8] {
    classes
        .split(|&byte| byte == b' ')
        .next()
        .unwrap_or(classes)
}
132
133fn parse_exit_status(attributes: Attributes<'_>) -> Result<Option<ExitStatus>, ParseError> {
134    let mut exit_status = None;
135    for attr in attributes {
136        let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
137        if attr.key.as_ref() == b"data-exit-status" {
138            let status =
139                str::from_utf8(&attr.value).map_err(|err| ParseError::Xml(map_utf8_error(err)))?;
140            let status = status.parse().map_err(ParseError::InvalidExitStatus)?;
141            exit_status = Some(ExitStatus(status));
142        }
143    }
144    Ok(exit_status)
145}
146
/// Errors that can occur during parsing SVG transcripts.
///
/// [`Transcript::from_svg()`] returns these errors wrapped into a [`LocatedParseError`],
/// which additionally records the location of the error in the input.
#[derive(Debug)]
#[non_exhaustive]
pub enum ParseError {
    /// Unexpected root XML tag; must be `<svg>`. Contains the name of the encountered tag
    /// (lossily converted to UTF-8).
    UnexpectedRoot(String),
    /// Invalid transcript container. Returned if a `<div>` encountered while searching
    /// for the container lacks the XHTML `xmlns` attribute or a recognized `class` attribute
    /// (`viewport` or `container`), or has an unexpected value for either of them.
    InvalidContainer,
    /// Invalid recorded exit status of an executed command, i.e. the `data-exit-status`
    /// attribute value could not be parsed as an integer.
    InvalidExitStatus(ParseIntError),
    /// Unexpected end of file: the input ended before the transcript container was found
    /// or in the middle of an interaction.
    UnexpectedEof,
    /// Invalid hard line break.
    InvalidHardBreak,
    /// Error parsing XML. Also used for I/O errors while reading the input and for
    /// non-UTF-8 attribute values.
    Xml(quick_xml::Error),
}
164
165impl From<quick_xml::Error> for ParseError {
166    fn from(err: quick_xml::Error) -> Self {
167        Self::Xml(err)
168    }
169}
170
171impl From<io::Error> for ParseError {
172    fn from(err: io::Error) -> Self {
173        Self::Xml(err.into())
174    }
175}
176
177impl fmt::Display for ParseError {
178    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
179        match self {
180            Self::UnexpectedRoot(tag_name) => write!(
181                formatter,
182                "unexpected root XML tag: <{tag_name}>; expected <svg>"
183            ),
184            Self::InvalidContainer => formatter.write_str("invalid transcript container"),
185            Self::InvalidExitStatus(err) => write!(formatter, "invalid exit status: {err}"),
186            Self::UnexpectedEof => formatter.write_str("unexpected EOF"),
187            Self::InvalidHardBreak => formatter.write_str("invalid hard line break"),
188            Self::Xml(err) => write!(formatter, "error parsing XML: {err}"),
189        }
190    }
191}
192
193impl StdError for ParseError {
194    fn source(&self) -> Option<&(dyn StdError + 'static)> {
195        match self {
196            Self::Xml(err) => Some(err),
197            Self::InvalidExitStatus(err) => Some(err),
198            _ => None,
199        }
200    }
201}
202
/// [`ParseError`] together with its location in the XML input.
///
/// This is the error type returned by [`Transcript::from_svg()`].
#[derive(Debug)]
pub struct LocatedParseError {
    /// The underlying parse error.
    inner: ParseError,
    /// Starting and ending byte offsets in the input at which the error occurred.
    location: ops::Range<usize>,
}
209
210impl LocatedParseError {
211    fn new(inner: ParseError, location: ops::Range<usize>) -> Self {
212        Self { inner, location }
213    }
214
215    /// Returns a reference to the contained [`ParseError`].
216    pub fn inner(&self) -> &ParseError {
217        &self.inner
218    }
219
220    /// Returns the error location as the starting and ending byte offsets in the input.
221    pub fn location(&self) -> ops::Range<usize> {
222        self.location.clone()
223    }
224
225    /// Unwraps the contained parse error.
226    pub fn into_inner(self) -> ParseError {
227        self.inner
228    }
229}
230
231impl fmt::Display for LocatedParseError {
232    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
233        let Self { inner, location } = self;
234        write!(formatter, "at {}-{}: {inner}", location.start, location.end)
235    }
236}
237
238impl StdError for LocatedParseError {
239    fn source(&self) -> Option<&(dyn StdError + 'static)> {
240        self.inner.source()
241    }
242}
243
/// Parsed user input of an interaction together with the recorded exit status.
/// It is kept by the parser until the output for the interaction is known
/// and is then converted into an `Interaction`.
#[derive(Debug)]
struct InteractionInput {
    /// Parsed user input.
    input: UserInput,
    /// Exit status read from the `data-exit-status` attribute of the input element, if any.
    exit_status: Option<ExitStatus>,
}
249
250impl Default for InteractionInput {
251    fn default() -> Self {
252        Self {
253            input: UserInput::EMPTY,
254            exit_status: None,
255        }
256    }
257}
258
259impl InteractionInput {
260    fn with_output(self, output: StyledString) -> Interaction {
261        let mut interaction = Interaction::new(self.input, output);
262        interaction.set_exit_status(self.exit_status);
263        interaction
264    }
265
266    fn with_empty_output(self) -> Interaction {
267        self.with_output(StyledString::default())
268    }
269}
270
/// State of reading a single user input element.
#[derive(Debug)]
struct UserInputState {
    /// Exit status read from the `data-exit-status` attribute of the input element, if any.
    exit_status: Option<ExitStatus>,
    /// Whether the input element has the `input-hidden` class.
    is_hidden: bool,
    /// State of reading the text of the input element.
    text: TextReadingState,
    /// Prompt text; set once the prompt element has been read completely.
    prompt: Option<String>,
    /// Number of open tags reported by `text` at the moment the start tag of the prompt
    /// element was encountered. Used to detect the matching end tag.
    prompt_open_tags: Option<usize>,
}
279
280impl UserInputState {
281    fn new(exit_status: Option<ExitStatus>, is_hidden: bool) -> Self {
282        Self {
283            exit_status,
284            is_hidden,
285            text: TextReadingState::default(),
286            prompt: None,
287            prompt_open_tags: None,
288        }
289    }
290}
291
292impl UserInputState {
293    /// Can prompt reading be started now?
294    fn can_start_prompt(&self) -> bool {
295        self.text.is_empty() && self.prompt.is_none() && self.prompt_open_tags.is_none()
296    }
297
298    fn can_end_prompt(&self) -> bool {
299        self.prompt.is_none()
300            && self
301                .prompt_open_tags
302                .is_some_and(|tags| tags + 1 == self.text.open_tags())
303    }
304
305    fn process(
306        &mut self,
307        event: Event<'_>,
308        position: ops::Range<usize>,
309    ) -> Result<Option<InteractionInput>, ParseError> {
310        let mut is_prompt_end = false;
311        if let Event::Start(tag) = &event {
312            if self.can_start_prompt() && parse_classes(tag.attributes())?.as_ref() == b"prompt" {
313                // Got prompt start.
314                self.prompt_open_tags = Some(self.text.open_tags());
315            }
316        } else if let Event::End(_) = &event {
317            if self.can_end_prompt() {
318                is_prompt_end = true;
319            }
320        }
321
322        let maybe_parsed = self.text.process(event, position)?;
323        if is_prompt_end {
324            if let Some(parsed) = maybe_parsed {
325                // Special case: user input consists of the prompt only.
326                let text = parsed.into_text();
327                let mut input = UserInput::new(String::new()).with_prompt(Some(text));
328                if self.is_hidden {
329                    input = input.hide();
330                }
331
332                return Ok(Some(InteractionInput {
333                    input,
334                    exit_status: self.exit_status,
335                }));
336            }
337            let text = self.text.take_plaintext();
338            self.prompt = Some(text);
339        }
340
341        Ok(maybe_parsed.map(|parsed| {
342            let text = parsed.into_text();
343            let mut input = UserInput::new(into_input_text(text)).with_prompt(self.prompt.take());
344            if self.is_hidden {
345                input = input.hide();
346            }
347
348            InteractionInput {
349                input,
350                exit_status: self.exit_status,
351            }
352        }))
353    }
354}
355
/// States of the FSM for parsing SVGs.
#[derive(Debug)]
enum ParserState {
    /// Initial state; waiting for the root `<svg>` tag.
    Initialized,
    /// Encountered `<svg>` tag; searching for the transcript container
    /// (a `<div>` with the `viewport` or `container` class, or a `<g>` with
    /// the `container` base class).
    EncounteredSvgTag,
    /// Encountered the transcript container; searching for a user input element
    /// (one with the `input` or `user-input` base class).
    EncounteredContainer,
    /// Reading user input (`<div class="input">` contents).
    ReadingUserInput(UserInputState),
    /// Finished reading user input; searching for an output element (one with
    /// the `output` or `term-output` base class) or the next user input element.
    EncounteredUserInput(InteractionInput),
    /// Reading terminal output (`<div class="output">` contents).
    ReadingTermOutput(InteractionInput, TextReadingState),
}
372
373impl ParserState {
374    #[cfg_attr(feature = "tracing", tracing::instrument(level = "debug"))]
375    fn set_state(&mut self, new_state: Self) {
376        *self = new_state;
377    }
378
379    #[cfg_attr(feature = "tracing", tracing::instrument(level = "trace", err))]
380    fn process(
381        &mut self,
382        event: Event<'_>,
383        position: ops::Range<usize>,
384    ) -> Result<Option<Interaction>, ParseError> {
385        match self {
386            Self::Initialized => {
387                if let Event::Start(tag) = event {
388                    if tag.name().as_ref() == b"svg" {
389                        *self = Self::EncounteredSvgTag;
390                    } else {
391                        let tag_name = String::from_utf8_lossy(tag.name().as_ref()).into_owned();
392                        return Err(ParseError::UnexpectedRoot(tag_name));
393                    }
394                }
395            }
396
397            Self::EncounteredSvgTag => {
398                if let Event::Start(tag) = event {
399                    if tag.name().as_ref() == b"div" {
400                        Self::verify_viewport_attrs(tag.attributes())?;
401                        self.set_state(Self::EncounteredContainer);
402                    } else if tag.name().as_ref() == b"g"
403                        && Self::is_svg_container(tag.attributes())?
404                    {
405                        self.set_state(Self::EncounteredContainer);
406                    }
407                }
408            }
409
410            Self::EncounteredContainer => {
411                if let Event::Start(tag) = event {
412                    let classes = parse_classes(tag.attributes())?;
413                    if Self::is_input_class(extract_base_class(&classes)) {
414                        let is_hidden = classes
415                            .split(|byte| *byte == b' ')
416                            .any(|chunk| chunk == b"input-hidden");
417                        let exit_status = parse_exit_status(tag.attributes())?;
418                        self.set_state(Self::ReadingUserInput(UserInputState::new(
419                            exit_status,
420                            is_hidden,
421                        )));
422                    }
423                }
424            }
425
426            Self::ReadingUserInput(state) => {
427                if let Some(interaction) = state.process(event, position)? {
428                    self.set_state(Self::EncounteredUserInput(interaction));
429                }
430            }
431
432            Self::EncounteredUserInput(interaction) => {
433                if let Event::Start(tag) = event {
434                    let classes = parse_classes(tag.attributes())?;
435                    let base_class = extract_base_class(&classes);
436
437                    if Self::is_output_class(base_class) {
438                        let interaction = mem::take(interaction);
439                        self.set_state(Self::ReadingTermOutput(
440                            interaction,
441                            TextReadingState::default(),
442                        ));
443                    } else if Self::is_input_class(base_class) {
444                        let interaction = mem::take(interaction);
445                        let exit_status = parse_exit_status(tag.attributes())?;
446                        let is_hidden = classes
447                            .split(|byte| *byte == b' ')
448                            .any(|chunk| chunk == b"input-hidden");
449                        self.set_state(Self::ReadingUserInput(UserInputState::new(
450                            exit_status,
451                            is_hidden,
452                        )));
453                        return Ok(Some(interaction.with_empty_output()));
454                    }
455                }
456            }
457
458            Self::ReadingTermOutput(interaction, text_state) => {
459                if let Some(term_output) = text_state.process(event, position)? {
460                    let interaction = mem::take(interaction);
461                    self.set_state(Self::EncounteredContainer);
462                    return Ok(Some(interaction.with_output(term_output)));
463                }
464            }
465        }
466        Ok(None)
467    }
468
469    fn is_input_class(class_name: &[u8]) -> bool {
470        class_name == b"input" || class_name == b"user-input"
471    }
472
473    fn is_output_class(class_name: &[u8]) -> bool {
474        class_name == b"output" || class_name == b"term-output"
475    }
476
477    #[cfg_attr(
478        feature = "tracing",
479        tracing::instrument(level = "debug", skip_all, err)
480    )]
481    fn verify_viewport_attrs(attributes: Attributes<'_>) -> Result<(), ParseError> {
482        const HTML_NS: &[u8] = b"http://www.w3.org/1999/xhtml";
483
484        let mut has_ns_attribute = false;
485        let mut has_class_attribute = false;
486
487        for attr in attributes {
488            let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
489            match attr.key.as_ref() {
490                b"xmlns" => {
491                    if attr.value.as_ref() != HTML_NS {
492                        return Err(ParseError::InvalidContainer);
493                    }
494                    has_ns_attribute = true;
495                }
496                b"class" => {
497                    // Older versions had `div.container` as the top-level HTML element; now, it's wrapped
498                    // in `div.viewport`.
499                    if ![b"viewport" as &[u8], b"container"].contains(&attr.value.as_ref()) {
500                        return Err(ParseError::InvalidContainer);
501                    }
502                    has_class_attribute = true;
503                }
504                _ => { /* Do nothing. */ }
505            }
506        }
507
508        if has_ns_attribute && has_class_attribute {
509            Ok(())
510        } else {
511            Err(ParseError::InvalidContainer)
512        }
513    }
514
515    fn is_svg_container(attributes: Attributes<'_>) -> Result<bool, ParseError> {
516        let classes = parse_classes(attributes)?;
517        Ok(extract_base_class(&classes) == b"container")
518    }
519}