// term_transcript/test/parser/mod.rs
use std::{
4 borrow::Cow,
5 error::Error as StdError,
6 fmt,
7 io::{self, BufRead},
8 mem,
9 num::ParseIntError,
10 ops,
11 str::{self, Utf8Error},
12};
13
14use quick_xml::{
15 encoding::EncodingError,
16 events::{attributes::Attributes, Event},
17 Reader as XmlReader,
18};
19use termcolor::WriteColor;
20
21use self::text::TextReadingState;
22use crate::{
23 test::color_diff::ColorSpan, ExitStatus, Interaction, TermOutput, Transcript, UserInput,
24};
25
26#[cfg(test)]
27mod tests;
28mod text;
29
30fn map_utf8_error(err: Utf8Error) -> quick_xml::Error {
31 quick_xml::Error::Encoding(EncodingError::Utf8(err))
32}
33
/// Text obtained by parsing a transcript: the plain text plus the color spans
/// that accompany it.
#[derive(Debug, Clone, Default)]
pub struct Parsed {
    /// Text content without any styling information.
    pub(crate) plaintext: String,
    /// Spans passed to `ColorSpan::write_colorized()` together with `plaintext`.
    pub(crate) color_spans: Vec<ColorSpan>,
}
40
41impl Parsed {
42 const DEFAULT: Self = Self {
43 plaintext: String::new(),
44 color_spans: Vec::new(),
45 };
46
47 pub fn plaintext(&self) -> &str {
49 &self.plaintext
50 }
51
52 #[doc(hidden)] pub fn write_colorized(&self, out: &mut impl WriteColor) -> io::Result<()> {
59 ColorSpan::write_colorized(&self.color_spans, out, &self.plaintext)
60 }
61
62 fn into_input_text(self) -> String {
66 let mut text = if self.plaintext.starts_with(' ') {
67 self.plaintext[1..].to_owned()
68 } else {
69 self.plaintext
70 };
71
72 if text.ends_with('\n') {
73 text.pop();
74 }
75 text
76 }
77
78 fn trim_ending_newline(&mut self) {
79 if self.plaintext.ends_with('\n') {
80 self.plaintext.pop();
81 if let Some(last_span) = self.color_spans.last_mut() {
82 last_span.len -= 1;
83 }
84 }
85 }
86}
87
// `Parsed` provides no items of its own for `TermOutput`; the impl body is empty.
impl TermOutput for Parsed {}
89
90impl Transcript<Parsed> {
91 #[cfg_attr(feature = "tracing", tracing::instrument(skip_all, err))]
101 pub fn from_svg<R: BufRead>(reader: R) -> Result<Self, LocatedParseError> {
102 let mut reader = XmlReader::from_reader(reader);
103 let mut buffer = vec![];
104 let mut state = ParserState::Initialized;
105 let mut transcript = Self::new();
106 let mut open_tags = 0;
107
108 #[allow(clippy::cast_possible_truncation)] loop {
110 let prev_position = reader.buffer_position() as usize;
111 let event = reader
112 .read_event_into(&mut buffer)
113 .map_err(|err| LocatedParseError::new(err.into(), prev_position..prev_position))?;
114 let event_position = prev_position..reader.buffer_position() as usize;
115 match &event {
116 Event::Start(_) => {
117 open_tags += 1;
118 }
119 Event::End(_) => {
120 open_tags -= 1;
121 if open_tags == 0 {
122 break;
123 }
124 }
125 Event::Eof => break,
126 _ => { }
127 }
128
129 let maybe_interaction = state
130 .process(event, event_position.clone())
131 .map_err(|err| LocatedParseError::new(err, event_position))?;
132 if let Some(interaction) = maybe_interaction {
133 #[cfg(feature = "tracing")]
134 tracing::debug!(
135 ?interaction.input,
136 interaction.output = ?interaction.output.plaintext,
137 ?interaction.exit_status,
138 "parsed interaction"
139 );
140 transcript.interactions.push(interaction);
141 }
142 }
143
144 match state {
145 ParserState::EncounteredContainer => Ok(transcript),
146 ParserState::EncounteredUserInput(interaction) => {
147 transcript.interactions.push(interaction);
148 Ok(transcript)
149 }
150 #[allow(clippy::cast_possible_truncation)] _ => {
152 let pos = reader.buffer_position() as usize;
153 Err(LocatedParseError::new(ParseError::UnexpectedEof, pos..pos))
154 }
155 }
156 }
157}
158
159fn parse_classes(attributes: Attributes<'_>) -> Result<Cow<'_, [u8]>, ParseError> {
160 let mut class = None;
161 for attr in attributes {
162 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
163 if attr.key.as_ref() == b"class" {
164 class = Some(attr.value);
165 }
166 }
167 Ok(class.unwrap_or(Cow::Borrowed(b"")))
168}
169
/// Returns the part of `classes` before the first space, i.e. the first
/// class name in a space-separated list. Without a space, the whole input is returned.
fn extract_base_class(classes: &[u8]) -> &[u8] {
    // `split` always yields at least one (possibly empty) chunk, so the fallback
    // only exists to avoid unwrapping.
    classes
        .split(|&byte| byte == b' ')
        .next()
        .unwrap_or(classes)
}
174
175fn parse_exit_status(attributes: Attributes<'_>) -> Result<Option<ExitStatus>, ParseError> {
176 let mut exit_status = None;
177 for attr in attributes {
178 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
179 if attr.key.as_ref() == b"data-exit-status" {
180 let status =
181 str::from_utf8(&attr.value).map_err(|err| ParseError::Xml(map_utf8_error(err)))?;
182 let status = status.parse().map_err(ParseError::InvalidExitStatus)?;
183 exit_status = Some(ExitStatus(status));
184 }
185 }
186 Ok(exit_status)
187}
188
/// Errors that can occur while parsing a transcript from SVG.
#[derive(Debug)]
#[non_exhaustive]
pub enum ParseError {
    /// The first opened XML tag is not `<svg>`; the payload is the name of the tag
    /// that was encountered instead.
    UnexpectedRoot(String),
    /// The transcript container `<div>` has a wrong or missing `xmlns` / `class` attribute.
    InvalidContainer,
    /// The `data-exit-status` attribute value could not be parsed as an integer.
    InvalidExitStatus(ParseIntError),
    /// The input ended before parsing reached a state in which it may stop.
    UnexpectedEof,
    /// Invalid hard line break.
    // NOTE(review): this variant is not constructed in this file; presumably it is
    // produced by the `text` submodule — confirm.
    InvalidHardBreak,
    /// Error reported by the XML reader (also used for I/O and UTF-8 decoding errors).
    Xml(quick_xml::Error),
}
206
207impl From<quick_xml::Error> for ParseError {
208 fn from(err: quick_xml::Error) -> Self {
209 Self::Xml(err)
210 }
211}
212
213impl From<io::Error> for ParseError {
214 fn from(err: io::Error) -> Self {
215 Self::Xml(err.into())
216 }
217}
218
219impl fmt::Display for ParseError {
220 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
221 match self {
222 Self::UnexpectedRoot(tag_name) => write!(
223 formatter,
224 "unexpected root XML tag: <{tag_name}>; expected <svg>"
225 ),
226 Self::InvalidContainer => formatter.write_str("invalid transcript container"),
227 Self::InvalidExitStatus(err) => write!(formatter, "invalid exit status: {err}"),
228 Self::UnexpectedEof => formatter.write_str("unexpected EOF"),
229 Self::InvalidHardBreak => formatter.write_str("invalid hard line break"),
230 Self::Xml(err) => write!(formatter, "error parsing XML: {err}"),
231 }
232 }
233}
234
235impl StdError for ParseError {
236 fn source(&self) -> Option<&(dyn StdError + 'static)> {
237 match self {
238 Self::Xml(err) => Some(err),
239 Self::InvalidExitStatus(err) => Some(err),
240 _ => None,
241 }
242 }
243}
244
/// [`ParseError`] together with the location in the parsed input it relates to.
#[derive(Debug)]
pub struct LocatedParseError {
    /// The underlying error.
    inner: ParseError,
    /// Range of reader buffer positions associated with the error.
    location: ops::Range<usize>,
}
251
252impl LocatedParseError {
253 fn new(inner: ParseError, location: ops::Range<usize>) -> Self {
254 Self { inner, location }
255 }
256
257 pub fn inner(&self) -> &ParseError {
259 &self.inner
260 }
261
262 pub fn location(&self) -> ops::Range<usize> {
264 self.location.clone()
265 }
266
267 pub fn into_inner(self) -> ParseError {
269 self.inner
270 }
271}
272
273impl fmt::Display for LocatedParseError {
274 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
275 let Self { inner, location } = self;
276 write!(formatter, "at {}-{}: {inner}", location.start, location.end)
277 }
278}
279
280impl StdError for LocatedParseError {
281 fn source(&self) -> Option<&(dyn StdError + 'static)> {
282 self.inner.source()
283 }
284}
285
/// State of reading a user input element.
#[derive(Debug)]
struct UserInputState {
    /// Exit status copied into the interaction produced from this input.
    exit_status: Option<ExitStatus>,
    /// Value for the `hidden` flag of the produced `UserInput`.
    is_hidden: bool,
    /// State of reading the text of the input element.
    text: TextReadingState,
    /// Prompt captured from a nested element with the `prompt` class, if one has been read.
    prompt: Option<Cow<'static, str>>,
    /// Value of `text.open_tags()` recorded when the prompt element was opened;
    /// `None` if no prompt element has been encountered.
    prompt_open_tags: Option<usize>,
}
294
295impl UserInputState {
296 fn new(exit_status: Option<ExitStatus>, is_hidden: bool) -> Self {
297 Self {
298 exit_status,
299 is_hidden,
300 text: TextReadingState::default(),
301 prompt: None,
302 prompt_open_tags: None,
303 }
304 }
305}
306
307impl UserInputState {
308 fn can_start_prompt(&self) -> bool {
310 self.text.is_empty() && self.prompt.is_none() && self.prompt_open_tags.is_none()
311 }
312
313 fn can_end_prompt(&self) -> bool {
314 self.prompt.is_none()
315 && self
316 .prompt_open_tags
317 .is_some_and(|tags| tags + 1 == self.text.open_tags())
318 }
319
320 fn process(
321 &mut self,
322 event: Event<'_>,
323 position: ops::Range<usize>,
324 ) -> Result<Option<Interaction<Parsed>>, ParseError> {
325 let mut is_prompt_end = false;
326 if let Event::Start(tag) = &event {
327 if self.can_start_prompt() && parse_classes(tag.attributes())?.as_ref() == b"prompt" {
328 self.prompt_open_tags = Some(self.text.open_tags());
330 }
331 } else if let Event::End(_) = &event {
332 if self.can_end_prompt() {
333 is_prompt_end = true;
334 }
335 }
336
337 let maybe_parsed = self.text.process(event, position)?;
338 if is_prompt_end {
339 if let Some(parsed) = maybe_parsed {
340 let input = UserInput {
342 text: String::new(),
343 prompt: Some(UserInput::intern_prompt(parsed.plaintext)),
344 hidden: self.is_hidden,
345 };
346 return Ok(Some(Interaction {
347 input,
348 output: Parsed::default(),
349 exit_status: self.exit_status,
350 }));
351 }
352 let text = mem::take(&mut self.text.plaintext_buffer);
353 self.prompt = Some(UserInput::intern_prompt(text));
354 }
355
356 Ok(maybe_parsed.map(|parsed| {
357 let input = UserInput {
358 text: parsed.into_input_text(),
359 prompt: self.prompt.take(),
360 hidden: self.is_hidden,
361 };
362 Interaction {
363 input,
364 output: Parsed::default(),
365 exit_status: self.exit_status,
366 }
367 }))
368 }
369}
370
/// States of the transcript parser.
#[derive(Debug)]
enum ParserState {
    /// Initial state: the root `<svg>` tag has not been encountered yet.
    Initialized,
    /// The `<svg>` tag has been encountered; looking for the transcript container.
    EncounteredSvgTag,
    /// Inside the transcript container; waiting for a user input element.
    EncounteredContainer,
    /// Reading a user input element.
    ReadingUserInput(UserInputState),
    /// User input has been read completely; waiting for either a terminal output
    /// element or the next user input element.
    EncounteredUserInput(Interaction<Parsed>),
    /// Reading terminal output for the contained interaction.
    ReadingTermOutput(Interaction<Parsed>, TextReadingState),
}
387
388impl ParserState {
389 const DUMMY_INTERACTION: Interaction<Parsed> = Interaction {
390 input: UserInput {
391 text: String::new(),
392 prompt: None,
393 hidden: false,
394 },
395 output: Parsed::DEFAULT,
396 exit_status: None,
397 };
398
399 #[cfg_attr(feature = "tracing", tracing::instrument(level = "debug"))]
400 fn set_state(&mut self, new_state: Self) {
401 *self = new_state;
402 }
403
404 #[cfg_attr(feature = "tracing", tracing::instrument(level = "trace", err))]
405 fn process(
406 &mut self,
407 event: Event<'_>,
408 position: ops::Range<usize>,
409 ) -> Result<Option<Interaction<Parsed>>, ParseError> {
410 match self {
411 Self::Initialized => {
412 if let Event::Start(tag) = event {
413 if tag.name().as_ref() == b"svg" {
414 *self = Self::EncounteredSvgTag;
415 } else {
416 let tag_name = String::from_utf8_lossy(tag.name().as_ref()).into_owned();
417 return Err(ParseError::UnexpectedRoot(tag_name));
418 }
419 }
420 }
421
422 Self::EncounteredSvgTag => {
423 if let Event::Start(tag) = event {
424 if tag.name().as_ref() == b"div" {
425 Self::verify_container_attrs(tag.attributes())?;
426 self.set_state(Self::EncounteredContainer);
427 } else if tag.name().as_ref() == b"text"
428 && Self::is_text_container(tag.attributes())?
429 {
430 self.set_state(Self::EncounteredContainer);
431 }
432 }
433 }
434
435 Self::EncounteredContainer => {
436 if let Event::Start(tag) = event {
437 let classes = parse_classes(tag.attributes())?;
438 if Self::is_input_class(extract_base_class(&classes)) {
439 let is_hidden = classes
440 .split(|byte| *byte == b' ')
441 .any(|chunk| chunk == b"input-hidden");
442 let exit_status = parse_exit_status(tag.attributes())?;
443 self.set_state(Self::ReadingUserInput(UserInputState::new(
444 exit_status,
445 is_hidden,
446 )));
447 }
448 }
449 }
450
451 Self::ReadingUserInput(state) => {
452 if let Some(interaction) = state.process(event, position)? {
453 self.set_state(Self::EncounteredUserInput(interaction));
454 }
455 }
456
457 Self::EncounteredUserInput(interaction) => {
458 if let Event::Start(tag) = event {
459 let classes = parse_classes(tag.attributes())?;
460 let base_class = extract_base_class(&classes);
461
462 if Self::is_output_class(base_class) {
463 let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
464 self.set_state(Self::ReadingTermOutput(
465 interaction,
466 TextReadingState::default(),
467 ));
468 } else if Self::is_input_class(base_class) {
469 let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
470 let exit_status = parse_exit_status(tag.attributes())?;
471 let is_hidden = classes
472 .split(|byte| *byte == b' ')
473 .any(|chunk| chunk == b"input-hidden");
474 self.set_state(Self::ReadingUserInput(UserInputState::new(
475 exit_status,
476 is_hidden,
477 )));
478 return Ok(Some(interaction));
479 }
480 }
481 }
482
483 Self::ReadingTermOutput(interaction, text_state) => {
484 if let Some(term_output) = text_state.process(event, position)? {
485 let mut interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
486 interaction.output = term_output;
487 self.set_state(Self::EncounteredContainer);
488 return Ok(Some(interaction));
489 }
490 }
491 }
492 Ok(None)
493 }
494
495 fn is_input_class(class_name: &[u8]) -> bool {
496 class_name == b"input" || class_name == b"user-input"
497 }
498
499 fn is_output_class(class_name: &[u8]) -> bool {
500 class_name == b"output" || class_name == b"term-output"
501 }
502
503 #[cfg_attr(
504 feature = "tracing",
505 tracing::instrument(level = "debug", skip_all, err)
506 )]
507 fn verify_container_attrs(attributes: Attributes<'_>) -> Result<(), ParseError> {
508 const HTML_NS: &[u8] = b"http://www.w3.org/1999/xhtml";
509
510 let mut has_ns_attribute = false;
511 let mut has_class_attribute = false;
512
513 for attr in attributes {
514 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
515 match attr.key.as_ref() {
516 b"xmlns" => {
517 if attr.value.as_ref() != HTML_NS {
518 return Err(ParseError::InvalidContainer);
519 }
520 has_ns_attribute = true;
521 }
522 b"class" => {
523 if attr.value.as_ref() != b"container" {
524 return Err(ParseError::InvalidContainer);
525 }
526 has_class_attribute = true;
527 }
528 _ => { }
529 }
530 }
531
532 if has_ns_attribute && has_class_attribute {
533 Ok(())
534 } else {
535 Err(ParseError::InvalidContainer)
536 }
537 }
538
539 fn is_text_container(attributes: Attributes<'_>) -> Result<bool, ParseError> {
540 let classes = parse_classes(attributes)?;
541 Ok(extract_base_class(&classes) == b"container")
542 }
543}