term_transcript/test/parser/
mod.rs1use std::{
4 borrow::Cow,
5 error::Error as StdError,
6 fmt,
7 io::{self, BufRead},
8 mem,
9 num::ParseIntError,
10 ops,
11 str::{self, Utf8Error},
12};
13
14use quick_xml::{
15 encoding::EncodingError,
16 events::{attributes::Attributes, Event},
17 Reader as XmlReader,
18};
19
20use self::text::TextReadingState;
21use crate::{style::StyledSpan, ExitStatus, Interaction, TermOutput, Transcript, UserInput};
22
23#[cfg(test)]
24mod tests;
25mod text;
26
27fn map_utf8_error(err: Utf8Error) -> quick_xml::Error {
28 quick_xml::Error::Encoding(EncodingError::Utf8(err))
29}
30
31#[derive(Debug, Clone, Default)]
33pub struct Parsed {
34 pub(crate) plaintext: String,
35 pub(crate) styled_spans: Vec<StyledSpan<usize>>,
36}
37
38impl Parsed {
39 const DEFAULT: Self = Self {
40 plaintext: String::new(),
41 styled_spans: Vec::new(),
42 };
43
44 pub fn plaintext(&self) -> &str {
46 &self.plaintext
47 }
48
49 #[doc(hidden)]
55 pub fn write_colorized(&self, out: &mut impl io::Write) -> io::Result<()> {
56 StyledSpan::write_colorized(&self.styled_spans, out, &self.plaintext)
57 }
58
59 fn into_input_text(self) -> String {
63 let mut text = if self.plaintext.starts_with(' ') {
64 self.plaintext[1..].to_owned()
65 } else {
66 self.plaintext
67 };
68
69 if text.ends_with('\n') {
70 text.pop();
71 }
72 text
73 }
74
75 fn trim_ending_newline(&mut self) {
76 if self.plaintext.ends_with('\n') {
77 self.plaintext.pop();
78 if let Some(last_span) = self.styled_spans.last_mut() {
79 last_span.text -= 1;
80 }
81 }
82 }
83}
84
85impl TermOutput for Parsed {}
86
87impl Transcript<Parsed> {
88 #[cfg_attr(feature = "tracing", tracing::instrument(skip_all, err))]
98 pub fn from_svg<R: BufRead>(reader: R) -> Result<Self, LocatedParseError> {
99 let mut reader = XmlReader::from_reader(reader);
100 let mut buffer = vec![];
101 let mut state = ParserState::Initialized;
102 let mut transcript = Self::new();
103 let mut open_tags = 0;
104
105 #[allow(clippy::cast_possible_truncation)] loop {
107 let prev_position = reader.buffer_position() as usize;
108 let event = reader
109 .read_event_into(&mut buffer)
110 .map_err(|err| LocatedParseError::new(err.into(), prev_position..prev_position))?;
111 let event_position = prev_position..reader.buffer_position() as usize;
112 match &event {
113 Event::Start(_) => {
114 open_tags += 1;
115 }
116 Event::End(_) => {
117 open_tags -= 1;
118 if open_tags == 0 {
119 break;
120 }
121 }
122 Event::Eof => break,
123 _ => { }
124 }
125
126 let maybe_interaction = state
127 .process(event, event_position.clone())
128 .map_err(|err| LocatedParseError::new(err, event_position))?;
129 if let Some(interaction) = maybe_interaction {
130 #[cfg(feature = "tracing")]
131 tracing::debug!(
132 ?interaction.input,
133 interaction.output = ?interaction.output.plaintext,
134 ?interaction.exit_status,
135 "parsed interaction"
136 );
137 transcript.interactions.push(interaction);
138 }
139 }
140
141 match state {
142 ParserState::EncounteredContainer => Ok(transcript),
143 ParserState::EncounteredUserInput(interaction) => {
144 transcript.interactions.push(interaction);
145 Ok(transcript)
146 }
147 #[allow(clippy::cast_possible_truncation)] _ => {
149 let pos = reader.buffer_position() as usize;
150 Err(LocatedParseError::new(ParseError::UnexpectedEof, pos..pos))
151 }
152 }
153 }
154}
155
156fn parse_classes(attributes: Attributes<'_>) -> Result<Cow<'_, [u8]>, ParseError> {
157 let mut class = None;
158 for attr in attributes {
159 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
160 if attr.key.as_ref() == b"class" {
161 class = Some(attr.value);
162 }
163 }
164 Ok(class.unwrap_or(Cow::Borrowed(b"")))
165}
166
/// Returns the first space-delimited class from a raw `class` attribute value,
/// i.e. everything before the first ASCII space (or the whole value if there is none).
fn extract_base_class(classes: &[u8]) -> &[u8] {
    match classes.iter().position(|&byte| byte == b' ') {
        Some(end) => &classes[..end],
        None => classes,
    }
}
171
172fn parse_exit_status(attributes: Attributes<'_>) -> Result<Option<ExitStatus>, ParseError> {
173 let mut exit_status = None;
174 for attr in attributes {
175 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
176 if attr.key.as_ref() == b"data-exit-status" {
177 let status =
178 str::from_utf8(&attr.value).map_err(|err| ParseError::Xml(map_utf8_error(err)))?;
179 let status = status.parse().map_err(ParseError::InvalidExitStatus)?;
180 exit_status = Some(ExitStatus(status));
181 }
182 }
183 Ok(exit_status)
184}
185
186#[derive(Debug)]
188#[non_exhaustive]
189pub enum ParseError {
190 UnexpectedRoot(String),
192 InvalidContainer,
194 InvalidExitStatus(ParseIntError),
196 UnexpectedEof,
198 InvalidHardBreak,
200 Xml(quick_xml::Error),
202}
203
204impl From<quick_xml::Error> for ParseError {
205 fn from(err: quick_xml::Error) -> Self {
206 Self::Xml(err)
207 }
208}
209
210impl From<io::Error> for ParseError {
211 fn from(err: io::Error) -> Self {
212 Self::Xml(err.into())
213 }
214}
215
216impl fmt::Display for ParseError {
217 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
218 match self {
219 Self::UnexpectedRoot(tag_name) => write!(
220 formatter,
221 "unexpected root XML tag: <{tag_name}>; expected <svg>"
222 ),
223 Self::InvalidContainer => formatter.write_str("invalid transcript container"),
224 Self::InvalidExitStatus(err) => write!(formatter, "invalid exit status: {err}"),
225 Self::UnexpectedEof => formatter.write_str("unexpected EOF"),
226 Self::InvalidHardBreak => formatter.write_str("invalid hard line break"),
227 Self::Xml(err) => write!(formatter, "error parsing XML: {err}"),
228 }
229 }
230}
231
232impl StdError for ParseError {
233 fn source(&self) -> Option<&(dyn StdError + 'static)> {
234 match self {
235 Self::Xml(err) => Some(err),
236 Self::InvalidExitStatus(err) => Some(err),
237 _ => None,
238 }
239 }
240}
241
242#[derive(Debug)]
244pub struct LocatedParseError {
245 inner: ParseError,
246 location: ops::Range<usize>,
247}
248
249impl LocatedParseError {
250 fn new(inner: ParseError, location: ops::Range<usize>) -> Self {
251 Self { inner, location }
252 }
253
254 pub fn inner(&self) -> &ParseError {
256 &self.inner
257 }
258
259 pub fn location(&self) -> ops::Range<usize> {
261 self.location.clone()
262 }
263
264 pub fn into_inner(self) -> ParseError {
266 self.inner
267 }
268}
269
270impl fmt::Display for LocatedParseError {
271 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
272 let Self { inner, location } = self;
273 write!(formatter, "at {}-{}: {inner}", location.start, location.end)
274 }
275}
276
277impl StdError for LocatedParseError {
278 fn source(&self) -> Option<&(dyn StdError + 'static)> {
279 self.inner.source()
280 }
281}
282
283#[derive(Debug)]
284struct UserInputState {
285 exit_status: Option<ExitStatus>,
286 is_hidden: bool,
287 text: TextReadingState,
288 prompt: Option<Cow<'static, str>>,
289 prompt_open_tags: Option<usize>,
290}
291
292impl UserInputState {
293 fn new(exit_status: Option<ExitStatus>, is_hidden: bool) -> Self {
294 Self {
295 exit_status,
296 is_hidden,
297 text: TextReadingState::default(),
298 prompt: None,
299 prompt_open_tags: None,
300 }
301 }
302}
303
304impl UserInputState {
305 fn can_start_prompt(&self) -> bool {
307 self.text.is_empty() && self.prompt.is_none() && self.prompt_open_tags.is_none()
308 }
309
310 fn can_end_prompt(&self) -> bool {
311 self.prompt.is_none()
312 && self
313 .prompt_open_tags
314 .is_some_and(|tags| tags + 1 == self.text.open_tags())
315 }
316
317 fn process(
318 &mut self,
319 event: Event<'_>,
320 position: ops::Range<usize>,
321 ) -> Result<Option<Interaction<Parsed>>, ParseError> {
322 let mut is_prompt_end = false;
323 if let Event::Start(tag) = &event {
324 if self.can_start_prompt() && parse_classes(tag.attributes())?.as_ref() == b"prompt" {
325 self.prompt_open_tags = Some(self.text.open_tags());
327 }
328 } else if let Event::End(_) = &event {
329 if self.can_end_prompt() {
330 is_prompt_end = true;
331 }
332 }
333
334 let maybe_parsed = self.text.process(event, position)?;
335 if is_prompt_end {
336 if let Some(parsed) = maybe_parsed {
337 let input = UserInput {
339 text: String::new(),
340 prompt: Some(UserInput::intern_prompt(parsed.plaintext)),
341 hidden: self.is_hidden,
342 };
343 return Ok(Some(Interaction {
344 input,
345 output: Parsed::default(),
346 exit_status: self.exit_status,
347 }));
348 }
349 let text = mem::take(&mut self.text.plaintext_buffer);
350 self.prompt = Some(UserInput::intern_prompt(text));
351 }
352
353 Ok(maybe_parsed.map(|parsed| {
354 let input = UserInput {
355 text: parsed.into_input_text(),
356 prompt: self.prompt.take(),
357 hidden: self.is_hidden,
358 };
359 Interaction {
360 input,
361 output: Parsed::default(),
362 exit_status: self.exit_status,
363 }
364 }))
365 }
366}
367
368#[derive(Debug)]
370enum ParserState {
371 Initialized,
373 EncounteredSvgTag,
375 EncounteredContainer,
377 ReadingUserInput(UserInputState),
379 EncounteredUserInput(Interaction<Parsed>),
381 ReadingTermOutput(Interaction<Parsed>, TextReadingState),
383}
384
385impl ParserState {
386 const DUMMY_INTERACTION: Interaction<Parsed> = Interaction {
387 input: UserInput {
388 text: String::new(),
389 prompt: None,
390 hidden: false,
391 },
392 output: Parsed::DEFAULT,
393 exit_status: None,
394 };
395
396 #[cfg_attr(feature = "tracing", tracing::instrument(level = "debug"))]
397 fn set_state(&mut self, new_state: Self) {
398 *self = new_state;
399 }
400
401 #[cfg_attr(feature = "tracing", tracing::instrument(level = "trace", err))]
402 fn process(
403 &mut self,
404 event: Event<'_>,
405 position: ops::Range<usize>,
406 ) -> Result<Option<Interaction<Parsed>>, ParseError> {
407 match self {
408 Self::Initialized => {
409 if let Event::Start(tag) = event {
410 if tag.name().as_ref() == b"svg" {
411 *self = Self::EncounteredSvgTag;
412 } else {
413 let tag_name = String::from_utf8_lossy(tag.name().as_ref()).into_owned();
414 return Err(ParseError::UnexpectedRoot(tag_name));
415 }
416 }
417 }
418
419 Self::EncounteredSvgTag => {
420 if let Event::Start(tag) = event {
421 if tag.name().as_ref() == b"div" {
422 Self::verify_viewport_attrs(tag.attributes())?;
423 self.set_state(Self::EncounteredContainer);
424 } else if tag.name().as_ref() == b"g"
425 && Self::is_svg_container(tag.attributes())?
426 {
427 self.set_state(Self::EncounteredContainer);
428 }
429 }
430 }
431
432 Self::EncounteredContainer => {
433 if let Event::Start(tag) = event {
434 let classes = parse_classes(tag.attributes())?;
435 if Self::is_input_class(extract_base_class(&classes)) {
436 let is_hidden = classes
437 .split(|byte| *byte == b' ')
438 .any(|chunk| chunk == b"input-hidden");
439 let exit_status = parse_exit_status(tag.attributes())?;
440 self.set_state(Self::ReadingUserInput(UserInputState::new(
441 exit_status,
442 is_hidden,
443 )));
444 }
445 }
446 }
447
448 Self::ReadingUserInput(state) => {
449 if let Some(interaction) = state.process(event, position)? {
450 self.set_state(Self::EncounteredUserInput(interaction));
451 }
452 }
453
454 Self::EncounteredUserInput(interaction) => {
455 if let Event::Start(tag) = event {
456 let classes = parse_classes(tag.attributes())?;
457 let base_class = extract_base_class(&classes);
458
459 if Self::is_output_class(base_class) {
460 let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
461 self.set_state(Self::ReadingTermOutput(
462 interaction,
463 TextReadingState::default(),
464 ));
465 } else if Self::is_input_class(base_class) {
466 let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
467 let exit_status = parse_exit_status(tag.attributes())?;
468 let is_hidden = classes
469 .split(|byte| *byte == b' ')
470 .any(|chunk| chunk == b"input-hidden");
471 self.set_state(Self::ReadingUserInput(UserInputState::new(
472 exit_status,
473 is_hidden,
474 )));
475 return Ok(Some(interaction));
476 }
477 }
478 }
479
480 Self::ReadingTermOutput(interaction, text_state) => {
481 if let Some(term_output) = text_state.process(event, position)? {
482 let mut interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
483 interaction.output = term_output;
484 self.set_state(Self::EncounteredContainer);
485 return Ok(Some(interaction));
486 }
487 }
488 }
489 Ok(None)
490 }
491
492 fn is_input_class(class_name: &[u8]) -> bool {
493 class_name == b"input" || class_name == b"user-input"
494 }
495
496 fn is_output_class(class_name: &[u8]) -> bool {
497 class_name == b"output" || class_name == b"term-output"
498 }
499
500 #[cfg_attr(
501 feature = "tracing",
502 tracing::instrument(level = "debug", skip_all, err)
503 )]
504 fn verify_viewport_attrs(attributes: Attributes<'_>) -> Result<(), ParseError> {
505 const HTML_NS: &[u8] = b"http://www.w3.org/1999/xhtml";
506
507 let mut has_ns_attribute = false;
508 let mut has_class_attribute = false;
509
510 for attr in attributes {
511 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
512 match attr.key.as_ref() {
513 b"xmlns" => {
514 if attr.value.as_ref() != HTML_NS {
515 return Err(ParseError::InvalidContainer);
516 }
517 has_ns_attribute = true;
518 }
519 b"class" => {
520 if ![b"viewport" as &[u8], b"container"].contains(&attr.value.as_ref()) {
523 return Err(ParseError::InvalidContainer);
524 }
525 has_class_attribute = true;
526 }
527 _ => { }
528 }
529 }
530
531 if has_ns_attribute && has_class_attribute {
532 Ok(())
533 } else {
534 Err(ParseError::InvalidContainer)
535 }
536 }
537
538 fn is_svg_container(attributes: Attributes<'_>) -> Result<bool, ParseError> {
539 let classes = parse_classes(attributes)?;
540 Ok(extract_base_class(&classes) == b"container")
541 }
542}