term_transcript/test/parser/
mod.rs1use std::{
4 borrow::Cow,
5 error::Error as StdError,
6 fmt,
7 io::{self, BufRead},
8 mem,
9 num::ParseIntError,
10 ops,
11 str::{self, Utf8Error},
12};
13
14use quick_xml::{
15 Reader as XmlReader,
16 encoding::EncodingError,
17 events::{Event, attributes::Attributes},
18};
19use styled_str::StyledString;
20
21use self::text::TextReadingState;
22use crate::{ExitStatus, Interaction, Transcript, UserInput};
23
24#[cfg(test)]
25mod tests;
26mod text;
27
28fn map_utf8_error(err: Utf8Error) -> quick_xml::Error {
29 quick_xml::Error::Encoding(EncodingError::Utf8(err))
30}
31
/// Normalizes raw text read from a user input element.
///
/// At most one leading space and at most one trailing newline are removed; all other
/// characters (including further spaces / newlines) are kept as is.
fn into_input_text(text: String) -> String {
    let mut text = text;
    // NOTE(review): the leading space presumably separates the prompt from the input
    // in the rendered markup; confirm against the SVG template.
    if text.starts_with(' ') {
        text.remove(0);
    }
    if text.ends_with('\n') {
        text.pop();
    }
    text
}
47
48impl Transcript {
49 #[cfg_attr(feature = "tracing", tracing::instrument(skip_all, err))]
59 pub fn from_svg<R: BufRead>(reader: R) -> Result<Self, LocatedParseError> {
60 let mut reader = XmlReader::from_reader(reader);
61 let mut buffer = vec![];
62 let mut state = ParserState::Initialized;
63 let mut transcript = Self::new();
64 let mut open_tags = 0;
65
66 #[allow(clippy::cast_possible_truncation)] loop {
68 let prev_position = reader.buffer_position() as usize;
69 let event = reader
70 .read_event_into(&mut buffer)
71 .map_err(|err| LocatedParseError::new(err.into(), prev_position..prev_position))?;
72 let event_position = prev_position..reader.buffer_position() as usize;
73 match &event {
74 Event::Start(_) => {
75 open_tags += 1;
76 }
77 Event::End(_) => {
78 open_tags -= 1;
79 if open_tags == 0 {
80 break;
81 }
82 }
83 Event::Eof => break,
84 _ => { }
85 }
86
87 let maybe_interaction = state
88 .process(event, event_position.clone())
89 .map_err(|err| LocatedParseError::new(err, event_position))?;
90 if let Some(interaction) = maybe_interaction {
91 #[cfg(feature = "tracing")]
92 tracing::debug!(
93 input = ?interaction.input(),
94 output = interaction.output().text(),
95 exit_status = ?interaction.exit_status(),
96 "parsed interaction"
97 );
98 transcript.add_existing_interaction(interaction);
99 }
100 }
101
102 match state {
103 ParserState::EncounteredContainer => Ok(transcript),
104 ParserState::EncounteredUserInput(interaction) => {
105 transcript.add_existing_interaction(interaction.with_empty_output());
106 Ok(transcript)
107 }
108 #[allow(clippy::cast_possible_truncation)] _ => {
110 let pos = reader.buffer_position() as usize;
111 Err(LocatedParseError::new(ParseError::UnexpectedEof, pos..pos))
112 }
113 }
114 }
115}
116
117fn parse_classes(attributes: Attributes<'_>) -> Result<Cow<'_, [u8]>, ParseError> {
118 let mut class = None;
119 for attr in attributes {
120 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
121 if attr.key.as_ref() == b"class" {
122 class = Some(attr.value);
123 }
124 }
125 Ok(class.unwrap_or(Cow::Borrowed(b"")))
126}
127
/// Returns the part of `classes` preceding the first ASCII space, or all of `classes`
/// if it contains no spaces.
fn extract_base_class(classes: &[u8]) -> &[u8] {
    // `split()` on a slice always yields at least one (possibly empty) chunk,
    // so the fallback is never actually used.
    classes
        .split(|&byte| byte == b' ')
        .next()
        .unwrap_or(classes)
}
132
133fn parse_exit_status(attributes: Attributes<'_>) -> Result<Option<ExitStatus>, ParseError> {
134 let mut exit_status = None;
135 for attr in attributes {
136 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
137 if attr.key.as_ref() == b"data-exit-status" {
138 let status =
139 str::from_utf8(&attr.value).map_err(|err| ParseError::Xml(map_utf8_error(err)))?;
140 let status = status.parse().map_err(ParseError::InvalidExitStatus)?;
141 exit_status = Some(ExitStatus(status));
142 }
143 }
144 Ok(exit_status)
145}
146
/// Errors that can occur when parsing a transcript from SVG.
#[derive(Debug)]
#[non_exhaustive]
pub enum ParseError {
    /// The root XML tag is not `<svg>`. The payload is the name of the encountered tag.
    UnexpectedRoot(String),
    /// The transcript container is invalid.
    InvalidContainer,
    /// The exit status attribute cannot be parsed as an integer.
    InvalidExitStatus(ParseIntError),
    /// The input ended unexpectedly.
    UnexpectedEof,
    /// A hard line break is invalid.
    InvalidHardBreak,
    /// Error parsing XML.
    Xml(quick_xml::Error),
}
164
165impl From<quick_xml::Error> for ParseError {
166 fn from(err: quick_xml::Error) -> Self {
167 Self::Xml(err)
168 }
169}
170
171impl From<io::Error> for ParseError {
172 fn from(err: io::Error) -> Self {
173 Self::Xml(err.into())
174 }
175}
176
177impl fmt::Display for ParseError {
178 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
179 match self {
180 Self::UnexpectedRoot(tag_name) => write!(
181 formatter,
182 "unexpected root XML tag: <{tag_name}>; expected <svg>"
183 ),
184 Self::InvalidContainer => formatter.write_str("invalid transcript container"),
185 Self::InvalidExitStatus(err) => write!(formatter, "invalid exit status: {err}"),
186 Self::UnexpectedEof => formatter.write_str("unexpected EOF"),
187 Self::InvalidHardBreak => formatter.write_str("invalid hard line break"),
188 Self::Xml(err) => write!(formatter, "error parsing XML: {err}"),
189 }
190 }
191}
192
193impl StdError for ParseError {
194 fn source(&self) -> Option<&(dyn StdError + 'static)> {
195 match self {
196 Self::Xml(err) => Some(err),
197 Self::InvalidExitStatus(err) => Some(err),
198 _ => None,
199 }
200 }
201}
202
/// [`ParseError`] together with the location in the parsed input it relates to.
#[derive(Debug)]
pub struct LocatedParseError {
    /// Underlying parsing error.
    inner: ParseError,
    /// Range of positions in the parsed input associated with the error.
    location: ops::Range<usize>,
}
209
210impl LocatedParseError {
211 fn new(inner: ParseError, location: ops::Range<usize>) -> Self {
212 Self { inner, location }
213 }
214
215 pub fn inner(&self) -> &ParseError {
217 &self.inner
218 }
219
220 pub fn location(&self) -> ops::Range<usize> {
222 self.location.clone()
223 }
224
225 pub fn into_inner(self) -> ParseError {
227 self.inner
228 }
229}
230
231impl fmt::Display for LocatedParseError {
232 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
233 let Self { inner, location } = self;
234 write!(formatter, "at {}-{}: {inner}", location.start, location.end)
235 }
236}
237
238impl StdError for LocatedParseError {
239 fn source(&self) -> Option<&(dyn StdError + 'static)> {
240 self.inner.source()
241 }
242}
243
/// Input part of an interaction, held while the output of the interaction
/// has not been obtained yet.
#[derive(Debug)]
struct InteractionInput {
    /// Parsed user input.
    input: UserInput,
    /// Exit status of the interaction, if one was specified on the input element.
    exit_status: Option<ExitStatus>,
}
249
250impl Default for InteractionInput {
251 fn default() -> Self {
252 Self {
253 input: UserInput::EMPTY,
254 exit_status: None,
255 }
256 }
257}
258
259impl InteractionInput {
260 fn with_output(self, output: StyledString) -> Interaction {
261 let mut interaction = Interaction::new(self.input, output);
262 interaction.set_exit_status(self.exit_status);
263 interaction
264 }
265
266 fn with_empty_output(self) -> Interaction {
267 self.with_output(StyledString::default())
268 }
269}
270
/// State of reading a user input element.
#[derive(Debug)]
struct UserInputState {
    /// Exit status parsed from the attributes of the input element.
    exit_status: Option<ExitStatus>,
    /// Whether the input element is marked as hidden.
    is_hidden: bool,
    /// State of reading text of the input element.
    text: TextReadingState,
    /// Prompt text; set once the prompt element has been closed.
    prompt: Option<String>,
    /// Number of open tags reported by `text` at the moment the prompt element
    /// was opened; `None` if no prompt element has been encountered.
    prompt_open_tags: Option<usize>,
}
279
280impl UserInputState {
281 fn new(exit_status: Option<ExitStatus>, is_hidden: bool) -> Self {
282 Self {
283 exit_status,
284 is_hidden,
285 text: TextReadingState::default(),
286 prompt: None,
287 prompt_open_tags: None,
288 }
289 }
290}
291
292impl UserInputState {
293 fn can_start_prompt(&self) -> bool {
295 self.text.is_empty() && self.prompt.is_none() && self.prompt_open_tags.is_none()
296 }
297
298 fn can_end_prompt(&self) -> bool {
299 self.prompt.is_none()
300 && self
301 .prompt_open_tags
302 .is_some_and(|tags| tags + 1 == self.text.open_tags())
303 }
304
305 fn process(
306 &mut self,
307 event: Event<'_>,
308 position: ops::Range<usize>,
309 ) -> Result<Option<InteractionInput>, ParseError> {
310 let mut is_prompt_end = false;
311 if let Event::Start(tag) = &event {
312 if self.can_start_prompt() && parse_classes(tag.attributes())?.as_ref() == b"prompt" {
313 self.prompt_open_tags = Some(self.text.open_tags());
315 }
316 } else if let Event::End(_) = &event {
317 if self.can_end_prompt() {
318 is_prompt_end = true;
319 }
320 }
321
322 let maybe_parsed = self.text.process(event, position)?;
323 if is_prompt_end {
324 if let Some(parsed) = maybe_parsed {
325 let text = parsed.into_text();
327 let mut input = UserInput::new(String::new()).with_prompt(Some(text));
328 if self.is_hidden {
329 input = input.hide();
330 }
331
332 return Ok(Some(InteractionInput {
333 input,
334 exit_status: self.exit_status,
335 }));
336 }
337 let text = self.text.take_plaintext();
338 self.prompt = Some(text);
339 }
340
341 Ok(maybe_parsed.map(|parsed| {
342 let text = parsed.into_text();
343 let mut input = UserInput::new(into_input_text(text)).with_prompt(self.prompt.take());
344 if self.is_hidden {
345 input = input.hide();
346 }
347
348 InteractionInput {
349 input,
350 exit_status: self.exit_status,
351 }
352 }))
353 }
354}
355
/// States of the state machine used for parsing a transcript from SVG.
#[derive(Debug)]
enum ParserState {
    /// Initial state: the `<svg>` root tag has not been encountered yet.
    Initialized,
    /// The `<svg>` root tag has been encountered; waiting for the transcript container.
    EncounteredSvgTag,
    /// The transcript container has been encountered; waiting for a user input element.
    EncounteredContainer,
    /// Reading a user input element.
    ReadingUserInput(UserInputState),
    /// A user input has been read completely; waiting for its output
    /// or for the next user input.
    EncounteredUserInput(InteractionInput),
    /// Reading the output for the contained user input.
    ReadingTermOutput(InteractionInput, TextReadingState),
}
372
373impl ParserState {
374 #[cfg_attr(feature = "tracing", tracing::instrument(level = "debug"))]
375 fn set_state(&mut self, new_state: Self) {
376 *self = new_state;
377 }
378
379 #[cfg_attr(feature = "tracing", tracing::instrument(level = "trace", err))]
380 fn process(
381 &mut self,
382 event: Event<'_>,
383 position: ops::Range<usize>,
384 ) -> Result<Option<Interaction>, ParseError> {
385 match self {
386 Self::Initialized => {
387 if let Event::Start(tag) = event {
388 if tag.name().as_ref() == b"svg" {
389 *self = Self::EncounteredSvgTag;
390 } else {
391 let tag_name = String::from_utf8_lossy(tag.name().as_ref()).into_owned();
392 return Err(ParseError::UnexpectedRoot(tag_name));
393 }
394 }
395 }
396
397 Self::EncounteredSvgTag => {
398 if let Event::Start(tag) = event {
399 if tag.name().as_ref() == b"div" {
400 Self::verify_viewport_attrs(tag.attributes())?;
401 self.set_state(Self::EncounteredContainer);
402 } else if tag.name().as_ref() == b"g"
403 && Self::is_svg_container(tag.attributes())?
404 {
405 self.set_state(Self::EncounteredContainer);
406 }
407 }
408 }
409
410 Self::EncounteredContainer => {
411 if let Event::Start(tag) = event {
412 let classes = parse_classes(tag.attributes())?;
413 if Self::is_input_class(extract_base_class(&classes)) {
414 let is_hidden = classes
415 .split(|byte| *byte == b' ')
416 .any(|chunk| chunk == b"input-hidden");
417 let exit_status = parse_exit_status(tag.attributes())?;
418 self.set_state(Self::ReadingUserInput(UserInputState::new(
419 exit_status,
420 is_hidden,
421 )));
422 }
423 }
424 }
425
426 Self::ReadingUserInput(state) => {
427 if let Some(interaction) = state.process(event, position)? {
428 self.set_state(Self::EncounteredUserInput(interaction));
429 }
430 }
431
432 Self::EncounteredUserInput(interaction) => {
433 if let Event::Start(tag) = event {
434 let classes = parse_classes(tag.attributes())?;
435 let base_class = extract_base_class(&classes);
436
437 if Self::is_output_class(base_class) {
438 let interaction = mem::take(interaction);
439 self.set_state(Self::ReadingTermOutput(
440 interaction,
441 TextReadingState::default(),
442 ));
443 } else if Self::is_input_class(base_class) {
444 let interaction = mem::take(interaction);
445 let exit_status = parse_exit_status(tag.attributes())?;
446 let is_hidden = classes
447 .split(|byte| *byte == b' ')
448 .any(|chunk| chunk == b"input-hidden");
449 self.set_state(Self::ReadingUserInput(UserInputState::new(
450 exit_status,
451 is_hidden,
452 )));
453 return Ok(Some(interaction.with_empty_output()));
454 }
455 }
456 }
457
458 Self::ReadingTermOutput(interaction, text_state) => {
459 if let Some(term_output) = text_state.process(event, position)? {
460 let interaction = mem::take(interaction);
461 self.set_state(Self::EncounteredContainer);
462 return Ok(Some(interaction.with_output(term_output)));
463 }
464 }
465 }
466 Ok(None)
467 }
468
469 fn is_input_class(class_name: &[u8]) -> bool {
470 class_name == b"input" || class_name == b"user-input"
471 }
472
473 fn is_output_class(class_name: &[u8]) -> bool {
474 class_name == b"output" || class_name == b"term-output"
475 }
476
477 #[cfg_attr(
478 feature = "tracing",
479 tracing::instrument(level = "debug", skip_all, err)
480 )]
481 fn verify_viewport_attrs(attributes: Attributes<'_>) -> Result<(), ParseError> {
482 const HTML_NS: &[u8] = b"http://www.w3.org/1999/xhtml";
483
484 let mut has_ns_attribute = false;
485 let mut has_class_attribute = false;
486
487 for attr in attributes {
488 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
489 match attr.key.as_ref() {
490 b"xmlns" => {
491 if attr.value.as_ref() != HTML_NS {
492 return Err(ParseError::InvalidContainer);
493 }
494 has_ns_attribute = true;
495 }
496 b"class" => {
497 if ![b"viewport" as &[u8], b"container"].contains(&attr.value.as_ref()) {
500 return Err(ParseError::InvalidContainer);
501 }
502 has_class_attribute = true;
503 }
504 _ => { }
505 }
506 }
507
508 if has_ns_attribute && has_class_attribute {
509 Ok(())
510 } else {
511 Err(ParseError::InvalidContainer)
512 }
513 }
514
515 fn is_svg_container(attributes: Attributes<'_>) -> Result<bool, ParseError> {
516 let classes = parse_classes(attributes)?;
517 Ok(extract_base_class(&classes) == b"container")
518 }
519}