// compile_fmt/utils.rs

//! Miscellaneous utils.

use core::slice;

/// Leading part of a string, held as raw UTF-8 bytes and tagged by whether anything was cut off.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ClippedStr<'a> {
    /// The selected prefix covers the whole string; holds all of its bytes.
    Full(&'a [u8]),
    /// The string continues past the selected prefix; holds only the prefix bytes.
    Clipped(&'a [u8]),
}

impl<'a> ClippedStr<'a> {
    /// Returns bytes corresponding to the first `char_count` chars in `s`. If `s` contains fewer
    /// chars, it's returned in full.
    ///
    /// The prefix always ends on a char boundary. `Clipped` is returned whenever the prefix is
    /// shorter than `s` (including `char_count == 0` for a non-empty `s`), `Full` otherwise.
    ///
    /// # Panics
    ///
    /// Panics if the bytes of `s` do not form well-formed UTF-8 sequences, which cannot happen
    /// for a `str` produced by safe code.
    pub const fn new(s: &'a str, mut char_count: usize) -> Self {
        let s_bytes = s.as_bytes();
        let mut pos = 0;
        while pos < s_bytes.len() && char_count > 0 {
            // The leading byte of a UTF-8 sequence determines how many bytes the char occupies.
            pos += match s_bytes[pos] {
                0x00..=0x7F => 1,
                0xC0..=0xDF => 2,
                0xE0..=0xEF => 3,
                0xF0..=0xF7 => 4,
                _ => unreachable!(), // Invalid UTF-8 encoding
            };
            char_count -= 1;
        }
        assert!(pos <= s_bytes.len(), "Invalid UTF-8 encoding");
        // SAFETY: `pos <= s_bytes.len()` was asserted right above, so the `pos` bytes starting at
        // `s_bytes.as_ptr()` all lie within `s_bytes`, which is borrowed for `'a`.
        // (Presumably done via raw parts because range-indexing a slice is unavailable in `const fn`.)
        let bytes = unsafe { slice::from_raw_parts(s_bytes.as_ptr(), pos) };
        if pos < s_bytes.len() {
            Self::Clipped(bytes)
        } else {
            Self::Full(bytes)
        }
    }
}
/// Counts the number of chars in a string.
///
/// Walks the UTF-8 bytes of `s`, skipping a whole encoded sequence per step, so the result is
/// the number of chars rather than the number of bytes. Usable in `const` contexts.
///
/// # Panics
///
/// Panics if a byte at a sequence start is not a valid UTF-8 leading byte, which cannot happen
/// for a `str` produced by safe code.
pub(crate) const fn count_chars(s: &str) -> usize {
    let s_bytes = s.as_bytes();
    let mut pos = 0;
    let mut char_count = 0;
    while pos < s_bytes.len() {
        // The leading byte of a UTF-8 sequence determines how many bytes the char occupies.
        pos += match s_bytes[pos] {
            0x00..=0x7F => 1,
            0xC0..=0xDF => 2,
            0xE0..=0xEF => 3,
            0xF0..=0xF7 => 4,
            _ => unreachable!(), // Invalid UTF-8 encoding
        };
        char_count += 1;
    }
    char_count
}
64pub(crate) const fn assert_is_ascii(s: &str) {
65    const CLIP_LEN: usize = 32;
66
67    let s_bytes = s.as_bytes();
68    let mut pos = 0;
69    while pos < s_bytes.len() {
70        if s_bytes[pos] < 128 {
71            pos += 1;
72        } else {
73            crate::compile_panic!(
74                "String '", s => crate::clip(CLIP_LEN, "…"), "' contains non-ASCII chars; \
75                 first at position ", pos => crate::fmt::<usize>()
76            );
77        }
78    }
79}
#[cfg(test)]
mod tests {
    use super::*;

    // Single-byte chars only: the byte prefix length equals the char count.
    #[test]
    fn extracting_first_chars_from_ascii_string() {
        assert_eq!(ClippedStr::new("Test", 1), ClippedStr::Clipped(b"T"));
        assert_eq!(ClippedStr::new("Test", 2), ClippedStr::Clipped(b"Te"));
        assert_eq!(ClippedStr::new("Test", 3), ClippedStr::Clipped(b"Tes"));
        for char_count in [4, 5, 8, 32, 128] {
            assert_eq!(
                ClippedStr::new("Test", char_count),
                ClippedStr::Full(b"Test")
            );
        }
    }

    // Mixes 1-, 2-, 3- and 4-byte chars; the prefix must always end on a char boundary.
    #[test]
    fn extracting_first_chars_from_utf8_string() {
        assert_eq!(
            ClippedStr::new("💣Test", 1),
            ClippedStr::Clipped("💣".as_bytes())
        );
        assert_eq!(
            ClippedStr::new("💣Test", 2),
            ClippedStr::Clipped("💣T".as_bytes())
        );
        assert_eq!(
            ClippedStr::new("T💣est", 3),
            ClippedStr::Clipped("T💣e".as_bytes())
        );
        assert_eq!(
            ClippedStr::new("T💣eßtℝ", 4),
            ClippedStr::Clipped("T💣eß".as_bytes())
        );
        assert_eq!(
            ClippedStr::new("Tℝ💣eßt", 4),
            ClippedStr::Clipped("Tℝ💣e".as_bytes())
        );
        assert_eq!(
            ClippedStr::new("Tℝ💣eßt", 5),
            ClippedStr::Clipped("Tℝ💣eß".as_bytes())
        );

        for char_count in [6, 8, 32, 128] {
            assert_eq!(
                ClippedStr::new("Tℝ💣eßt", char_count),
                ClippedStr::Full("Tℝ💣eßt".as_bytes())
            );
        }
    }

    // Boundary inputs: a zero char count clips a non-empty string to nothing,
    // while an empty string is always returned in full.
    #[test]
    fn extracting_zero_chars_and_empty_strings() {
        assert_eq!(ClippedStr::new("Test", 0), ClippedStr::Clipped(b""));
        assert_eq!(ClippedStr::new("", 0), ClippedStr::Full(b""));
        assert_eq!(ClippedStr::new("", 8), ClippedStr::Full(b""));
    }

    #[test]
    fn counting_chars() {
        assert_eq!(count_chars(""), 0);
        assert_eq!(count_chars("Test"), 4);
        assert_eq!(count_chars("💣Test"), 5);
        assert_eq!(count_chars("Tℝ💣eßt"), 6);
    }
}