font_subset/font/
name.rs

//! `name` table.
2
3use core::{cmp, ops};
4
5use super::Cursor;
6use crate::{
7    alloc::{BTreeMap, String, Vec},
8    write::{VecExt, WriteTable},
9    ParseError, ParseErrorKind, TableTag,
10};
11
/// Platform of a name record, decoded from the raw `platform_id` field.
///
/// Only the values accepted by `NameRecord::parse()` are representable.
#[derive(Debug, Clone, Copy)]
enum PlatformId {
    /// Raw value 0.
    Unicode,
    /// Raw value 1.
    Macintosh,
    /// Raw value 3.
    Windows,
}
18
/// Single record of the `name` table, reduced to the parts used during subsetting.
#[derive(Debug)]
struct NameRecord {
    /// Identifier of the name (e.g., 1 for the font family).
    name_id: u16,
    /// Decoded string; `None` if the record uses an encoding that is not decoded (non-UTF-16).
    value: Option<String>,
}
24
25impl NameRecord {
26    const COPYRIGHT_NOTICE_ID: u16 = 0;
27    const FAMILY_NAME_ID: u16 = 1;
28    const SUBFAMILY_NAME_ID: u16 = 2;
29    const VERSION_ID: u16 = 5;
30    const MANUFACTURER_ID: u16 = 8;
31    const DESIGNER_ID: u16 = 9;
32    const DESIGNER_URL_ID: u16 = 12;
33    const LICENSE_ID: u16 = 13;
34    const LICENSE_URL_ID: u16 = 14;
35    const MAX_STANDARD_ID: u16 = 25;
36
37    const BYTE_SIZE: usize = 12;
38
39    fn parse(cursor: &mut Cursor<'_>, string_storage: Cursor<'_>) -> Result<Self, ParseError> {
40        let platform_id = cursor.read_u16_checked(|raw| match raw {
41            0 => Ok(PlatformId::Unicode),
42            1 => Ok(PlatformId::Macintosh),
43            3 => Ok(PlatformId::Windows),
44            _ => Err(ParseErrorKind::UnexpectedValue {
45                name: "platform_id",
46                expected: "one of 0, 1, or 3".into(),
47                actual: raw.into(),
48            }),
49        })?;
50        let encoding_id = cursor.read_u16()?;
51        cursor.skip(2)?; // language_id; TODO: take into account?
52        let name_id = cursor.read_u16()?;
53        let length = cursor.read_u16()?;
54        let offset = cursor.read_u16()?;
55
56        let offset_usize = usize::from(offset);
57        let data_cursor =
58            string_storage.read_range(offset_usize..(offset_usize + usize::from(length)))?;
59        let is_utf16 = matches!(
60            (platform_id, encoding_id),
61            (PlatformId::Unicode, _) | (PlatformId::Windows, 1 | 10)
62        );
63
64        let value: Option<String> = if is_utf16 {
65            if length % 2 != 0 {
66                return Err(data_cursor.err(ParseErrorKind::UnexpectedValue {
67                    name: "length",
68                    expected: "even value".into(),
69                    actual: length.into(),
70                }));
71            }
72
73            // This is how (unstable) `String::from_utf16be()` is implemented on low-endian architectures.
74            let u16_iter = data_cursor.bytes().chunks(2).map(|chunk| {
75                // `unwrap()` is safe due to the oddity check above
76                u16::from_be_bytes(chunk.try_into().unwrap())
77            });
78            let string = char::decode_utf16(u16_iter)
79                .collect::<Result<_, _>>()
80                .map_err(|_| data_cursor.err(ParseErrorKind::Utf16))?;
81            Some(string)
82        } else {
83            None
84        };
85
86        Ok(Self { name_id, value })
87    }
88}
89
/// OpenType font naming information extracted from the `name` table.
///
/// All fields borrow from the parsed table; a field is `None` if the corresponding name
/// is not present among the parsed names.
#[derive(Debug, Clone, Copy, Default)]
pub struct FontNaming<'a> {
    /// Family name, e.g. "Fira Mono".
    pub family: Option<&'a str>,
    /// Subfamily name, e.g. "Regular".
    pub subfamily: Option<&'a str>,
    /// Raw version string (name ID 5). Kept private; it is exposed via the `version()` getter.
    version: Option<&'a str>,
    /// Font manufacturer.
    pub manufacturer: Option<&'a str>,
    /// Font designer.
    pub designer: Option<&'a str>,
    /// URL of the font designer.
    pub designer_url: Option<&'a str>,
    /// Copyright notice.
    pub copyright_notice: Option<&'a str>,
    /// Font license.
    pub license: Option<&'a str>,
    /// Font license URL.
    pub license_url: Option<&'a str>,
}
111
112impl<'a> FontNaming<'a> {
113    fn new(map: &'a BTreeMap<u16, String>) -> Self {
114        Self {
115            family: map.get(&NameRecord::FAMILY_NAME_ID).map(String::as_str),
116            subfamily: map.get(&NameRecord::SUBFAMILY_NAME_ID).map(String::as_str),
117            version: map.get(&NameRecord::VERSION_ID).map(String::as_str),
118            manufacturer: map.get(&NameRecord::MANUFACTURER_ID).map(String::as_str),
119            designer: map.get(&NameRecord::DESIGNER_ID).map(String::as_str),
120            designer_url: map.get(&NameRecord::DESIGNER_URL_ID).map(String::as_str),
121            copyright_notice: map
122                .get(&NameRecord::COPYRIGHT_NOTICE_ID)
123                .map(String::as_str),
124            license: map.get(&NameRecord::LICENSE_ID).map(String::as_str),
125            license_url: map.get(&NameRecord::LICENSE_URL_ID).map(String::as_str),
126        }
127    }
128
129    /// Returns the font version, with the "Version " prefix stripped.
130    pub fn version(&self) -> Option<&str> {
131        let version = self.version?;
132        Some(version.strip_prefix("Version ").unwrap_or(version))
133    }
134}
135
136#[derive(Debug, Clone)]
137pub(crate) struct NameTable<'a> {
138    pub(super) parsed_names: BTreeMap<u16, String>,
139    /// `None` for subset fonts
140    all_bytes: Option<&'a [u8]>,
141}
142
143impl<'a> NameTable<'a> {
144    #[cfg_attr(
145        feature = "tracing",
146        tracing::instrument(level = "debug", err, skip(cursor), fields(range = ?cursor.range()))
147    )]
148    pub(super) fn parse(
149        mut cursor: Cursor<'a>,
150        additional_ids: &[u16],
151    ) -> Result<Self, ParseError> {
152        let mut string_storage = cursor;
153        let all_bytes = cursor.bytes();
154
155        cursor.read_u16_checked(|version| {
156            if version != 0 && version != 1 {
157                return Err(ParseErrorKind::UnexpectedValue {
158                    name: "version",
159                    expected: "0 or 1".into(),
160                    actual: version.into(),
161                });
162            }
163            Ok(())
164        })?;
165
166        let record_count = cursor.read_u16()?;
167        let storage_offset = cursor.read_u16()?;
168        string_storage.skip(storage_offset.into())?;
169
170        let mut parsed_names = BTreeMap::new();
171        for _ in 0..record_count {
172            let record = NameRecord::parse(&mut cursor, string_storage)?;
173            #[cfg(feature = "tracing")]
174            tracing::trace!(?record, "parsed name record");
175
176            let Some(value) = record.value else {
177                continue;
178            };
179            let id = record.name_id;
180            if id <= NameRecord::MAX_STANDARD_ID || additional_ids.contains(&id) {
181                parsed_names.insert(id, value);
182            }
183        }
184        #[cfg(feature = "tracing")]
185        tracing::debug!(?parsed_names, "parsed well-known names");
186
187        Ok(Self {
188            parsed_names,
189            all_bytes: Some(all_bytes),
190        })
191    }
192
193    pub(super) fn parsed(&self) -> FontNaming<'_> {
194        FontNaming::new(&self.parsed_names)
195    }
196
197    pub(crate) fn subset(&mut self, modify_version: bool) {
198        const VERSION_APPENDIX: &str = concat!(
199            "; subset w/ ",
200            env!("CARGO_PKG_NAME"),
201            " ",
202            env!("CARGO_PKG_VERSION")
203        );
204
205        self.all_bytes = None;
206        if modify_version {
207            let version = self.parsed_names.get_mut(&NameRecord::VERSION_ID);
208            if let Some(version) = version {
209                if !version.ends_with(VERSION_APPENDIX) {
210                    version.push_str(VERSION_APPENDIX);
211                }
212            }
213        }
214    }
215
216    /// Interns the provided strings into a single piece of data, encodes it in UTF-16, and
217    /// provides `u16` offsets for each string.
218    ///
219    /// The used approach is quite slow, but it should work for small strings `name` typically deals with.
220    fn intern_strings<'s>(
221        strings: impl Iterator<Item = (u16, &'s str)>,
222    ) -> (Vec<u16>, Vec<ops::Range<usize>>) {
223        let mut strings: Vec<_> = strings.collect();
224        // Sort strings from longer ones to shorter ones.
225        strings.sort_unstable_by_key(|(_, s)| cmp::Reverse(s.len()));
226
227        let (mut data, mut ranges) = (String::new(), Vec::with_capacity(strings.len()));
228        for (id, s) in strings {
229            let new_offset = if let Some(pos) = data.find(s) {
230                pos
231            } else {
232                let prev_len = data.len();
233                data.push_str(s);
234                prev_len
235            };
236            ranges.push((id, new_offset..new_offset + s.len()));
237        }
238
239        // Now, we need to translate UTF-8 offsets to UTF-16.
240        let mut offsets_mut: Vec<_> = ranges
241            .iter_mut()
242            .flat_map(|(_, range)| [&mut range.start, &mut range.end])
243            .collect();
244
245        offsets_mut.sort_unstable_by_key(|offset| **offset);
246        let mut utf16_data = Vec::new();
247        let mut prev_offset = 0;
248        for offset in &mut offsets_mut {
249            utf16_data.extend(data[prev_offset..**offset].encode_utf16());
250            prev_offset = **offset;
251            **offset = utf16_data.len();
252        }
253        debug_assert_eq!(prev_offset, data.len());
254
255        ranges.sort_unstable_by_key(|(id, _)| *id);
256        let offsets = ranges.into_iter().map(|(_, offset)| offset).collect();
257        (utf16_data, offsets)
258    }
259}
260
261impl WriteTable for NameTable<'_> {
262    fn tag(&self) -> TableTag {
263        TableTag::NAME
264    }
265
266    fn write_to_vec(&self, buffer: &mut Vec<u8>) {
267        const HEADER_SIZE: usize = 6;
268
269        if let Some(all_bytes) = self.all_bytes {
270            buffer.extend_from_slice(all_bytes);
271            return;
272        }
273
274        let start_offset = buffer.len();
275        buffer.write_u16(0); // version
276        let record_count = self.parsed_names.len();
277        buffer.write_u16(record_count.try_into().expect("record_count overflow"));
278        let storage_offset = HEADER_SIZE + NameRecord::BYTE_SIZE * record_count;
279        buffer.write_u16(storage_offset.try_into().expect("storage_offset overflow"));
280
281        let (string_data, u16_ranges) =
282            Self::intern_strings(self.parsed_names.iter().map(|(&id, s)| (id, s.as_str())));
283
284        for (&id, range) in self.parsed_names.keys().zip(u16_ranges) {
285            let len = (range.end - range.start) * 2;
286            let len = u16::try_from(len).expect("len overflow");
287            let offset = range.start * 2;
288            let offset = u16::try_from(offset).expect("offset overflow");
289
290            buffer.write_u16(3); // platform_id = Windows
291            buffer.write_u16(1); // encoding_id = Unicode BMP
292            buffer.write_u16(0x409); // language_id = en_US
293            buffer.write_u16(id);
294            buffer.write_u16(len);
295            buffer.write_u16(offset);
296        }
297
298        debug_assert_eq!(buffer.len() - start_offset, storage_offset);
299        buffer.extend(string_data.into_iter().flat_map(u16::to_be_bytes));
300    }
301}
302
#[cfg(test)]
mod tests {
    use test_casing::test_casing;

    use super::*;
    use crate::{testonly::TestFont, OpenTypeReader};

    /// Checks that strings contained in other strings are not stored twice.
    #[test]
    fn interning_strings() {
        let strings = [(0, "Roboto"), (1, "Roboto Regular"), (2, "Regular")];
        let (utf16_data, ranges) = NameTable::intern_strings(strings.into_iter());
        assert_eq!(
            utf16_data,
            "Roboto Regular".encode_utf16().collect::<Vec<_>>(),
        );
        assert_eq!(ranges, [0..6, 0..14, 7..14]);
    }

    /// Checks that names from real-world fonts survive interning.
    #[test_casing(5, TestFont::ALL)]
    fn interning_string_from_font(font: TestFont) {
        let reader = OpenTypeReader::new(font.bytes).unwrap();
        let table_cursor = reader.table(TableTag::NAME);
        let name = NameTable::parse(table_cursor, &[]).unwrap();
        assert!(!name.parsed_names.is_empty());

        let (utf16_data, ranges) =
            NameTable::intern_strings(name.parsed_names.iter().map(|(&id, s)| (id, s.as_str())));

        // Interned data must be more compact than the original table.
        assert!(utf16_data.len() * 2 < table_cursor.bytes().len());
        for (s, range) in name.parsed_names.values().zip(ranges) {
            let interned_s = String::from_utf16(&utf16_data[range]).unwrap();
            assert_eq!(*s, interned_s);
        }
    }

    /// Checks that a re-encoded table is parsed to the same names as the original one.
    #[test_casing(5, TestFont::ALL)]
    fn subsetting_roundtrip(font: TestFont) {
        let reader = OpenTypeReader::new(font.bytes).unwrap();
        let table_cursor = reader.table(TableTag::NAME);
        let mut name = NameTable::parse(table_cursor, &[]).unwrap();
        let original_names = name.parsed_names.clone();

        name.subset(false);
        let mut buffer = vec![];
        name.write_to_vec(&mut buffer);
        let subset_name = NameTable::parse(Cursor::new(&buffer), &[]).unwrap();
        assert_eq!(subset_name.parsed_names, original_names);
    }

    #[test]
    fn modifying_font_version() {
        // Built from the package metadata so that the test does not break on version bumps.
        const FULL_VERSION: &str = concat!(
            "Version 3.111; subset w/ ",
            env!("CARGO_PKG_NAME"),
            " ",
            env!("CARGO_PKG_VERSION")
        );
        let stripped_version = FULL_VERSION.strip_prefix("Version ").unwrap();

        let reader = OpenTypeReader::new(TestFont::FIRA_MONO.bytes).unwrap();
        let table_cursor = reader.table(TableTag::NAME);
        let mut name = NameTable::parse(table_cursor, &[]).unwrap();

        name.subset(true);
        assert_eq!(name.parsed().version(), Some(stripped_version));

        let mut buffer = vec![];
        name.write_to_vec(&mut buffer);
        let subset_name = NameTable::parse(Cursor::new(&buffer), &[]).unwrap();

        assert_eq!(
            subset_name.parsed_names[&NameRecord::VERSION_ID],
            FULL_VERSION
        );
        assert_eq!(subset_name.parsed().version(), Some(stripped_version));
    }
}