use serde::{Deserialize, Serialize}; /// Parsed metadata from a single font file #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FontMeta { /// Font family name from name table (nameID 16 preferred, fallback nameID 1) pub family: String, /// usWeightClass from OS/2 table (100-900) pub weight: u16, /// fsSelection bit 0 from OS/2 table pub italic: bool, pub units_per_em: u16, /// sTypoAscender from OS/2 table pub ascender: i16, /// sTypoDescender from OS/2 table pub descender: i16, } /// Variant key for looking up a specific font within a family #[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)] pub struct FontVariantKey { pub weight: u16, pub italic: bool, } /// Summary of a font family with all its available variants #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FontFamilyInfo { pub family: String, pub variants: Vec, } impl FontMeta { pub fn variant_key(&self) -> FontVariantKey { FontVariantKey { weight: self.weight, italic: self.italic, } } pub fn is_bold(&self) -> bool { self.weight >= 700 } } // --------------------------------------------------------------------------- // Internal helpers // --------------------------------------------------------------------------- /// Read a big-endian u16 from `data` at `offset`. Returns `None` if out of bounds. fn read_u16(data: &[u8], offset: usize) -> Option { if offset + 2 > data.len() { return None; } Some(u16::from_be_bytes([data[offset], data[offset + 1]])) } /// Read a big-endian i16 from `data` at `offset`. Returns `None` if out of bounds. fn read_i16(data: &[u8], offset: usize) -> Option { if offset + 2 > data.len() { return None; } Some(i16::from_be_bytes([data[offset], data[offset + 1]])) } /// Read a big-endian u32 from `data` at `offset`. Returns `None` if out of bounds. fn read_u32(data: &[u8], offset: usize) -> Option { if offset + 4 > data.len() { return None; } Some(u32::from_be_bytes([ data[offset], data[offset + 1], data[offset + 2], data[offset + 3], ])) } /// Find a table in the font's table directory by its 4-byte ASCII tag. /// Returns `(offset, length)` into `data`. fn find_table(data: &[u8], tag: &[u8; 4]) -> Option<(usize, usize)> { // Offset table (first 12 bytes): // 0: sfVersion (u32) — 0x00010000 for TrueType, 'OTTO' for CFF // 4: numTables (u16) // 6: searchRange (u16) // 8: entrySelector (u16) // 10: rangeShift (u16) let num_tables = read_u16(data, 4)? as usize; // Table directory starts at offset 12, each entry is 16 bytes: // 0: tag (4 bytes) // 4: checksum (u32) // 8: offset (u32) // 12: length (u32) for i in 0..num_tables { let entry_offset = 12 + i * 16; if entry_offset + 16 > data.len() { return None; } if &data[entry_offset..entry_offset + 4] == tag { let table_offset = read_u32(data, entry_offset + 8)? as usize; let table_length = read_u32(data, entry_offset + 12)? as usize; // Basic sanity check if table_offset.checked_add(table_length)? > data.len() { return None; } return Some((table_offset, table_length)); } } None } /// Decode a UTF-16BE byte slice into a `String`. fn decode_utf16be(raw: &[u8]) -> Option { if raw.len() % 2 != 0 { return None; } let code_units: Vec = raw .chunks_exact(2) .map(|c| u16::from_be_bytes([c[0], c[1]])) .collect(); String::from_utf16(&code_units).ok() } /// Decode a MacRoman (platform 1, encoding 0) byte slice into a `String`. /// MacRoman overlaps with ASCII for 0x00–0x7F; we accept those and replace /// high bytes with the Unicode replacement character for simplicity, since /// font family names are almost always pure ASCII. fn decode_mac_roman(raw: &[u8]) -> String { raw.iter() .map(|&b| { if b < 0x80 { b as char } else { // Simplified: map non-ASCII MacRoman bytes to replacement char. // Full MacRoman table not needed for typical font family names. '\u{FFFD}' } }) .collect() } /// Extract the font family name from the `name` table. /// /// Prefers nameID 16 (Typographic Family Name) over nameID 1 (Font Family). /// Among platforms, prefers Windows (3) and Unicode (0) for UTF-16BE, falls /// back to Macintosh (1) for MacRoman. fn read_family_name(data: &[u8], table_offset: usize, table_length: usize) -> Option { let tbl = table_offset; // name table header: // 0: format (u16) // 2: count (u16) // 4: stringOffset (u16) — offset from start of table to string storage let count = read_u16(data, tbl + 2)? as usize; let string_offset = read_u16(data, tbl + 4)? as usize; let storage_base = tbl + string_offset; // Each name record (12 bytes, starting at tbl + 6): // 0: platformID (u16) // 2: encodingID (u16) // 4: languageID (u16) // 6: nameID (u16) // 8: length (u16) // 10: offset (u16) — from storage_base // We collect candidates, preferring nameID 16 over 1, and Windows/Unicode // over Mac. let mut best: Option = None; let mut best_priority: u8 = 0; // higher = better for i in 0..count { let rec = tbl + 6 + i * 12; if rec + 12 > tbl + table_length { break; } let platform_id = read_u16(data, rec)?; let encoding_id = read_u16(data, rec + 2)?; let name_id = read_u16(data, rec + 6)?; let str_length = read_u16(data, rec + 8)? as usize; let str_offset = read_u16(data, rec + 10)? as usize; // Only interested in nameID 1 (Font Family) or 16 (Typographic Family) if name_id != 1 && name_id != 16 { continue; } let name_priority = if name_id == 16 { 4 } else { 0 }; let abs_start = storage_base + str_offset; let abs_end = abs_start + str_length; if abs_end > data.len() { continue; } let raw = &data[abs_start..abs_end]; let (decoded, platform_priority) = match platform_id { // Platform 0 — Unicode: UTF-16BE 0 => { if let Some(s) = decode_utf16be(raw) { (s, 2u8) } else { continue; } } // Platform 1 — Macintosh, encoding 0 = MacRoman 1 if encoding_id == 0 => (decode_mac_roman(raw), 1u8), // Platform 3 — Windows, encoding 1 = Unicode BMP (UTF-16BE) 3 if encoding_id == 1 => { if let Some(s) = decode_utf16be(raw) { (s, 3u8) } else { continue; } } _ => continue, }; let priority = name_priority + platform_priority; if priority > best_priority { best_priority = priority; best = Some(decoded); } } best } /// Parse font metadata from raw TTF/OTF bytes. /// /// Returns `None` if the data is too short, tables are missing, or offsets /// point outside the buffer. pub fn parse_font_meta(data: &[u8]) -> Option { // Minimum: 12-byte offset table header if data.len() < 12 { return None; } // ---- OS/2 table ---- let (os2_off, os2_len) = find_table(data, b"OS/2")?; // Need at least 72 bytes for sTypoDescender (offset 70, 2 bytes) if os2_len < 72 { return None; } let weight = read_u16(data, os2_off + 4)?; let fs_selection = read_u16(data, os2_off + 62)?; let italic = (fs_selection & 1) != 0; let ascender = read_i16(data, os2_off + 68)?; let descender = read_i16(data, os2_off + 70)?; // ---- head table ---- let (head_off, head_len) = find_table(data, b"head")?; // unitsPerEm is at offset 18 (2 bytes), so need at least 20 bytes if head_len < 20 { return None; } let units_per_em = read_u16(data, head_off + 18)?; // ---- name table ---- let (name_off, name_len) = find_table(data, b"name")?; let family = read_family_name(data, name_off, name_len)?; Some(FontMeta { family, weight, italic, units_per_em, ascender, descender, }) } #[cfg(test)] mod tests { use super::*; #[test] fn find_table_returns_none_on_empty() { assert!(find_table(&[], b"head").is_none()); } #[test] fn parse_font_meta_returns_none_on_garbage() { assert!(parse_font_meta(&[0u8; 11]).is_none()); assert!(parse_font_meta(&[0u8; 64]).is_none()); } #[test] fn variant_key_and_is_bold() { let meta = FontMeta { family: "Test".into(), weight: 700, italic: true, units_per_em: 1000, ascender: 800, descender: -200, }; assert!(meta.is_bold()); assert!(meta.italic); let key = meta.variant_key(); assert_eq!(key.weight, 700); assert!(key.italic); let regular = FontMeta { weight: 400, italic: false, ..meta.clone() }; assert!(!regular.is_bold()); } #[test] fn decode_utf16be_basic() { // "AB" in UTF-16BE let raw = [0x00, 0x41, 0x00, 0x42]; assert_eq!(decode_utf16be(&raw).unwrap(), "AB"); } #[test] fn decode_utf16be_odd_length_returns_none() { assert!(decode_utf16be(&[0x00, 0x41, 0x00]).is_none()); } #[test] fn decode_mac_roman_ascii() { let raw = b"Noto Sans"; assert_eq!(decode_mac_roman(raw), "Noto Sans"); } }