mirror of
https://github.com/duhanbalci/dreport.git
synced 2026-07-01 18:39:16 +00:00
331 lines
9.9 KiB
Rust
331 lines
9.9 KiB
Rust
use serde::{Deserialize, Serialize};
|
||
|
||
/// Parsed metadata from a single font file
|
||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||
pub struct FontMeta {
|
||
/// Font family name from name table (nameID 16 preferred, fallback nameID 1)
|
||
pub family: String,
|
||
/// usWeightClass from OS/2 table (100-900)
|
||
pub weight: u16,
|
||
/// fsSelection bit 0 from OS/2 table
|
||
pub italic: bool,
|
||
pub units_per_em: u16,
|
||
/// sTypoAscender from OS/2 table
|
||
pub ascender: i16,
|
||
/// sTypoDescender from OS/2 table
|
||
pub descender: i16,
|
||
}
|
||
|
||
/// Variant key for looking up a specific font within a family
|
||
#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
|
||
pub struct FontVariantKey {
|
||
pub weight: u16,
|
||
pub italic: bool,
|
||
}
|
||
|
||
/// Summary of a font family with all its available variants
|
||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||
pub struct FontFamilyInfo {
|
||
pub family: String,
|
||
pub variants: Vec<FontVariantKey>,
|
||
}
|
||
|
||
impl FontMeta {
|
||
pub fn variant_key(&self) -> FontVariantKey {
|
||
FontVariantKey {
|
||
weight: self.weight,
|
||
italic: self.italic,
|
||
}
|
||
}
|
||
|
||
pub fn is_bold(&self) -> bool {
|
||
self.weight >= 700
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Internal helpers
|
||
// ---------------------------------------------------------------------------
|
||
|
||
/// Read a big-endian u16 from `data` at `offset`. Returns `None` if out of bounds.
|
||
fn read_u16(data: &[u8], offset: usize) -> Option<u16> {
|
||
if offset + 2 > data.len() {
|
||
return None;
|
||
}
|
||
Some(u16::from_be_bytes([data[offset], data[offset + 1]]))
|
||
}
|
||
|
||
/// Read a big-endian i16 from `data` at `offset`. Returns `None` if out of bounds.
|
||
fn read_i16(data: &[u8], offset: usize) -> Option<i16> {
|
||
if offset + 2 > data.len() {
|
||
return None;
|
||
}
|
||
Some(i16::from_be_bytes([data[offset], data[offset + 1]]))
|
||
}
|
||
|
||
/// Read a big-endian u32 from `data` at `offset`. Returns `None` if out of bounds.
|
||
fn read_u32(data: &[u8], offset: usize) -> Option<u32> {
|
||
if offset + 4 > data.len() {
|
||
return None;
|
||
}
|
||
Some(u32::from_be_bytes([
|
||
data[offset],
|
||
data[offset + 1],
|
||
data[offset + 2],
|
||
data[offset + 3],
|
||
]))
|
||
}
|
||
|
||
/// Find a table in the font's table directory by its 4-byte ASCII tag.
|
||
/// Returns `(offset, length)` into `data`.
|
||
fn find_table(data: &[u8], tag: &[u8; 4]) -> Option<(usize, usize)> {
|
||
// Offset table (first 12 bytes):
|
||
// 0: sfVersion (u32) — 0x00010000 for TrueType, 'OTTO' for CFF
|
||
// 4: numTables (u16)
|
||
// 6: searchRange (u16)
|
||
// 8: entrySelector (u16)
|
||
// 10: rangeShift (u16)
|
||
let num_tables = read_u16(data, 4)? as usize;
|
||
|
||
// Table directory starts at offset 12, each entry is 16 bytes:
|
||
// 0: tag (4 bytes)
|
||
// 4: checksum (u32)
|
||
// 8: offset (u32)
|
||
// 12: length (u32)
|
||
for i in 0..num_tables {
|
||
let entry_offset = 12 + i * 16;
|
||
if entry_offset + 16 > data.len() {
|
||
return None;
|
||
}
|
||
|
||
if &data[entry_offset..entry_offset + 4] == tag {
|
||
let table_offset = read_u32(data, entry_offset + 8)? as usize;
|
||
let table_length = read_u32(data, entry_offset + 12)? as usize;
|
||
// Basic sanity check
|
||
if table_offset.checked_add(table_length)? > data.len() {
|
||
return None;
|
||
}
|
||
return Some((table_offset, table_length));
|
||
}
|
||
}
|
||
None
|
||
}
|
||
|
||
/// Decode a UTF-16BE byte slice into a `String`.
|
||
fn decode_utf16be(raw: &[u8]) -> Option<String> {
|
||
if raw.len() % 2 != 0 {
|
||
return None;
|
||
}
|
||
let code_units: Vec<u16> = raw
|
||
.chunks_exact(2)
|
||
.map(|c| u16::from_be_bytes([c[0], c[1]]))
|
||
.collect();
|
||
String::from_utf16(&code_units).ok()
|
||
}
|
||
|
||
/// Decode a MacRoman (platform 1, encoding 0) byte slice into a `String`.
|
||
/// MacRoman overlaps with ASCII for 0x00–0x7F; we accept those and replace
|
||
/// high bytes with the Unicode replacement character for simplicity, since
|
||
/// font family names are almost always pure ASCII.
|
||
fn decode_mac_roman(raw: &[u8]) -> String {
|
||
raw.iter()
|
||
.map(|&b| {
|
||
if b < 0x80 {
|
||
b as char
|
||
} else {
|
||
// Simplified: map non-ASCII MacRoman bytes to replacement char.
|
||
// Full MacRoman table not needed for typical font family names.
|
||
'\u{FFFD}'
|
||
}
|
||
})
|
||
.collect()
|
||
}
|
||
|
||
/// Extract the font family name from the `name` table.
|
||
///
|
||
/// Prefers nameID 16 (Typographic Family Name) over nameID 1 (Font Family).
|
||
/// Among platforms, prefers Windows (3) and Unicode (0) for UTF-16BE, falls
|
||
/// back to Macintosh (1) for MacRoman.
|
||
fn read_family_name(data: &[u8], table_offset: usize, table_length: usize) -> Option<String> {
|
||
let tbl = table_offset;
|
||
// name table header:
|
||
// 0: format (u16)
|
||
// 2: count (u16)
|
||
// 4: stringOffset (u16) — offset from start of table to string storage
|
||
let count = read_u16(data, tbl + 2)? as usize;
|
||
let string_offset = read_u16(data, tbl + 4)? as usize;
|
||
let storage_base = tbl + string_offset;
|
||
|
||
// Each name record (12 bytes, starting at tbl + 6):
|
||
// 0: platformID (u16)
|
||
// 2: encodingID (u16)
|
||
// 4: languageID (u16)
|
||
// 6: nameID (u16)
|
||
// 8: length (u16)
|
||
// 10: offset (u16) — from storage_base
|
||
|
||
// We collect candidates, preferring nameID 16 over 1, and Windows/Unicode
|
||
// over Mac.
|
||
let mut best: Option<String> = None;
|
||
let mut best_priority: u8 = 0; // higher = better
|
||
|
||
for i in 0..count {
|
||
let rec = tbl + 6 + i * 12;
|
||
if rec + 12 > tbl + table_length {
|
||
break;
|
||
}
|
||
|
||
let platform_id = read_u16(data, rec)?;
|
||
let encoding_id = read_u16(data, rec + 2)?;
|
||
let name_id = read_u16(data, rec + 6)?;
|
||
let str_length = read_u16(data, rec + 8)? as usize;
|
||
let str_offset = read_u16(data, rec + 10)? as usize;
|
||
|
||
// Only interested in nameID 1 (Font Family) or 16 (Typographic Family)
|
||
if name_id != 1 && name_id != 16 {
|
||
continue;
|
||
}
|
||
|
||
let name_priority = if name_id == 16 { 4 } else { 0 };
|
||
|
||
let abs_start = storage_base + str_offset;
|
||
let abs_end = abs_start + str_length;
|
||
if abs_end > data.len() {
|
||
continue;
|
||
}
|
||
let raw = &data[abs_start..abs_end];
|
||
|
||
let (decoded, platform_priority) = match platform_id {
|
||
// Platform 0 — Unicode: UTF-16BE
|
||
0 => {
|
||
if let Some(s) = decode_utf16be(raw) {
|
||
(s, 2u8)
|
||
} else {
|
||
continue;
|
||
}
|
||
}
|
||
// Platform 1 — Macintosh, encoding 0 = MacRoman
|
||
1 if encoding_id == 0 => (decode_mac_roman(raw), 1u8),
|
||
// Platform 3 — Windows, encoding 1 = Unicode BMP (UTF-16BE)
|
||
3 if encoding_id == 1 => {
|
||
if let Some(s) = decode_utf16be(raw) {
|
||
(s, 3u8)
|
||
} else {
|
||
continue;
|
||
}
|
||
}
|
||
_ => continue,
|
||
};
|
||
|
||
let priority = name_priority + platform_priority;
|
||
if priority > best_priority {
|
||
best_priority = priority;
|
||
best = Some(decoded);
|
||
}
|
||
}
|
||
|
||
best
|
||
}
|
||
|
||
/// Parse font metadata from raw TTF/OTF bytes.
|
||
///
|
||
/// Returns `None` if the data is too short, tables are missing, or offsets
|
||
/// point outside the buffer.
|
||
pub fn parse_font_meta(data: &[u8]) -> Option<FontMeta> {
|
||
// Minimum: 12-byte offset table header
|
||
if data.len() < 12 {
|
||
return None;
|
||
}
|
||
|
||
// ---- OS/2 table ----
|
||
let (os2_off, os2_len) = find_table(data, b"OS/2")?;
|
||
// Need at least 72 bytes for sTypoDescender (offset 70, 2 bytes)
|
||
if os2_len < 72 {
|
||
return None;
|
||
}
|
||
let weight = read_u16(data, os2_off + 4)?;
|
||
let fs_selection = read_u16(data, os2_off + 62)?;
|
||
let italic = (fs_selection & 1) != 0;
|
||
let ascender = read_i16(data, os2_off + 68)?;
|
||
let descender = read_i16(data, os2_off + 70)?;
|
||
|
||
// ---- head table ----
|
||
let (head_off, head_len) = find_table(data, b"head")?;
|
||
// unitsPerEm is at offset 18 (2 bytes), so need at least 20 bytes
|
||
if head_len < 20 {
|
||
return None;
|
||
}
|
||
let units_per_em = read_u16(data, head_off + 18)?;
|
||
|
||
// ---- name table ----
|
||
let (name_off, name_len) = find_table(data, b"name")?;
|
||
let family = read_family_name(data, name_off, name_len)?;
|
||
|
||
Some(FontMeta {
|
||
family,
|
||
weight,
|
||
italic,
|
||
units_per_em,
|
||
ascender,
|
||
descender,
|
||
})
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
#[test]
|
||
fn find_table_returns_none_on_empty() {
|
||
assert!(find_table(&[], b"head").is_none());
|
||
}
|
||
|
||
#[test]
|
||
fn parse_font_meta_returns_none_on_garbage() {
|
||
assert!(parse_font_meta(&[0u8; 11]).is_none());
|
||
assert!(parse_font_meta(&[0u8; 64]).is_none());
|
||
}
|
||
|
||
#[test]
|
||
fn variant_key_and_is_bold() {
|
||
let meta = FontMeta {
|
||
family: "Test".into(),
|
||
weight: 700,
|
||
italic: true,
|
||
units_per_em: 1000,
|
||
ascender: 800,
|
||
descender: -200,
|
||
};
|
||
assert!(meta.is_bold());
|
||
assert!(meta.italic);
|
||
let key = meta.variant_key();
|
||
assert_eq!(key.weight, 700);
|
||
assert!(key.italic);
|
||
|
||
let regular = FontMeta {
|
||
weight: 400,
|
||
italic: false,
|
||
..meta.clone()
|
||
};
|
||
assert!(!regular.is_bold());
|
||
}
|
||
|
||
#[test]
|
||
fn decode_utf16be_basic() {
|
||
// "AB" in UTF-16BE
|
||
let raw = [0x00, 0x41, 0x00, 0x42];
|
||
assert_eq!(decode_utf16be(&raw).unwrap(), "AB");
|
||
}
|
||
|
||
#[test]
|
||
fn decode_utf16be_odd_length_returns_none() {
|
||
assert!(decode_utf16be(&[0x00, 0x41, 0x00]).is_none());
|
||
}
|
||
|
||
#[test]
|
||
fn decode_mac_roman_ascii() {
|
||
let raw = b"Noto Sans";
|
||
assert_eq!(decode_mac_roman(raw), "Noto Sans");
|
||
}
|
||
}
|