#!/usr/bin/env python3
"""
Build a NotoSansCJK subset font containing all characters used by the
zh, ja, and ko translation files, plus common CJK punctuation and symbols.

The script:
  1. collects every non-ASCII character used in res/lang/{zh,ja,ko}.json,
  2. adds a few fixed Unicode ranges (CJK punctuation, kana, Bopomofo,
     fullwidth ASCII),
  3. subsets the source font down to the characters it actually supports,
  4. converts the CFF outlines of the subset to TrueType (glyf) outlines,
  5. re-opens the result and reports which characters are still missing.

Usage: python3 scripts/build_cjk_subset.py
Requires: pip install fonttools brotli
"""

import json
import os

from fontTools import subset as ftsubset
from fontTools.pens.cu2quPen import Cu2QuPen
from fontTools.pens.ttGlyphPen import TTGlyphPen
from fontTools.ttLib import TTFont, newTable
from fontTools.ttLib.tables._g_l_y_f import Glyph as TTGlyph

ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
LANG_DIR = os.path.join(ROOT, 'res', 'lang')
SOURCE_FONT = '/tmp/NotoSansCJKsc-Regular.otf'
OUTPUT_FONT = os.path.join(ROOT, 'res', 'fonts', 'NotoSansCJK-Subset.ttf')


def _iter_strings(value):
    """Yield every string value found in a decoded JSON document, at any depth.

    Dictionary keys are not yielded; only values are.
    """
    if isinstance(value, str):
        yield value
    elif isinstance(value, dict):
        for item in value.values():
            yield from _iter_strings(item)
    elif isinstance(value, list):
        for item in value:
            yield from _iter_strings(item)


def _non_ascii_codepoints(data):
    """Return the set of code points above U+007F used by the strings in *data*."""
    codepoints = set()
    for text in _iter_strings(data):
        # non-ASCII only (ASCII handled by Ubuntu font)
        codepoints.update(ord(ch) for ch in text if ord(ch) > 0x7F)
    return codepoints


def _convert_cff_to_glyf(font, max_err=1.0):
    """Replace the CFF outlines of *font* with quadratic TrueType outlines, in place.

    Args:
        font: an open TTFont that contains a 'CFF ' table.
        max_err: maximum cubic-to-quadratic approximation error, in font units.

    Returns:
        A list of (glyph_name, exception) pairs for glyphs whose outline could
        not be converted; those glyphs are stored as empty outlines.
    """
    glyph_order = font.getGlyphOrder()
    glyph_set = font.getGlyphSet()

    glyf_table = newTable("glyf")
    glyf_table.glyphs = {}
    glyf_table.glyphOrder = glyph_order

    failed = []
    for gname in glyph_order:
        try:
            tt_pen = TTGlyphPen(glyph_set)
            cu2qu_pen = Cu2QuPen(tt_pen, max_err=max_err, reverse_direction=True)
            glyph_set[gname].draw(cu2qu_pen)
            glyf_table.glyphs[gname] = tt_pen.glyph()
        except Exception as exc:
            # Best effort: keep the glyph slot (so glyph IDs stay stable) with
            # an empty outline, but remember the failure so it gets reported.
            glyf_table.glyphs[gname] = TTGlyph()
            failed.append((gname, exc))

    del font['CFF ']
    if 'VORG' in font:
        del font['VORG']
    font['glyf'] = glyf_table
    font['loca'] = newTable("loca")
    font['head'].indexToLocFormat = 1

    if 'maxp' in font:
        maxp = font['maxp']
        # fontTools keeps the table version in `tableVersion`. A CFF font
        # carries a version 0.5 table, which has none of the fields below,
        # so they must be provided before a version 1.0 table can be written.
        maxp.tableVersion = 0x00010000
        # Outline maxima: placeholders, recalculated by fontTools when the
        # font is saved with glyf outlines.
        maxp.maxPoints = 0
        maxp.maxContours = 0
        maxp.maxCompositePoints = 0
        maxp.maxCompositeContours = 0
        maxp.maxComponentElements = 0
        maxp.maxComponentDepth = 0
        # Instruction-related limits: the subset is unhinted.
        maxp.maxZones = 1
        maxp.maxTwilightPoints = 0
        maxp.maxStorage = 0
        maxp.maxFunctionDefs = 0
        maxp.maxInstructionDefs = 0
        maxp.maxStackElements = 0
        maxp.maxSizeOfInstructions = 0

    font.sfntVersion = "\x00\x01\x00\x00"
    return failed


# Collect all characters used in CJK translation files
needed = set()
for lang in ['zh', 'ja', 'ko']:
    path = os.path.join(LANG_DIR, f'{lang}.json')
    if not os.path.exists(path):
        print(f"Translation file not found, skipping: {path}")
        continue
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    needed |= _non_ascii_codepoints(data)

# Also add common CJK ranges that future translations might use:
#   - CJK punctuation and symbols (3000-303F)
#   - Hiragana (3040-309F)
#   - Katakana (30A0-30FF)
#   - Bopomofo (3100-312F)
#   - CJK quotation marks, brackets
needed.update(range(0x3000, 0x3100))
needed.update(range(0x3100, 0x3130))
# Fullwidth ASCII variants (commonly mixed in CJK text)
needed.update(range(0xFF01, 0xFF5F))

print(f"Total non-ASCII characters to include: {len(needed)}")

# Check which of these the source font supports
if not os.path.isfile(SOURCE_FONT):
    raise SystemExit(f"Source font not found: {SOURCE_FONT}")

font = TTFont(SOURCE_FONT)
try:
    cmap = font.getBestCmap()
    source_codepoints = set(cmap.keys())
finally:
    font.close()

supportable = needed & source_codepoints
unsupported = needed - source_codepoints
print(f"Supported by source font: {len(supportable)}")
if unsupported:
    print(f"Not in source font (will use fallback): {len(unsupported)}")
    for cp in sorted(unsupported)[:10]:
        print(f" U+{cp:04X} {chr(cp)}")

# Build the subset using pyftsubset CLI-style API
os.makedirs(os.path.dirname(OUTPUT_FONT), exist_ok=True)
unicodes = ",".join(f"U+{cp:04X}" for cp in sorted(supportable))
args = [
    SOURCE_FONT,
    f'--output-file={OUTPUT_FONT}',
    f'--unicodes={unicodes}',
    '--no-hinting',
    '--desubroutinize',
]
ftsubset.main(args)

# Convert CFF outlines to TrueType (glyf) outlines.
# stb_truetype (used by ImGui) doesn't handle CID-keyed CFF fonts properly.
tmp_otf = OUTPUT_FONT + '.tmp.otf'
# os.replace (unlike os.rename) also overwrites a stale temp file on Windows.
os.replace(OUTPUT_FONT, tmp_otf)

conv = TTFont(tmp_otf)
try:
    if 'CFF ' in conv:
        print("Converting CFF -> TrueType outlines...")
        failed_glyphs = _convert_cff_to_glyf(conv, max_err=1.0)
        if failed_glyphs:
            print(f"Glyphs that failed to convert (left empty): {len(failed_glyphs)}")
            for gname, exc in failed_glyphs[:10]:
                print(f" {gname}: {exc!r}")
    conv.save(OUTPUT_FONT)
finally:
    conv.close()
# Only reached when the save succeeded; on failure the temp file is kept.
os.remove(tmp_otf)

size = os.path.getsize(OUTPUT_FONT)
print(f"\nOutput: {OUTPUT_FONT}")
print(f"Size: {size / 1024:.0f} KB")

# Verify
verify = TTFont(OUTPUT_FONT)
try:
    verify_cmap = set(verify.getBestCmap().keys())
finally:
    verify.close()

still_missing = needed - verify_cmap
print(f"Verified glyphs in subset: {len(verify_cmap)}")
if still_missing:
    # These are chars not in the source font - expected for some Hangul/Hiragana
    print(f"Not coverable by this font: {len(still_missing)} (need additional font)")
    for cp in sorted(still_missing)[:10]:
        print(f" U+{cp:04X} {chr(cp)}")
else:
    print("All needed characters are covered!")