Files
ObsidianDragon/scripts/build_cjk_subset.py
DanS fbdba1a001 feat: CJK font rendering, force quit confirmation, settings i18n
- Rebuild CJK font subset (1421 glyphs) and convert CFF→TTF for
  stb_truetype compatibility, fixing Chinese/Japanese/Korean rendering
- Add force quit confirmation dialog with cancel/confirm actions
- Show force quit tooltip immediately on hover (no delay)
- Translate hardcoded English strings in settings dropdowns
  (auto-lock timeouts, slider "Off" labels)
- Fix mojibake en-dashes in 7 translation JSON files
- Add helper scripts: build_cjk_subset, convert_cjk_to_ttf,
  check_font_coverage, fix_mojibake
2026-04-12 10:32:58 -05:00

132 lines
4.1 KiB
Python

#!/usr/bin/env python3
"""
Build a NotoSansCJK subset font containing all characters used by
the zh, ja, and ko translation files, plus common CJK punctuation
and symbols.
Usage:
python3 scripts/build_cjk_subset.py
Requires: pip install fonttools brotli
"""
import json
import os
from fontTools.ttLib import TTFont
from fontTools import subset as ftsubset
# Repository root: this script lives in <root>/scripts/, so go one level up.
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
# Translation JSON files live under res/lang/ (zh.json, ja.json, ko.json).
LANG_DIR = os.path.join(ROOT, 'res', 'lang')
# Full NotoSansCJK source font (downloaded separately) and the subset output.
SOURCE_FONT = '/tmp/NotoSansCJKsc-Regular.otf'
OUTPUT_FONT = os.path.join(ROOT, 'res', 'fonts', 'NotoSansCJK-Subset.ttf')
# Gather every non-ASCII codepoint used by the CJK translation files.
# ASCII is rendered by the Ubuntu font, so only codepoints above U+007F
# need to be present in the subset.
needed = set()
for lang_code in ('zh', 'ja', 'ko'):
    lang_path = os.path.join(LANG_DIR, f'{lang_code}.json')
    if not os.path.exists(lang_path):
        continue
    with open(lang_path, 'r', encoding='utf-8') as fh:
        translations = json.load(fh)
    for text in translations.values():
        if not isinstance(text, str):
            continue
        needed.update(cp for cp in map(ord, text) if cp > 0x7F)
# Proactively include ranges that future translations are likely to use:
#   U+3000-30FF  CJK punctuation/symbols, Hiragana, Katakana
#   U+3100-312F  Bopomofo
#   U+FF01-FF5E  fullwidth ASCII variants (commonly mixed into CJK text)
needed.update(range(0x3000, 0x3100))
needed.update(range(0x3100, 0x3130))
needed.update(range(0xFF01, 0xFF5F))
print(f"Total non-ASCII characters to include: {len(needed)}")
# Cross-check the wish list against what the source font actually maps.
source_font = TTFont(SOURCE_FONT)
source_codepoints = set(source_font.getBestCmap())
supportable = needed & source_codepoints
unsupported = needed - source_codepoints
print(f"Supported by source font: {len(supportable)}")
if unsupported:
    # Not fatal: these fall through to the fallback font at runtime.
    print(f"Not in source font (will use fallback): {len(unsupported)}")
    for cp in sorted(unsupported)[:10]:
        print(f" U+{cp:04X} {chr(cp)}")
# Drive pyftsubset through its CLI-style entry point. --no-hinting and
# --desubroutinize shrink the output and simplify the CFF for conversion.
unicode_spec = ",".join(f"U+{cp:04X}" for cp in sorted(supportable))
subset_args = [
    SOURCE_FONT,
    f'--output-file={OUTPUT_FONT}',
    f'--unicodes={unicode_spec}',
    '--no-hinting',
    '--desubroutinize',
]
ftsubset.main(subset_args)
# ---------------------------------------------------------------------------
# Convert CFF (PostScript, cubic) outlines to TrueType (glyf, quadratic)
# outlines. stb_truetype (used by ImGui) doesn't handle CID-keyed CFF fonts
# properly, so the subset is re-saved as a genuine TrueType font.
# ---------------------------------------------------------------------------
from fontTools.pens.cu2quPen import Cu2QuPen
from fontTools.pens.ttGlyphPen import TTGlyphPen
from fontTools.ttLib import newTable

tmp_otf = OUTPUT_FONT + '.tmp.otf'
os.rename(OUTPUT_FONT, tmp_otf)
conv = TTFont(tmp_otf)
if 'CFF ' in conv:
    print("Converting CFF -> TrueType outlines...")
    glyphOrder = conv.getGlyphOrder()
    glyphSet = conv.getGlyphSet()
    glyf_table = newTable("glyf")
    glyf_table.glyphs = {}
    glyf_table.glyphOrder = glyphOrder
    # Empty loca is fine: glyf.compile() fills in glyph offsets on save.
    loca_table = newTable("loca")
    from fontTools.ttLib.tables._g_l_y_f import Glyph as TTGlyph
    failed = 0
    for gname in glyphOrder:
        try:
            # Cubic -> quadratic with max error of 1 font unit.
            # reverse_direction: CFF outer contours are counter-clockwise,
            # TrueType expects clockwise.
            ttPen = TTGlyphPen(glyphSet)
            cu2quPen = Cu2QuPen(ttPen, max_err=1.0, reverse_direction=True)
            glyphSet[gname].draw(cu2quPen)
            glyf_table.glyphs[gname] = ttPen.glyph()
        except Exception:
            # Best-effort: substitute an empty glyph rather than abort the
            # whole build over one bad outline, but report how many failed.
            glyf_table.glyphs[gname] = TTGlyph()
            failed += 1
    if failed:
        print(f"Warning: {failed} glyph(s) failed conversion; replaced with empty glyphs")
    del conv['CFF ']
    if 'VORG' in conv:
        del conv['VORG']  # vertical-origin table is CFF-specific
    conv['glyf'] = glyf_table
    conv['loca'] = loca_table
    conv['head'].indexToLocFormat = 1  # long (32-bit) loca offsets
    if 'maxp' in conv:
        # BUGFIX: the fontTools attribute is `tableVersion`, not `version`.
        # The old `conv['maxp'].version = ...` silently set an unused
        # attribute, leaving maxp at the CFF-style 0.5 version inside a
        # glyf font. Version 1.0 also makes maxp recalc its glyph stats
        # from the glyf table when the font is saved.
        conv['maxp'].tableVersion = 0x00010000
    conv.sfntVersion = "\x00\x01\x00\x00"  # TrueType magic (vs 'OTTO' for CFF)
# Save unconditionally: if the subset was already TrueType we must still
# restore OUTPUT_FONT before deleting the temp copy below.
conv.save(OUTPUT_FONT)
conv.close()
os.remove(tmp_otf)
size = os.path.getsize(OUTPUT_FONT)
print(f"\nOutput: {OUTPUT_FONT}")
print(f"Size: {size / 1024:.0f} KB")
# Sanity check: reopen the saved subset and confirm character coverage.
verify = TTFont(OUTPUT_FONT)
verify_cmap = set(verify.getBestCmap())
still_missing = needed - verify_cmap
print(f"Verified glyphs in subset: {len(verify_cmap)}")
if not still_missing:
    print("All needed characters are covered!")
else:
    # Expected: some Hangul/Hiragana codepoints don't exist in the source font.
    print(f"Not coverable by this font: {len(still_missing)} (need additional font)")
    for cp in sorted(still_missing)[:10]:
        print(f" U+{cp:04X} {chr(cp)}")