feat: CJK font rendering, force quit confirmation, settings i18n
- Rebuild CJK font subset (1421 glyphs) and convert CFF→TTF for stb_truetype compatibility, fixing Chinese/Japanese/Korean rendering
- Add force quit confirmation dialog with cancel/confirm actions
- Show force quit tooltip immediately on hover (no delay)
- Translate hardcoded English strings in settings dropdowns (auto-lock timeouts, slider "Off" labels)
- Fix mojibake en-dashes in 7 translation JSON files
- Add helper scripts: build_cjk_subset, convert_cjk_to_ttf, check_font_coverage, fix_mojibake
This commit is contained in:
131
scripts/build_cjk_subset.py
Normal file
131
scripts/build_cjk_subset.py
Normal file
@@ -0,0 +1,131 @@
|
||||
#!/usr/bin/env python3
"""
Build a NotoSansCJK subset font containing all characters used by
the zh, ja, and ko translation files, plus common CJK punctuation
and symbols.

Usage:
    python3 scripts/build_cjk_subset.py

Requires: pip install fonttools brotli
"""
import json
import os

from fontTools.ttLib import TTFont
from fontTools import subset as ftsubset

ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
LANG_DIR = os.path.join(ROOT, 'res', 'lang')
SOURCE_FONT = '/tmp/NotoSansCJKsc-Regular.otf'
OUTPUT_FONT = os.path.join(ROOT, 'res', 'fonts', 'NotoSansCJK-Subset.ttf')

# Collect all characters used in CJK translation files.
needed = set()
for lang in ['zh', 'ja', 'ko']:
    path = os.path.join(LANG_DIR, f'{lang}.json')
    if not os.path.exists(path):
        continue
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    for v in data.values():
        if isinstance(v, str):
            # ASCII is rendered by the Ubuntu font, so collect non-ASCII only.
            needed.update(ord(c) for c in v if ord(c) > 0x7F)

# Also add common CJK ranges that future translations might use:
# - CJK punctuation and symbols (3000-303F)
# - Hiragana (3040-309F)
# - Katakana (30A0-30FF)
# - Bopomofo (3100-312F)
# (one contiguous span; previously two adjacent loops)
needed.update(range(0x3000, 0x3130))
# Fullwidth ASCII variants (commonly mixed in CJK text)
needed.update(range(0xFF01, 0xFF5F))

print(f"Total non-ASCII characters to include: {len(needed)}")

# Check which of these the source font supports.
font = TTFont(SOURCE_FONT)
cmap_keys = set(font.getBestCmap().keys())  # built once, used twice
supportable = needed & cmap_keys
unsupported = needed - cmap_keys

print(f"Supported by source font: {len(supportable)}")
if unsupported:
    print(f"Not in source font (will use fallback): {len(unsupported)}")
    for cp in sorted(unsupported)[:10]:
        print(f"  U+{cp:04X} {chr(cp)}")

# Build the subset using pyftsubset CLI-style API.
args = [
    SOURCE_FONT,
    f'--output-file={OUTPUT_FONT}',
    f'--unicodes={",".join(f"U+{cp:04X}" for cp in sorted(supportable))}',
    '--no-hinting',
    '--desubroutinize',
]

ftsubset.main(args)

# Convert CFF outlines to TrueType (glyf) outlines.
# stb_truetype (used by ImGui) doesn't handle CID-keyed CFF fonts properly.
from fontTools.pens.cu2quPen import Cu2QuPen
from fontTools.pens.ttGlyphPen import TTGlyphPen
from fontTools.ttLib import newTable
from fontTools.ttLib.tables._g_l_y_f import Glyph as TTGlyph

tmp_otf = OUTPUT_FONT + '.tmp.otf'
os.rename(OUTPUT_FONT, tmp_otf)

conv = TTFont(tmp_otf)
if 'CFF ' in conv:
    print("Converting CFF -> TrueType outlines...")
    glyphOrder = conv.getGlyphOrder()
    glyphSet = conv.getGlyphSet()
    glyf_table = newTable("glyf")
    glyf_table.glyphs = {}
    glyf_table.glyphOrder = glyphOrder
    # loca contents are recomputed from glyf on save; the table only has to exist.
    loca_table = newTable("loca")
    failed = 0
    for gname in glyphOrder:
        try:
            ttPen = TTGlyphPen(glyphSet)
            cu2quPen = Cu2QuPen(ttPen, max_err=1.0, reverse_direction=True)
            glyphSet[gname].draw(cu2quPen)
            glyf_table.glyphs[gname] = ttPen.glyph()
        except Exception:
            # Fall back to an empty glyph, but keep count so failures are visible
            # instead of silently dropping outlines.
            glyf_table.glyphs[gname] = TTGlyph()
            failed += 1
    if failed:
        print(f"WARNING: {failed} glyph(s) failed outline conversion; left empty")
    del conv['CFF ']
    if 'VORG' in conv:
        del conv['VORG']
    conv['glyf'] = glyf_table
    conv['loca'] = loca_table
    conv['head'].indexToLocFormat = 1  # long (32-bit) loca offsets
    if 'maxp' in conv:
        conv['maxp'].version = 0x00010000  # glyf-based fonts require maxp 1.0
    conv.sfntVersion = "\x00\x01\x00\x00"  # mark the file as TrueType
# Save/cleanup happens even when the subset was already glyf-based, so the
# temp rename above is always undone.
conv.save(OUTPUT_FONT)
conv.close()
os.remove(tmp_otf)

size = os.path.getsize(OUTPUT_FONT)
print(f"\nOutput: {OUTPUT_FONT}")
print(f"Size: {size / 1024:.0f} KB")

# Verify the written subset actually maps the characters we need.
verify = TTFont(OUTPUT_FONT)
verify_cmap = set(verify.getBestCmap().keys())
still_missing = needed - verify_cmap
print(f"Verified glyphs in subset: {len(verify_cmap)}")
if still_missing:
    # These are chars not in the source font - expected for some Hangul/Hiragana
    print(f"Not coverable by this font: {len(still_missing)} (need additional font)")
    for cp in sorted(still_missing)[:10]:
        print(f"  U+{cp:04X} {chr(cp)}")
else:
    print("All needed characters are covered!")
|
||||
64
scripts/check_cjk_coverage.py
Normal file
64
scripts/check_cjk_coverage.py
Normal file
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python3
"""Check which characters in translation files fall outside the font glyph ranges."""
import json
import unicodedata
import glob
import os
from collections import defaultdict

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LANG_DIR = os.path.join(SCRIPT_DIR, '..', 'res', 'lang')

# Glyph ranges from typography.cpp (regular font + CJK merge)
RANGES = [
    # Regular font ranges
    (0x0020, 0x00FF),  # Basic Latin + Latin-1 Supplement
    (0x0100, 0x024F),  # Latin Extended-A + B
    (0x0370, 0x03FF),  # Greek and Coptic
    (0x0400, 0x04FF),  # Cyrillic
    (0x0500, 0x052F),  # Cyrillic Supplement
    (0x2000, 0x206F),  # General Punctuation
    (0x2190, 0x21FF),  # Arrows
    (0x2200, 0x22FF),  # Mathematical Operators
    (0x2600, 0x26FF),  # Miscellaneous Symbols
    # CJK ranges
    (0x2E80, 0x2FDF),  # CJK Radicals
    (0x3000, 0x30FF),  # CJK Symbols, Hiragana, Katakana
    (0x3100, 0x312F),  # Bopomofo
    (0x31F0, 0x31FF),  # Katakana Extensions
    (0x3400, 0x4DBF),  # CJK Extension A
    (0x4E00, 0x9FFF),  # CJK Unified Ideographs
    (0xAC00, 0xD7AF),  # Hangul Syllables
    (0xFF00, 0xFFEF),  # Fullwidth Forms
]


def in_ranges(cp):
    """Return True if codepoint *cp* is covered by one of the font RANGES."""
    return any(lo <= cp <= hi for lo, hi in RANGES)


for path in sorted(glob.glob(os.path.join(LANG_DIR, '*.json'))):
    lang = os.path.basename(path).replace('.json', '')
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # char -> list of translation keys where the uncovered char appears
    missing = defaultdict(list)
    for key, val in data.items():
        if not isinstance(val, str):
            continue
        for c in val:
            cp = ord(c)
            # ASCII is always covered; only flag non-ASCII outside the ranges.
            if cp > 0x7F and not in_ranges(cp):
                missing[c].append(key)

    if missing:
        print(f"\n=== {lang}.json: {len(missing)} missing characters ===")
        for c in sorted(missing, key=ord):
            cp = ord(c)
            name = unicodedata.name(c, 'UNKNOWN')
            # Show at most three example keys to keep the report readable.
            keys = missing[c][:3]
            key_str = ', '.join(keys)
            if len(missing[c]) > 3:
                key_str += f' (+{len(missing[c])-3} more)'
            print(f"  U+{cp:04X} {c} ({name}) — used in: {key_str}")
    else:
        print(f"=== {lang}.json: OK (all characters covered) ===")
|
||||
47
scripts/check_font_coverage.py
Normal file
47
scripts/check_font_coverage.py
Normal file
@@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env python3
"""Check which characters needed by translations are missing from bundled fonts."""
import glob
import json
import os

from fontTools.ttLib import TTFont

ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
FONTS_DIR = os.path.join(ROOT, 'res', 'fonts')
LANG_DIR = os.path.join(ROOT, 'res', 'lang')

# Load font cmaps
cjk = TTFont(os.path.join(FONTS_DIR, 'NotoSansCJK-Subset.ttf'))
cjk_cmap = set(cjk.getBestCmap().keys())

ubuntu = TTFont(os.path.join(FONTS_DIR, 'Ubuntu-R.ttf'))
ubuntu_cmap = set(ubuntu.getBestCmap().keys())

# A character is renderable if either bundled font maps it.
combined = cjk_cmap | ubuntu_cmap

print(f"CJK subset font glyphs: {len(cjk_cmap)}")
print(f"Ubuntu font glyphs: {len(ubuntu_cmap)}")
print(f"Combined: {len(combined)}")
print()

# Check every translation file instead of a hardcoded language list, so
# newly added languages are covered automatically (matches the sibling
# coverage scripts, which also glob res/lang/*.json).
for path in sorted(glob.glob(os.path.join(LANG_DIR, '*.json'))):
    lang = os.path.splitext(os.path.basename(path))[0]
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Every distinct codepoint used by this language's strings.
    needed = {ord(c) for v in data.values() if isinstance(v, str) for c in v}

    missing = sorted(needed - combined)
    if missing:
        print(f"{lang}.json: {len(needed)} chars needed, {len(missing)} MISSING")
        for cp in missing[:20]:
            print(f"  U+{cp:04X} {chr(cp)}")
        if len(missing) > 20:
            print(f"  ... and {len(missing) - 20} more")
    else:
        print(f"{lang}.json: OK ({len(needed)} chars, all covered)")
|
||||
214
scripts/convert_cjk_to_ttf.py
Normal file
214
scripts/convert_cjk_to_ttf.py
Normal file
@@ -0,0 +1,214 @@
|
||||
#!/usr/bin/env python3
"""Convert CJK subset from CID-keyed CFF/OTF to TrueType/TTF.

stb_truetype (used by ImGui) doesn't handle CID-keyed CFF fonts properly,
so we need glyf-based TrueType outlines instead.

Two approaches:
1. Direct CFF->TTF conversion via cu2qu (fontTools)
2. Download NotoSansSC-Regular.ttf (already TTF) and re-subset

This script tries approach 1 first, falls back to approach 2.
"""

import os
import sys
import json
import glob

# Paths are resolved relative to this script so it works from any CWD.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
FONT_DIR = os.path.join(PROJECT_ROOT, "res", "fonts")
LANG_DIR = os.path.join(PROJECT_ROOT, "res", "lang")

# Input: the CFF-flavoured subset (see build_cjk_subset.py).
SRC_OTF = os.path.join(FONT_DIR, "NotoSansCJK-Subset.otf")
# Output: the same subset with TrueType (glyf) outlines.
DST_TTF = os.path.join(FONT_DIR, "NotoSansCJK-Subset.ttf")
|
||||
|
||||
|
||||
def get_needed_codepoints():
    """Collect every unique codepoint used by the translation JSON files.

    Only codepoints at or above U+2E80 are returned (CJK radicals, kana,
    ideographs, Hangul, fullwidth forms); everything below is handled by
    the regular Latin font.
    """
    collected = set()
    for json_path in glob.glob(os.path.join(LANG_DIR, "*.json")):
        with open(json_path, "r", encoding="utf-8") as fh:
            translations = json.load(fh)
        for text in translations.values():
            if isinstance(text, str):
                collected.update(cp for cp in map(ord, text) if cp >= 0x2E80)
    return collected
|
||||
|
||||
|
||||
def convert_cff_to_ttf():
    """Convert the existing OTF/CFF subset font to a glyf-based TTF.

    Tries fontTools' bundled ``otf2ttf`` first; otherwise converts each
    glyph's cubic (CFF) outlines to quadratic (glyf) outlines with cu2qu.

    Returns:
        True if DST_TTF was written, False if SRC_OTF has no CFF table.
    """
    from fontTools.ttLib import TTFont, newTable
    from fontTools.ttLib.tables._g_l_y_f import Glyph as TTGlyph
    from fontTools.pens.cu2quPen import Cu2QuPen
    from fontTools.pens.ttGlyphPen import TTGlyphPen

    print(f"Loading {SRC_OTF}...")
    font = TTFont(SRC_OTF)

    # Verify it's CFF; glyf-based inputs need no conversion here.
    if "CFF " not in font:
        print("Font is not CFF, skipping conversion")
        return False

    cff = font["CFF "]
    top = cff.cff.topDictIndex[0]
    # An ROS entry / FDSelect means the font is CID-keyed, which is exactly
    # what stb_truetype chokes on.
    print(f"ROS: {getattr(top, 'ROS', None)}")
    print(f"CID-keyed: {getattr(top, 'FDSelect', None) is not None}")

    glyphOrder = font.getGlyphOrder()
    print(f"Glyphs: {len(glyphOrder)}")

    # Use fontTools' built-in otf2ttf if this installation provides it.
    try:
        from fontTools.otf2ttf import otf_to_ttf
        otf_to_ttf(font)
        font.save(DST_TTF)
        print(f"Saved TTF: {DST_TTF} ({os.path.getsize(DST_TTF)} bytes)")
        font.close()
        return True
    except ImportError:
        pass

    # Manual conversion using cu2qu
    print("Using manual CFF->TTF conversion with cu2qu...")

    glyphSet = font.getGlyphSet()

    glyf_table = newTable("glyf")
    glyf_table.glyphs = {}
    glyf_table.glyphOrder = glyphOrder

    # loca contents are recomputed from glyf when the font is saved; the
    # table object only needs to exist.
    loca_table = newTable("loca")

    max_error = 1.0  # em-units tolerance for cubic->quadratic
    failed = 0

    for gname in glyphOrder:
        try:
            ttPen = TTGlyphPen(glyphSet)
            cu2quPen = Cu2QuPen(ttPen, max_err=max_error, reverse_direction=True)
            glyphSet[gname].draw(cu2quPen)
            glyf_table.glyphs[gname] = ttPen.glyph()
        except Exception:
            # Fallback: empty glyph — but count it so failures are visible
            # instead of silently producing blank characters.
            glyf_table.glyphs[gname] = TTGlyph()
            failed += 1
    if failed:
        print(f"WARNING: {failed} glyph(s) failed outline conversion; left empty")

    # Replace CFF outlines with the new glyf table.
    del font["CFF "]
    if "VORG" in font:
        # VORG (vertical origins) is CFF-only metadata.
        del font["VORG"]

    font["glyf"] = glyf_table
    font["loca"] = loca_table

    # head.indexToLocFormat = 1 selects the long (32-bit) loca format.
    font["head"].indexToLocFormat = 1

    # glyf-based fonts require maxp version 1.0.
    if "maxp" in font:
        font["maxp"].version = 0x00010000

    # sfntVersion 0x00010000 marks the file as TrueType.
    font.sfntVersion = "\x00\x01\x00\x00"

    font.save(DST_TTF)
    print(f"Saved TTF: {DST_TTF} ({os.path.getsize(DST_TTF)} bytes)")
    font.close()
    return True
|
||||
|
||||
|
||||
def download_and_subset():
    """Fallback: re-subset a pre-downloaded NotoSansSC TTF.

    Upstream noto-cjk only publishes CFF OTFs and a very large TTC, so there
    is no automatic download; this path works only when a static TTF has
    been placed at /tmp/NotoSansSC-Regular.ttf beforehand.

    Returns:
        True if the subset TTF was written, False if no source TTF exists.
    """
    from fontTools.ttLib import TTFont
    from fontTools import subset

    tmp_font = "/tmp/NotoSansSC-Regular.ttf"

    if not os.path.exists(tmp_font):
        # No automatic download (upstream offers only CFF OTFs or a huge TTC);
        # tell the user what to provide rather than claiming to download.
        print(f"{tmp_font} not found; place a static NotoSansSC TTF there to use this fallback")
        return False

    print(f"Using {tmp_font}")
    font = TTFont(tmp_font)
    cmap = font.getBestCmap()
    print(f"Source has {len(cmap)} cmap entries")

    needed = get_needed_codepoints()
    print(f"Need {len(needed)} CJK codepoints")

    # Keep only the glyphs our translations actually use.
    subsetter = subset.Subsetter()
    subsetter.populate(unicodes=needed)
    subsetter.subset(font)

    font.save(DST_TTF)
    print(f"Saved: {DST_TTF} ({os.path.getsize(DST_TTF)} bytes)")
    font.close()
    return True
|
||||
|
||||
|
||||
def verify_result():
    """Sanity-check the generated TTF: outline format, cmap size, key glyphs."""
    from fontTools.ttLib import TTFont

    checked = TTFont(DST_TTF)
    charmap = checked.getBestCmap()

    print("\n--- Verification ---")
    print(f"Format: {checked.sfntVersion!r}")
    print(f"Has glyf: {'glyf' in checked}")
    print(f"Has CFF: {'CFF ' in checked}")
    print(f"Cmap entries: {len(charmap)}")

    # Spot-check a handful of characters from the Chinese UI strings.
    test_chars = {
        "历": 0x5386, "史": 0x53F2,  # Chinese: history
        "概": 0x6982, "述": 0x8FF0,  # Chinese: overview
        "设": 0x8BBE, "置": 0x7F6E,  # Chinese: settings
    }
    for glyph_char, codepoint in test_chars.items():
        verdict = "YES" if codepoint in charmap else "NO"
        print(f"  {glyph_char} (U+{codepoint:04X}): {verdict}")

    byte_count = os.path.getsize(DST_TTF)
    print(f"File size: {byte_count} bytes ({byte_count/1024:.1f} KB)")
    checked.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    print("=== CJK Font CFF -> TTF Converter ===\n")

    # Prefer direct conversion; fall back to the pre-downloaded-TTF path.
    succeeded = convert_cff_to_ttf()
    if not succeeded:
        print("Direct conversion failed, trying download approach...")
        succeeded = download_and_subset()

    if succeeded:
        verify_result()
    else:
        print("ERROR: Could not convert font")
        sys.exit(1)
|
||||
36
scripts/fix_mojibake.py
Normal file
36
scripts/fix_mojibake.py
Normal file
@@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env python3
"""Fix mojibake en-dash (and other common patterns) in translation JSON files."""
import os
import glob

LANG_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'res', 'lang')

# Mojibake appears when UTF-8 bytes were decoded with a single-byte codec.
# Build the bad->good mapping for BOTH common codecs: latin-1 (what the
# original table covered) and cp1252 (the other frequent culprit, producing
# e.g. 'â€“' for the en dash). Deriving the bad strings from the good
# characters keeps the table correct by construction.
_GOOD_CHARS = '\u2013\u2014\u2019\u201c\u201d\u2026'  # – — ’ “ ” …
MOJIBAKE_FIXES = {}
for _good in _GOOD_CHARS:
    _utf8 = _good.encode('utf-8')
    for _codec in ('latin-1', 'cp1252'):
        try:
            MOJIBAKE_FIXES[_utf8.decode(_codec)] = _good
        except UnicodeDecodeError:
            # Some byte values (e.g. 0x9D) are undefined in cp1252.
            pass

total_fixed = 0
for path in sorted(glob.glob(os.path.join(LANG_DIR, '*.json'))):
    with open(path, 'r', encoding='utf-8') as f:
        raw = f.read()

    original = raw
    for bad, good in MOJIBAKE_FIXES.items():
        if bad in raw:
            count = raw.count(bad)
            raw = raw.replace(bad, good)
            lang = os.path.basename(path)
            print(f"  {lang}: fixed {count} x {repr(good)}")
            total_fixed += count

    # Only rewrite files that actually changed.
    if raw != original:
        with open(path, 'w', encoding='utf-8') as f:
            f.write(raw)

print(f"\nTotal fixes: {total_fixed}")
|
||||
Reference in New Issue
Block a user