#!/usr/bin/env python3
"""Check which characters in translation files fall outside the font glyph ranges."""

import glob
import json
import os
import sys
import unicodedata

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LANG_DIR = os.path.join(SCRIPT_DIR, '..', 'res', 'lang')

# Glyph ranges from typography.cpp (regular font + CJK merge).
# Each entry is an inclusive (first, last) pair of code points.
RANGES = [
    # Regular font ranges
    (0x0020, 0x00FF),  # Basic Latin + Latin-1 Supplement
    (0x0100, 0x024F),  # Latin Extended-A + B
    (0x0370, 0x03FF),  # Greek and Coptic
    (0x0400, 0x04FF),  # Cyrillic
    (0x0500, 0x052F),  # Cyrillic Supplement
    (0x2000, 0x206F),  # General Punctuation
    (0x2190, 0x21FF),  # Arrows
    (0x2200, 0x22FF),  # Mathematical Operators
    (0x2600, 0x26FF),  # Miscellaneous Symbols
    # CJK ranges
    (0x2E80, 0x2FDF),  # CJK Radicals
    (0x3000, 0x30FF),  # CJK Symbols, Hiragana, Katakana
    (0x3100, 0x312F),  # Bopomofo
    (0x31F0, 0x31FF),  # Katakana Extensions
    (0x3400, 0x4DBF),  # CJK Extension A
    (0x4E00, 0x9FFF),  # CJK Unified Ideographs
    (0xAC00, 0xD7AF),  # Hangul Syllables
    (0xFF00, 0xFFEF),  # Fullwidth Forms
]

# How many keys to list per character before summarising the rest.
MAX_KEYS_SHOWN = 3


def in_ranges(cp):
    """Return True if code point *cp* lies inside any inclusive range in RANGES."""
    return any(lo <= cp <= hi for lo, hi in RANGES)


def find_missing(data):
    """Map each uncovered character in *data* to the keys whose value uses it.

    Args:
        data: Mapping of translation key to value. Only values that are
            ``str`` are inspected; anything else is skipped.

    Returns:
        Dict of character -> list of keys, in the iteration order of *data*.
        Each key appears at most once per character, even when the character
        occurs several times in that key's value.
    """
    missing = {}
    for key, val in data.items():
        if not isinstance(val, str):
            continue
        # Iterate distinct characters so a repeated character does not
        # record the same key more than once.
        for c in set(val):
            cp = ord(c)
            # Code points up to 0x7F are never reported.
            if cp > 0x7F and not in_ranges(cp):
                missing.setdefault(c, []).append(key)
    return missing


def report_lines(lang, missing):
    """Return the report lines for one language file.

    Args:
        lang: Language file name without the ``.json`` extension.
        missing: Result of :func:`find_missing` for that file.

    Returns:
        List of strings to print, one per output line.
    """
    if not missing:
        return [f"=== {lang}.json: OK (all characters covered) ==="]

    lines = [f"\n=== {lang}.json: {len(missing)} missing characters ==="]
    for c in sorted(missing, key=ord):
        cp = ord(c)
        name = unicodedata.name(c, 'UNKNOWN')
        keys = missing[c]
        key_str = ', '.join(keys[:MAX_KEYS_SHOWN])
        if len(keys) > MAX_KEYS_SHOWN:
            key_str += f' (+{len(keys)-MAX_KEYS_SHOWN} more)'
        lines.append(f" U+{cp:04X} {c} ({name}) — used in: {key_str}")
    return lines


def main():
    """Scan every ``*.json`` file in LANG_DIR and print a coverage report."""
    paths = sorted(glob.glob(os.path.join(LANG_DIR, '*.json')))
    if not paths:
        # Without this the script prints nothing, which looks like a pass.
        print(f"No translation files found in {LANG_DIR}", file=sys.stderr)
        return

    for path in paths:
        # splitext strips only the extension, unlike str.replace('.json', '').
        lang = os.path.splitext(os.path.basename(path))[0]
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        for line in report_lines(lang, find_missing(data)):
            print(line)


if __name__ == '__main__':
    main()