#!/usr/bin/env python3
"""Check which characters in translation files fall outside the font glyph ranges."""
import json
import unicodedata
import glob
import os

# Absolute path of the directory that holds this script.
SCRIPT_DIR = os.path.split(os.path.abspath(__file__))[0]
# Translation files are looked up in ../res/lang relative to the script.
LANG_DIR = os.path.join(SCRIPT_DIR, os.pardir, 'res', 'lang')

# Glyph ranges mirrored from typography.cpp. Each entry is a (first, last)
# code-point pair; both ends are treated as inclusive by in_ranges().

# Ranges loaded for the regular font.
_REGULAR_FONT_RANGES = [
    (0x0020, 0x00FF),  # Basic Latin + Latin-1 Supplement
    (0x0100, 0x024F),  # Latin Extended-A + B
    (0x0370, 0x03FF),  # Greek and Coptic
    (0x0400, 0x04FF),  # Cyrillic
    (0x0500, 0x052F),  # Cyrillic Supplement
    (0x2000, 0x206F),  # General Punctuation
    (0x2190, 0x21FF),  # Arrows
    (0x2200, 0x22FF),  # Mathematical Operators
    (0x2600, 0x26FF),  # Miscellaneous Symbols
]

# Ranges loaded for the merged CJK font.
_CJK_FONT_RANGES = [
    (0x2E80, 0x2FDF),  # CJK Radicals
    (0x3000, 0x30FF),  # CJK Symbols, Hiragana, Katakana
    (0x3100, 0x312F),  # Bopomofo
    (0x31F0, 0x31FF),  # Katakana Extensions
    (0x3400, 0x4DBF),  # CJK Extension A
    (0x4E00, 0x9FFF),  # CJK Unified Ideographs
    (0xAC00, 0xD7AF),  # Hangul Syllables
    (0xFF00, 0xFFEF),  # Fullwidth Forms
]

# Combined table: regular font ranges first, then the CJK merge.
RANGES = _REGULAR_FONT_RANGES + _CJK_FONT_RANGES

def in_ranges(cp, ranges=None):
    """Return True if code point *cp* falls inside any of the given ranges.

    Args:
        cp: Unicode code point as an integer (e.g. ``ord(ch)``).
        ranges: Optional iterable of ``(lo, hi)`` pairs, both ends inclusive.
            When omitted, the module-level ``RANGES`` table is used, so
            existing single-argument calls behave exactly as before.

    Returns:
        True when ``lo <= cp <= hi`` holds for at least one pair, else False
        (including when *ranges* is empty).
    """
    # Resolve the default at call time so an explicit table can be supplied
    # (e.g. to check against a different font) without touching RANGES.
    if ranges is None:
        ranges = RANGES
    return any(lo <= cp <= hi for lo, hi in ranges)

def _iter_strings(node, prefix=''):
    """Yield ``(key_path, text)`` for every string value found under *node*.

    Dicts and lists are walked recursively so nested translation entries are
    checked too. For a flat ``{key: string}`` file the yielded key path is
    simply the key. Non-string leaves (numbers, booleans, null) are ignored.
    """
    if isinstance(node, str):
        yield prefix, node
    elif isinstance(node, dict):
        for key, child in node.items():
            child_path = f'{prefix}.{key}' if prefix else str(key)
            yield from _iter_strings(child, child_path)
    elif isinstance(node, list):
        for index, child in enumerate(node):
            yield from _iter_strings(child, f'{prefix}[{index}]')


def _find_missing(data):
    """Map each uncovered non-ASCII character in *data* to the keys using it.

    A key is recorded at most once per character, even if the character
    occurs several times inside that key's string.
    """
    missing = {}
    for key, text in _iter_strings(data):
        # dict.fromkeys() drops repeated characters while keeping a
        # deterministic first-seen order.
        for c in dict.fromkeys(text):
            cp = ord(c)
            # ASCII (<= 0x7F) is never reported, matching the original check.
            if cp > 0x7F and not in_ranges(cp):
                missing.setdefault(c, []).append(key)
    return missing


# Scan every translation file in LANG_DIR (sorted for stable output) and
# print a per-file report of characters the font ranges do not cover.
for path in sorted(glob.glob(os.path.join(LANG_DIR, '*.json'))):
    # splitext removes only the trailing extension, unlike str.replace which
    # would also strip '.json' occurring elsewhere in the file name.
    lang = os.path.splitext(os.path.basename(path))[0]
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    missing = _find_missing(data)

    if missing:
        print(f"\n=== {lang}.json: {len(missing)} missing characters ===")
        for c in sorted(missing, key=ord):
            cp = ord(c)
            name = unicodedata.name(c, 'UNKNOWN')
            keys = missing[c]
            # Show at most three keys; summarise the remainder as a count.
            key_str = ', '.join(keys[:3])
            if len(keys) > 3:
                key_str += f' (+{len(keys)-3} more)'
            print(f"  U+{cp:04X} {c} ({name}) — used in: {key_str}")
    else:
        print(f"=== {lang}.json: OK (all characters covered) ===")