#!/usr/bin/env python3
"""Fix mojibake en-dash (and other common patterns) in translation JSON files.

Every ``*.json`` file in ``LANG_DIR`` is read as UTF-8, each sequence listed
in ``MOJIBAKE_FIXES`` is replaced by the character it was meant to be, and the
file is rewritten only when at least one replacement was made.  A line is
printed per (file, pattern) that was fixed, followed by a grand total.
"""

import os
import glob

# ``res/lang`` under the parent of the directory that contains this script.
LANG_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    'res',
    'lang',
)

# Common mojibake patterns: UTF-8 bytes interpreted as Latin-1
MOJIBAKE_FIXES = {
    '\u00e2\u0080\u0093': '\u2013',  # en dash
    '\u00e2\u0080\u0094': '\u2014',  # em dash
    '\u00e2\u0080\u0099': '\u2019',  # right single quote
    '\u00e2\u0080\u009c': '\u201c',  # left double quote
    '\u00e2\u0080\u009d': '\u201d',  # right double quote
    '\u00e2\u0080\u00a6': '\u2026',  # ellipsis
}


def fix_text(text):
    """Replace every known mojibake sequence in *text*.

    Args:
        text: The string to repair.

    Returns:
        A ``(fixed_text, fixes)`` tuple.  ``fixes`` is a list of
        ``(replacement_char, count)`` pairs, in ``MOJIBAKE_FIXES`` order,
        containing only the patterns that actually occurred.
    """
    fixes = []
    for bad, good in MOJIBAKE_FIXES.items():
        # One scan gives both "is it present?" and "how many times?".
        count = text.count(bad)
        if count:
            text = text.replace(bad, good)
            fixes.append((good, count))
    return text, fixes


def fix_file(path):
    """Repair mojibake in the UTF-8 text file at *path*, in place.

    The file is rewritten only if its content changed.  One report line is
    printed for each pattern that was fixed.

    Args:
        path: Path of the file to repair.

    Returns:
        The total number of replacements made in this file.
    """
    # newline='' turns off newline translation on both read and write, so a
    # file with CRLF (or mixed) line endings keeps them exactly as they were
    # instead of being normalised to the platform default when rewritten.
    with open(path, 'r', encoding='utf-8', newline='') as f:
        original = f.read()

    fixed, fixes = fix_text(original)

    lang = os.path.basename(path)
    for good, count in fixes:
        print(f" {lang}: fixed {count} x {good!r}")

    if fixed != original:
        with open(path, 'w', encoding='utf-8', newline='') as f:
            f.write(fixed)

    return sum(count for _, count in fixes)


def main():
    """Fix every ``*.json`` file in ``LANG_DIR`` and print the total count."""
    total_fixed = 0
    # Sorted so the report order is stable between runs.
    for path in sorted(glob.glob(os.path.join(LANG_DIR, '*.json'))):
        total_fixed += fix_file(path)
    print(f"\nTotal fixes: {total_fixed}")


if __name__ == '__main__':
    main()