#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
# TODO: Approximate {sub,super}script using other characters?
"""Generate the ``window.unicodeMaps.toSuperscript`` table from UnicodeData.txt.

Reads ``UnicodeData.txt`` from the current directory, pairs every
SUPERSCRIPT / MODIFIER LETTER entry with the ordinary character it is named
after, and prints a JavaScript assignment to stdout.  Diagnostics (entries
with no base character, plus the final mapping) go to stderr.
"""

import json
from sys import stderr

# Decomposition tag carried by superscript entries in UnicodeData.txt.  A line
# containing it is itself a superscript form and must never be picked as the
# "normal" character.  (Testing for the empty string instead can never
# succeed, because "" is a substring of every line.)
SUPER_TAG = "<super>"


def line_to_char(line):
    """Return the character whose hex code point is the first ';' field."""
    return chr(int(line.split(";", 1)[0], 16))


def _modifier_letter_candidates(name):
    """Return the character names to try as the base of a modifier letter.

    *name* is the text following "MODIFIER LETTER " in the entry's name.
    The unmodified name is always tried first, followed by at most one
    rewritten name for the script / case pattern the name matches.
    """
    try_names = [name]
    if "SMALL GREEK" in name:
        try_names.append("GREEK SMALL LETTER" + name.split("SMALL GREEK", 1)[1])
    elif "SMALL CYRILLIC" in name:
        try_names.append(
            "CYRILLIC SMALL LETTER" + name.split("SMALL CYRILLIC", 1)[1]
        )
    elif "CYRILLIC SMALL" in name:
        try_names.append(
            "CYRILLIC SMALL LETTER" + name.split("CYRILLIC SMALL", 1)[1]
        )
    elif "SMALL CAPITAL" in name:
        try_names.append(
            "LATIN LETTER SMALL CAPITAL" + name.split("SMALL CAPITAL", 1)[1]
        )
    elif "SMALL" in name:
        s = name.split("SMALL", 1)[1]
        if "LIGATURE OE" in name:
            # The ligature's base name has no "LETTER" word in it.
            try_names.append("LATIN SMALL" + s)
        else:
            try_names.append("LATIN SMALL LETTER" + s)
    elif "CAPITAL" in name:
        s = name.split("CAPITAL", 1)[1]
        if "BARRED B" in name or "REVERSED N" in name:
            # These two are looked up under their small-capital names.
            try_names.append("LATIN LETTER SMALL CAPITAL" + s)
        else:
            try_names.append("LATIN CAPITAL LETTER" + s)
    return try_names


def _find_base_line(data, line, matches):
    """Return the first line of *data* accepted as the base for *line*.

    A candidate must differ from *line*, must not carry the ``<super>`` tag,
    and must satisfy the *matches* predicate.  Returns None when nothing
    qualifies.
    """
    for candidate in data:
        if (
            candidate != line
            and SUPER_TAG not in candidate
            and matches(candidate)
        ):
            return candidate
    return None


def build_superscripts(data):
    """Build the normal-character -> superscript-character mapping.

    *data* is the list of UnicodeData.txt lines.  When several superscript
    forms resolve to the same base character, the first one in file order
    wins.  Entries without a base are reported on stderr and skipped.  A
    space always maps to itself.
    """
    superscripts = {}
    for line in data:
        if "SUPERSCRIPT" in line:
            normal_name = line.split("SUPERSCRIPT ", 1)[1].split(";", 1)[0]
            # Whatever precedes "SUPERSCRIPT" in the name must also appear
            # in the base character's line.
            prefix = line.split(";", 1)[1].split("SUPERSCRIPT", 1)[0]
            normal_line = _find_base_line(
                data, line, lambda c: normal_name in c and prefix in c
            )
        elif "MODIFIER LETTER" in line:
            name = line.split("MODIFIER LETTER ", 1)[1].split(";", 1)[0]
            # Wrap in ';' so only a complete field matches, not a substring
            # of a longer name.
            fields = [";" + n + ";" for n in _modifier_letter_candidates(name)]
            normal_line = _find_base_line(
                data, line, lambda c: any(f in c for f in fields)
            )
        else:
            continue

        if not normal_line:
            print("not found:", line, file=stderr)
            continue

        char = line_to_char(line)
        normal = line_to_char(normal_line)
        if normal not in superscripts:
            superscripts[normal] = char

    superscripts[" "] = " "
    return superscripts


def main():
    """Read UnicodeData.txt and print the JavaScript superscript table."""
    with open("UnicodeData.txt", encoding="utf-8") as f:
        data = f.read().split("\n")

    superscripts = build_superscripts(data)

    print("window.unicodeMaps.toSuperscript =", json.dumps(superscripts), ";")
    print(superscripts, file=stderr)


if __name__ == "__main__":
    main()