#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
"""Generate the ``window.unicodeMaps.toBold`` table from ``UnicodeData.txt``.

For every record of ``UnicodeData.txt`` that contains ``BOLD``, a list of
candidate names for the corresponding non-bold character is derived from the
bold character's name.  Candidates are looked up by name and the resulting
``{normal_char: bold_char}`` mapping is printed to stdout as a JavaScript
assignment (and as a Python dict to stderr for inspection).
"""

import json
from sys import stderr


def line_to_char(line):
    """Return the character whose hex code point is the first ``;`` field."""
    return chr(int(line.split(";", 1)[0], 16))


def _candidate_names(bold_name):
    """Return the names to try, in order, as the non-bold form of *bold_name*.

    The first entry is always *bold_name* with its first ``"BOLD "`` removed,
    except for names matching none of the known prefixes, where it is
    *bold_name* itself (which can never match a different record).
    """
    # Computed up front so that every branch sees a value belonging to the
    # current name instead of one left over from an earlier record.
    normal_name = "".join(bold_name.split("BOLD ", 1))
    try_names = []

    if bold_name.startswith("MATHEMATICAL BOLD CAPITAL "):
        rest = bold_name[len("MATHEMATICAL BOLD CAPITAL "):]
        try_names.append("LATIN CAPITAL LETTER " + rest)
        if bold_name.endswith(" SYMBOL"):
            try_names.append("GREEK CAPITAL " + rest)
        else:
            try_names.append("GREEK CAPITAL LETTER " + rest)
    elif bold_name.startswith("MATHEMATICAL BOLD SMALL "):
        rest = bold_name[len("MATHEMATICAL BOLD SMALL "):]
        try_names.append("LATIN SMALL LETTER " + rest)
        try_names.append("GREEK SMALL LETTER " + rest)
    elif bold_name.startswith(
        (
            "MATHEMATICAL BOLD FRAKTUR ",
            "MATHEMATICAL BOLD ITALIC ",
            "MATHEMATICAL BOLD SCRIPT ",
        )
    ):
        if bold_name.startswith("MATHEMATICAL BOLD FRAKTUR SMALL "):
            try_names.append(
                "LATIN SMALL LETTER "
                + bold_name[len("MATHEMATICAL BOLD FRAKTUR SMALL "):]
            )
        if bold_name.startswith("MATHEMATICAL BOLD FRAKTUR CAPITAL "):
            try_names.append(
                "LATIN CAPITAL LETTER "
                + bold_name[len("MATHEMATICAL BOLD FRAKTUR CAPITAL "):]
            )
        else:
            try_names.append("SANS-SERIF ".join(bold_name.split("BOLD ", 1)))
            try_names.append(bold_name.split("BOLD ", 1)[1])
    elif bold_name.startswith("MATHEMATICAL SANS-SERIF BOLD "):
        rest = bold_name[len("MATHEMATICAL SANS-SERIF BOLD "):]
        try_names.append(normal_name)
        try_names.append("".join(normal_name.split("SANS-SERIF ", 1)))
        if bold_name.endswith(" SYMBOL"):
            try_names.append("GREEK " + rest)
            try_names.append("GREEK LUNATE " + rest)
        else:
            # "CAPITAL ALPHA" -> "GREEK CAPITAL LETTER ALPHA"
            try_names.append("GREEK " + " LETTER ".join(rest.split(" ", 1)))
            try_names.append(rest)
    elif bold_name.startswith("MATHEMATICAL BOLD DIGIT "):
        try_names.append("DIGIT " + bold_name[len("MATHEMATICAL BOLD DIGIT "):])
    elif bold_name.startswith("MATHEMATICAL BOLD "):
        rest = bold_name[len("MATHEMATICAL BOLD "):]
        try_names.append("GREEK " + rest)
        try_names.append("GREEK LUNATE " + rest)
        try_names.append(rest)
    else:
        normal_name = bold_name

    try_names.insert(0, normal_name)
    return try_names


def build_bold_map(lines):
    """Map each non-bold character to its bold counterpart.

    *lines* is an iterable of ``UnicodeData.txt`` records (``;``-separated,
    code point first, character name second).  Records containing ``BOLD``
    for which no candidate name resolves are reported on stderr and skipped.
    When several bold characters resolve to the same normal character, the
    first one encountered is kept.
    """
    lines = list(lines)

    # Index records by their name field once, instead of rescanning the whole
    # file for every candidate.  The first record carrying a name wins, which
    # mirrors a first-hit scan in file order.  This is an exact match on the
    # name field rather than a substring search of the whole record.
    by_name = {}
    for record in lines:
        fields = record.split(";", 2)
        if len(fields) == 3:
            by_name.setdefault(fields[1], record)

    bolds = {}
    for line in lines:
        if "BOLD" not in line:
            continue
        bold_name = line.split(";", 2)[1]

        # NOTE(review): the previous match condition also required
        # `"" not in line2`.  An empty string is contained in every string,
        # so that clause rejected every candidate and the map was always
        # empty.  It has been dropped; if a real filter was intended (its
        # needle appears to have been lost), reinstate it here.
        normal_line = None
        for name in _candidate_names(bold_name):
            match = by_name.get(name)
            if match is not None and match != line:
                # No early exit: a later matching candidate overrides an
                # earlier one, as in the original nested scan.
                normal_line = match

        if not normal_line:
            print("not found:", line, file=stderr)
            continue

        char = line_to_char(line)
        normal = line_to_char(normal_line)
        if normal not in bolds:
            bolds[normal] = char
    return bolds


def main():
    """Read ``UnicodeData.txt`` from the working directory and print the map."""
    with open("UnicodeData.txt", encoding="utf-8") as f:
        data = f.read().split("\n")
    bolds = build_bold_map(data)
    print("window.unicodeMaps.toBold =", json.dumps(bolds), ";")
    print(bolds, file=stderr)


if __name__ == "__main__":
    main()