#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
"""Build a plain-character -> superscript-character map from UnicodeData.txt.

Reads ``UnicodeData.txt`` from the current directory, pairs every
``SUPERSCRIPT ...`` and ``MODIFIER LETTER ...`` entry with a plain
counterpart found by name matching, and prints the result to stdout as a
JavaScript assignment to ``window.unicodeMaps.toSuperscript``.  Entries
with no counterpart, and the final mapping, are reported on stderr.
"""

# TODO: Approximate {sub,super}script using other characters?

import json
from sys import stderr

# Decomposition tag carried by UnicodeData.txt entries that are themselves
# superscript forms; such entries must never be chosen as the plain base.
SUPER_TAG = "<super>"


def line_to_char(line):
    """Return the character whose hex code point is the first ';' field."""
    return chr(int(line.split(";", 1)[0], 16))


def _name_after(line, marker):
    """Return the text between *marker* and the next ';', or None if absent."""
    _, found, rest = line.partition(marker)
    if not found:
        return None
    return rest.split(";", 1)[0]


def _modifier_base_names(name):
    """Return candidate plain-character names for a MODIFIER LETTER *name*.

    *name* is the part of the character name following "MODIFIER LETTER ".
    The unmodified name is always tried first; at most one rewritten name
    is appended, chosen by the first rule that applies.
    """
    names = [name]
    if "SMALL GREEK" in name:
        names.append("GREEK SMALL LETTER" + name.split("SMALL GREEK", 1)[1])
    elif "SMALL CYRILLIC" in name:
        names.append(
            "CYRILLIC SMALL LETTER" + name.split("SMALL CYRILLIC", 1)[1]
        )
    elif "CYRILLIC SMALL" in name:
        names.append(
            "CYRILLIC SMALL LETTER" + name.split("CYRILLIC SMALL", 1)[1]
        )
    elif "SMALL CAPITAL" in name:
        names.append(
            "LATIN LETTER SMALL CAPITAL" + name.split("SMALL CAPITAL", 1)[1]
        )
    elif "SMALL" in name:
        rest = name.split("SMALL", 1)[1]
        if "LIGATURE OE" in name:
            # The ligature's plain name has no "LETTER" word.
            names.append("LATIN SMALL" + rest)
        else:
            names.append("LATIN SMALL LETTER" + rest)
    elif "CAPITAL" in name:
        rest = name.split("CAPITAL", 1)[1]
        if "BARRED B" in name or "REVERSED N" in name:
            # These two are looked up under their small-capital names.
            names.append("LATIN LETTER SMALL CAPITAL" + rest)
        else:
            names.append("LATIN CAPITAL LETTER" + rest)
    return names


def _find_base_line(data, line, matches):
    """Return the first entry of *data* accepted as the base of *line*.

    An entry qualifies when ``matches(entry)`` is true, it is not *line*
    itself and it is not tagged as a superscript form.  Returns None when
    nothing qualifies.
    """
    for candidate in data:
        if (
            matches(candidate)
            and candidate != line
            and SUPER_TAG not in candidate
        ):
            return candidate
    return None


def build_superscripts(data):
    """Map plain characters to superscript characters.

    *data* is an iterable of UnicodeData.txt lines that is scanned once per
    superscript entry, so it must be re-iterable (e.g. a list).  The first
    superscript found for a given plain character wins.  Entries without a
    counterpart are reported on stderr and skipped.  A space always maps to
    itself.
    """
    superscripts = {}
    for line in data:
        if "SUPERSCRIPT" in line:
            normal_name = _name_after(line, "SUPERSCRIPT ")
            if normal_name is None:
                print("not found:", line, file=stderr)
                continue
            # Text of the name field preceding "SUPERSCRIPT" (often empty).
            prefix = line.split(";", 1)[1].split("SUPERSCRIPT", 1)[0]

            def matches(candidate, normal_name=normal_name, prefix=prefix):
                return normal_name in candidate and prefix in candidate

        elif "MODIFIER LETTER" in line:
            name = _name_after(line, "MODIFIER LETTER ")
            if name is None:
                print("not found:", line, file=stderr)
                continue
            try_names = _modifier_base_names(name)

            def matches(candidate, try_names=try_names):
                # Match a whole ';'-delimited field, not a substring of one.
                return any(";" + n + ";" in candidate for n in try_names)

        else:
            continue

        normal_line = _find_base_line(data, line, matches)
        if not normal_line:
            print("not found:", line, file=stderr)
            continue
        superscripts.setdefault(line_to_char(normal_line), line_to_char(line))

    superscripts[" "] = " "
    return superscripts


def main():
    """Read UnicodeData.txt and print the map as a JavaScript assignment."""
    with open("UnicodeData.txt", encoding="utf-8") as f:
        data = f.read().split("\n")
    superscripts = build_superscripts(data)
    print("window.unicodeMaps.toSuperscript =", json.dumps(superscripts), ";")
    print(superscripts, file=stderr)


if __name__ == "__main__":
    main()