aboutsummaryrefslogtreecommitdiff
path: root/superscript.py
diff options
context:
space:
mode:
Diffstat (limited to 'superscript.py')
-rwxr-xr-x superscript.py 93
1 files changed, 93 insertions, 0 deletions
diff --git a/superscript.py b/superscript.py
new file mode 100755
index 0000000..f755de0
--- /dev/null
+++ b/superscript.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+
+# TODO: Approximate {sub,super}script using other characters?
+
+from sys import stderr
+
# Read UnicodeData.txt from the current working directory and keep it as a
# list of raw lines.  The encoding is passed explicitly so that decoding does
# not depend on the platform's locale default.
with open("UnicodeData.txt", encoding="utf-8") as f:
    data = f.read()
# Splitting on "\n" leaves a trailing empty string when the file ends with a
# newline; an empty line matches none of the tests applied later.
data = data.split("\n")

# Maps a base character to the superscript/modifier character found for it.
superscripts = {}
+
+
def line_to_char(line):
    """Return the character encoded by the first ``;``-separated field of *line*.

    The field is parsed as a hexadecimal code point; ``int`` raises
    ``ValueError`` if it is not valid hexadecimal.
    """
    code_point_hex, _, _ = line.partition(";")
    code_point = int(code_point_hex, 16)
    return chr(code_point)
+
+
# (marker, replacement) pairs tried in order for a modifier-letter name.  For
# the first marker contained in the name, the text following the marker is
# appended to the replacement to form an alternate name to look up.
_SCRIPT_MARKERS = (
    ("SMALL GREEK", "GREEK SMALL LETTER"),
    ("SMALL CYRILLIC", "CYRILLIC SMALL LETTER"),
    ("CYRILLIC SMALL", "CYRILLIC SMALL LETTER"),
    ("SMALL CAPITAL", "LATIN LETTER SMALL CAPITAL"),
)


def _candidate_names(letter):
    """Return the names to search for, given a modifier-letter name.

    The list always starts with *letter* itself; at most one rewritten
    spelling follows it.
    """
    for marker, replacement in _SCRIPT_MARKERS:
        if marker in letter:
            return [letter, replacement + letter.split(marker, 1)[1]]
    if "SMALL" in letter:
        stem = "LATIN SMALL" if "LIGATURE OE" in letter else "LATIN SMALL LETTER"
        return [letter, stem + letter.split("SMALL", 1)[1]]
    if "CAPITAL" in letter:
        if "BARRED B" in letter or "REVERSED N" in letter:
            stem = "LATIN LETTER SMALL CAPITAL"
        else:
            stem = "LATIN CAPITAL LETTER"
        return [letter, stem + letter.split("CAPITAL", 1)[1]]
    return [letter]


def _first_other_line(entry, accepts):
    """Return the first line of ``data`` that *accepts* approves, or None.

    The line must differ from *entry* and must not contain ``<control>``.
    """
    return next(
        (
            other
            for other in data
            if accepts(other) and other != entry and "<control>" not in other
        ),
        None,
    )


for entry in data:
    if "SUPERSCRIPT" in entry:
        # Text after "SUPERSCRIPT " up to the next ";" is the name searched for.
        base_name = entry.split("SUPERSCRIPT ", 1)[1].split(";", 1)[0]
        # Text between the first ";" and "SUPERSCRIPT" must also appear in
        # the matching line.
        lead = entry.split(";", 1)[1].split("SUPERSCRIPT", 1)[0]
        base_line = _first_other_line(
            entry, lambda other: base_name in other and lead in other
        )
    elif "MODIFIER LETTER" in entry:
        letter = entry.split("MODIFIER LETTER ", 1)[1].split(";", 1)[0]
        # A candidate only counts when it fills a whole ";"-delimited field.
        wanted = [";" + option + ";" for option in _candidate_names(letter)]
        base_line = _first_other_line(
            entry, lambda other: any(field in other for field in wanted)
        )
    else:
        continue

    if not base_line:
        print("not found:", entry, file=stderr)
        continue

    raised = line_to_char(entry)
    base = line_to_char(base_line)
    # The first mapping recorded for a base character wins.
    superscripts.setdefault(base, raised)
+
import json

# A space maps to itself.
superscripts.update({" ": " "})

# stdout receives a JavaScript assignment of the JSON-encoded table; stderr
# receives the Python repr of the same dict.
encoded = json.dumps(superscripts)
print(f"window.unicodeMaps.toSuperscript = {encoded} ;")
print(str(superscripts), file=stderr)