From 76debe0071fef6865bd0fb79199f53932bf063c0 Mon Sep 17 00:00:00 2001
From: Radu <radu@pml4t.net>
Date: Sat, 26 Aug 2023 17:29:35 -0400
Subject: Write a working version and include licences

Support some symbols, arrows, bold, subscript and superscript.
---
 superscript.py | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100755 superscript.py

(limited to 'superscript.py')
diff --git a/superscript.py b/superscript.py
new file mode 100755
index 0000000..f755de0
--- /dev/null
+++ b/superscript.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+
+# TODO: Approximate {sub,super}script using other characters?
+
+from sys import stderr
+
+superscripts = {}  # plain character -> its superscript / modifier-letter form
+
+with open("UnicodeData.txt") as f:  # relative path: resolved against the cwd
+    raw = f.read()
+data = raw.split("\n")  # one list entry per line of the file
+
+
+def line_to_char(line):  # the text before the first ";" is a hex code point
+    return chr(int(line.partition(";")[0], 16))
+
+
+for line in data:  # fill `superscripts`: plain character -> raised form
+    if "SUPERSCRIPT" in line:  # case 1: pair "<prefix>SUPERSCRIPT X" with "X"
+        normal_name = line.split("SUPERSCRIPT ", 1)[1].split(";", 1)[0]  # text after "SUPERSCRIPT "
+        prefix = line.split(";", 1)[1].split("SUPERSCRIPT", 1)[0]  # text before it; may be ""
+        normal_line = None
+        for line2 in data:  # linear scan; the first line that qualifies wins
+            if (
+                normal_name in line2  # substring test, not an exact name match
+                and prefix in line2
+                and line != line2  # never pair a line with itself
+                and "<control>" not in line2
+            ):
+                normal_line = line2
+                break
+        if not normal_line:
+            print("not found:", line, file=stderr)  # report on stderr, then skip
+            continue
+        char = line_to_char(line)  # the superscript form
+        normal = line_to_char(normal_line)  # its plain counterpart
+        if normal not in superscripts:  # an earlier mapping for this key is kept
+            superscripts[normal] = char
+    elif "MODIFIER LETTER" in line:  # case 2: "MODIFIER LETTER X" lines
+        name = line.split("MODIFIER LETTER ", 1)[1].split(";", 1)[0]  # text after "MODIFIER LETTER "
+        try_names = [name]  # candidate plain names; the raw name is always tried
+        if "SMALL GREEK" in name:  # branches below reorder words into a 2nd candidate
+            try_names.append("GREEK SMALL LETTER" + name.split("SMALL GREEK", 1)[1])
+        elif "SMALL CYRILLIC" in name:
+            try_names.append(
+                "CYRILLIC SMALL LETTER" + name.split("SMALL CYRILLIC", 1)[1]
+            )
+        elif "CYRILLIC SMALL" in name:
+            try_names.append(
+                "CYRILLIC SMALL LETTER" + name.split("CYRILLIC SMALL", 1)[1]
+            )
+        elif "SMALL CAPITAL" in name:  # must be tested before the bare "SMALL" branch
+            try_names.append(
+                "LATIN LETTER SMALL CAPITAL" + name.split("SMALL CAPITAL", 1)[1]
+            )
+        elif "SMALL" in name:
+            s = name.split("SMALL", 1)[1]  # whatever follows the first "SMALL"
+            if "LIGATURE OE" in name:  # this candidate is built without "LETTER"
+                try_names.append("LATIN SMALL" + s)
+            else:
+                try_names.append("LATIN SMALL LETTER" + s)
+        elif "CAPITAL" in name:
+            s = name.split("CAPITAL", 1)[1]  # whatever follows the first "CAPITAL"
+            if "BARRED B" in name:  # looked up under a SMALL CAPITAL name instead
+                try_names.append("LATIN LETTER SMALL CAPITAL" + s)
+            elif "REVERSED N" in name:  # same special case as BARRED B
+                try_names.append("LATIN LETTER SMALL CAPITAL" + s)
+            else:
+                try_names.append("LATIN CAPITAL LETTER" + s)
+        normal_line = None
+        for line2 in data:  # linear scan; the first line that qualifies wins
+            if (
+                any(map(lambda x: ";" + x + ";" in line2, try_names))  # candidate fills a whole field
+                and line != line2  # never pair a line with itself
+                and "<control>" not in line2
+            ):
+                normal_line = line2
+                break
+        if not normal_line:
+            print("not found:", line, file=stderr)  # report on stderr, then skip
+            continue
+        char = line_to_char(line)  # the modifier-letter form
+        normal = line_to_char(normal_line)  # its plain counterpart
+        if normal not in superscripts:  # mappings recorded earlier take precedence
+            superscripts[normal] = char
+
+import json
+
+superscripts[" "] = " "  # a space maps to itself
+as_json = json.dumps(superscripts)
+print("window.unicodeMaps.toSuperscript =", as_json, ";")  # assignment statement on stdout
+print(superscripts, file=stderr)  # raw dict on stderr
-- 
cgit v1.2.3