more markdown tooling

2023-04-18 20:32:25 +03:00
parent da5e20e867
commit d5c4390813
6 changed files with 197 additions and 0 deletions
--- a/reporting/md-counters
+++ b/reporting/md-counters
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+
+import argparse
+import sys
+import re
+
+
+def get_opts():
+    parser = argparse.ArgumentParser(
+        description="Counters for markdown.",
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
+    parser.add_argument("-n", help="Number headers", action="store_true")
+    parser.add_argument("-e", help="Number enumerated lists", action="store_true")
+    parser.add_argument("-c", help="Add counter", action="append", default=[])
+    parser.add_argument(
+        "markdown",
+        help="Filename to read. - for stdin",
+        action="store",
+        default="-",
+        nargs="?",
+    )
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    opts = get_opts()
+    if opts.markdown == "-":
+        fp = sys.stdin
+    else:
+        fp = open(opts.markdown, "rt")
+    markdown = fp.read().splitlines()
+
+    counters = {"_headers": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}
+    lastlev = {"_headers": 0}
+    number_match = re.compile("[\d\.]+")
+
+    counters["_enumerator"] = [0, 0, 0, 0]
+    lastlev["_enumerator"] = 0
+    enumerator_match = [re.compile("^" + " " * (4 * x) + "[\d]+\. ") for x in range(4)]
+    in_enumerator = False
+
+    for i, row in enumerate(markdown):
+        if opts.n:
+            if row.startswith("#"):
+                headers, title = row.strip().split(" ", 1)
+                # remove old numbered title
+                if re.match("^\d", title):
+                    if number_match.match(title.split(" ", 1)[0]):
+                        removed_number, title = title.split(" ", 1)
+
+                depth = headers.count("#") - 1
+                if depth < lastlev["_headers"]:
+                    counters["_headers"] = [
+                        x if i <= depth else 0
+                        for i, x in enumerate(counters["_headers"])
+                    ]
+                counters["_headers"][depth] += 1
+                lastlev["_headers"] = depth
+                number = "".join(
+                    ["{:d}.".format(counters["_headers"][x]) for x in range(depth + 1)]
+                )
+                row = "{} {} {}".format(headers, number, title)
+
+        if opts.e:
+            if not in_enumerator and enumerator_match[0].match(row):
+                in_enumerator = True
+                counters["_enumerator"] = [0, 0, 0, 0]
+                lastlev["_enumerator"] = 0
+
+            if in_enumerator:
+                enumerator_depths = [
+                    enumerator_match[x].match(row) for x in range(len(enumerator_match))
+                ]
+                if any(enumerator_depths):
+                    enumerator_depth = min(
+                        [i for i, x in enumerate(enumerator_depths) if x]
+                    )
+                leading_spaces = len(row) - len(row.lstrip(" "))
+
+                # if ^spaces = 4x depth
+                # if row not empty, and doesnt match to enumerator -> reset
+                if (
+                    len(row.strip()) > 0
+                    and not any(enumerator_depths)
+                    and leading_spaces == 0
+                ):
+                    in_enumerator = False
+
+            if in_enumerator:
+                if any(enumerator_depths):
+                    if enumerator_depth < lastlev["_enumerator"]:
+                        counters["_enumerator"] = [
+                            x if i <= enumerator_depth else 0
+                            for i, x in enumerate(counters["_enumerator"])
+                        ]
+                    counters["_enumerator"][enumerator_depth] += 1
+                    lastlev["_enumerator"] = enumerator_depth
+
+                    removed_number, content = row.lstrip(" ").split(" ", 1)
+                    number = "{:d}.".format(counters["_enumerator"][enumerator_depth])
+                    row = "{}{} {}".format(" " * leading_spaces, number, content)
+
+        print(row)
+
+
+# Run the filter only when this file is executed as a script.
+if __name__ == "__main__":
+    main()