forked from GrammaTech/gtirb
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocmd.py
executable file
·174 lines (149 loc) · 5.56 KB
/
preprocmd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# doxygen is annoying about certain .md things so we have to
# preprocess before doxygenating
import os
import re
import sys
from functools import reduce
# Doxygen doesn't make heading anchors like it should.
#
# We can't insert them as a straight re substitution because we also
# need to make sure we're not accidentally matching something inside a
# codeblock.
def anchor_page_headings(pagetxt):
    """Return *pagetxt* with an HTML anchor inserted after each heading.

    Two heading forms are recognised:

    * lines starting with one or more ``#`` followed by whitespace; a
      trailing ``{...`` attribute is not part of the anchor text;
    * a line followed by an underline of three or more ``=`` or ``-``
      characters; a trailing ``{#...}`` attribute is not part of the
      anchor text.  The anchor goes after the underline.

    The anchor name is the heading text, stripped and lower-cased, with
    ``,``, ``/``, ``'`` and ``"`` removed and spaces replaced by ``-``.

    A heading preceded by an odd number of triple-backtick markers is
    treated as lying inside a fenced code block and is left unchanged.
    """

    def heading_to_anchor(headtxt):
        # Build the anchor name by applying each replacement in turn.
        anchortxt = headtxt.strip().lower()
        for old, new in [(ch, "") for ch in (",", "/", "'", '"')] + [
            (" ", "-")
        ]:
            anchortxt = anchortxt.replace(old, new)
        return '<a name="{0}" id="{0}"></a>\n'.format(anchortxt)

    def incomment(txt):
        # An odd number of fence markers means a fence is still open.
        return txt.count("```") % 2 == 1

    def append_anchor(m, group):
        # m.string is the text this pass is actually scanning.  It must be
        # used instead of the original page text: anchors inserted by an
        # earlier pass shift every later offset, so slicing the original
        # text with m.start() would look past the heading.
        if incomment(m.string[: m.start()]):
            return m.group(0)
        return m.group(0) + heading_to_anchor(m.group(group))

    # (pattern, index of the group holding the heading text)
    heading_patterns = [
        (r"(?:(^|\n)\#+\s+)(.*?)(?:\{.*?)?\n", 2),
        (r"((?:^|\n)\S.*?)(?:\{\#.*?\}|)\n(?:(\=\=\=+|\-\-\-+)\n)", 1),
    ]
    for pattern, group in heading_patterns:
        pagetxt = re.sub(
            pattern,
            lambda m, group=group: append_anchor(m, group),
            pagetxt,
        )
    return pagetxt
# Given a Python-looking token, compute its location in the Python API
# documentation and return an HTML link.
def makePyApiLink(pymatch):
    """Return an HTML link into the Python API pages for a matched token.

    *pymatch* is a regex match object that provides the named groups
    ``pytoken`` (a dotted name) and ``parens`` (an optional parenthesised
    suffix, ``None`` when absent).

    The link target is ``python/<page>.html#<pytoken>`` and the link text
    is the token followed by the parenthesised suffix, if any.
    """
    token = pymatch.group("pytoken")
    suffix = pymatch.group("parens") or ""
    # Everything before the last dot names the candidate page.
    page, _, _ = token.rpartition(".")
    # Heuristic: only an all-lowercase prefix is taken to be a real
    # submodule; anything else is looked up on the top-level gtirb page.
    if not page.islower():
        page = "gtirb"
    return '<a href="python/{0}.html#{1}">{1}{2}</a>'.format(
        page, token, suffix
    )
# Make an HTML link to an example file
def makeDoxyExampleLink(exname, linktext, pathadj):
    """Return an HTML link to the page for example file *exname*.

    The page name is *exname* with every ``.`` replaced by ``_8`` and
    ``-example.html`` appended.  *pathadj* is prepended to the page name
    as a path prefix and *linktext* becomes the link text.
    """
    page = "".join([exname.replace(".", "_8"), "-example.html"])
    target = pathadj + page
    return '<a href="{0}">{1}</a>'.format(target, linktext)
# Usage: preprocmd.py INFILE OUTFILE
# Reads the Markdown file INFILE, adds heading anchors, applies the
# substitutions collected below and writes the result to OUTFILE.
if len(sys.argv) < 3:
    sys.exit("usage: {0} INFILE OUTFILE".format(sys.argv[0]))
(infile, outfile) = sys.argv[1:3]

# This controls some of the substitutions: the name of the directory
# that directly contains the output (resp. input) file.
outdir = os.path.basename(os.path.dirname(os.path.abspath(outfile)))
indir = os.path.basename(os.path.dirname(os.path.abspath(infile)))

# NOTE(review): this list is replaced by an empty list under "Start
# collecting substitutions" below, so none of these rules is ever applied.
# It is unclear whether that is deliberate; the ordering is left as-is so
# the generated output does not change.
substitutions = [
    # doxygen is specifically annoying about fenced code, which
    # only occurs in README.md.
    (
        r"\n( +)```bash(\n(.*\n)*?)\1```",
        lambda m: m.group(1) + m.group(2).replace("\n", "\n "),
    ),
    # .md file links to /doc/examples/*
    # become Doxygen \ref links with adjusted path component
    # (from main section) or finalized Doxygen links (otherwise)
    (
        r"\[([^]\n]*)\]\s*\(doc/examples/(.*?)\)",
        (
            # re.sub inserts the return value of a function replacement
            # literally, so the groups are filled in here rather than
            # through \1 / \2 template escapes.
            lambda m: (
                '\\ref {0} "{1}"'.format(m.group(2), m.group(1))
                if outdir == "general"
                else makeDoxyExampleLink(m.group(2), m.group(1), "../")
            )
        ),
    ),
    # Simplify links within same directory
    (
        r"\[([^]\n]*)\]\s*\(doc/{0}/(.*?).md\)".format(outdir),
        r'\\ref md_\2 "\1"',
    ),
    (r"\[([^]\n]*)\]\s*\(doc/{0}/(.*?)\)".format(outdir), r'\\ref \2 "\1"'),
    # C++ has to be converted to doxygen \code ... \endcode
    # to get linking
    (r"```c\+\+(\n(.*\n)*?)```", r"\\code{.cpp}\1\\endcode"),
    (r"```cpp(\n(.*\n)*?)```", r"\\code{.cpp}\1\\endcode"),
]

# ==============================
# Start collecting substitutions
substitutions = []

# adjust relative links from gtirb/doc/general/*.md to gtirb/doc/*.md
# and gtirb/*.md
if indir == "general":
    substitutions += [(r"(?<=\()\.\.\/(?:\.\.\/)?([\w-]+\.md)(?=\))", r"\1")]

pytoken_re = r"(?P<pytoken>gtirb(\.[a-zA-Z]\w*)+)(?P<parens>\([^]\n]*?\))?"

if outdir == "general":
    # NOTE(review): the trailing "+ substitutions" appends the entries
    # already collected a second time, after the rules below.  Kept
    # unchanged to preserve the existing output.
    substitutions += [
        # Heuristic recognition of links to Python API elements
        # where a 'Python-looking token' has the form
        # described by pytoken_re
        # heuristic 1: Python-looking token occurs in a table row
        # whose first cell contains "Python"
        (
            r"(^|\n)\| +[^|]*?Python.*? {0} .*?\n".format(pytoken_re),
            lambda m: re.sub(pytoken_re, makePyApiLink, m.group(0)),
        ),
        # heuristic 2: Python-looking token - optionally followed by
        # parentheses with arbitrary contents - is link text of a link
        # whose target is python/README.md
        (
            r"\[\s*{0}\s*\]".format(pytoken_re)
            + r"\([\w\./]*(?<=[\(\/])python\/README.md\)",
            makePyApiLink,
        ),
        # Links to other language APIs, iff those APIs are present
        # (conditional managed by Doxygen)
        (
            r"\[([^]\n]*)\]\s*\(doc/cpp/README.md\)",
            r' \\if CPP_ONLY <a href="cpp/index.html">\1</a> '
            r"\\else \1 (not available) \\endif \n",
        ),
        (
            r"\[([^]\n]*)\]\s*\((python|cl)/README.md\)",
            lambda m: (
                ' \\if {0}_ONLY <a href="{1}/index.html">{2}</a> '
                "\\else {2} (not available) \\endif \n"
            ).format(
                "PY" if m.group(2) == "python" else "CL",
                m.group(2),
                m.group(1),
            ),
        ),
    ] + substitutions

# Read and write as UTF-8 explicitly so the result does not depend on the
# platform's locale encoding.
with open(infile, "r", encoding="utf-8") as infh:
    contents = infh.read()

contents = anchor_page_headings(contents)

# Apply every (pattern, replacement) pair in order; each rule sees the
# output of the previous one.
for pattern, replacement in substitutions:
    contents = re.sub(pattern, replacement, contents)

with open(outfile, "w", encoding="utf-8") as outfh:
    outfh.write("[TOC]\n")  # insert page TOC
    outfh.write(contents)