forked from metamath/set.mm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
typeset.py
184 lines (159 loc) · 5.94 KB
/
typeset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/bin/python
# Process a file's `...` instructions using an mmfile's typographical commands
# Reads from stdin and sends to stdout. By default the mmfile is "set.mm".
# Use the "--help" option to see all the options.
# Sample usage:
# python scripts/typeset.py --html < demo.html.raw > demo.html
# Copyright 2018, David A. Wheeler
# SPDX-License-Identifier: MIT
# Use Python 3 print() syntax, even if we run this in Python 2.
from __future__ import print_function
import sys
import re
import fileinput
import argparse
# Force hashes to be sorted by insertion order, so --list is easy.
# All hashes do this on Python 3.7+, but we want it to happen
# in Python 2.7+ as well. Using OrderedDict makes it work in all cases.
from collections import OrderedDict
# TODO: Currently each `...` must not cross lines.
# TODO: Currently this doesn't handle 'no space if followed by punctuation'.
# Apply typographical definitions in MMFILE which look like this:
# htmldef "(" as "<IMG SRC='lp.gif' WIDTH=5 HEIGHT=19 ALT=' (' TITLE='('>";
# We read in the typographical instructions using a simple
# recursive descent parser, then translate what is in `...`.
# Default options
required_start = 'althtmldef '
mmfile = 'set.mm'
# We read typo definitions from typo_file.
typo_file = None
# "remains" is what's left to read on this line from typo_file
remains = ''
# This is the set of definitions we read from typo_file.
# Each key can be in `...`; the corresponding values are their translations.
typo_definition = OrderedDict()
def read_fill_line():
'''
Clear out leading space, and if current line is empty, read next line
from typo_file into "remains". Returns '' when at end of file.
'''
global remains
remains = remains.lstrip()
if remains == '':
remains = typo_file.readline()
if remains == '': return # File done
read_fill_line() # Recurse until we find something
def read_comment():
'''Skip through a /* ... */ comment'''
global remains
while '*/' not in remains:
remains = ''
read_fill_line()
comment, junk, remains = remains.partition('*/')
def read_required(value):
'''Read a required symbol; error out if it is not there'''
global remains
read_fill_line()
if not remains.startswith(value):
raise 0
remains = remains[len(value):]
read_fill_line()
def read_string():
'''Read a typograhical string, which may continue using +'''
global remains
read_fill_line()
if remains == '':
return result # EOF
elif remains[0] == '"' or remains[0] == "'":
result, junk, remains = remains[1:].partition(remains[0])
read_fill_line()
# Recurse if we have +
if remains and remains[0] == '+':
read_required('+')
return result + read_string()
return result
elif remains.startswith('/*'):
# This presumes /*..*/ comments stay on one line
comment, junk, remains = remains.partition('*/')
return read_string()
else:
raise 0 # BOGUS
def read_definition():
'''Read in a definition from typo_file given remains'''
symbol = read_string()
read_required('as')
result = read_string()
read_required(';')
typo_definition[symbol] = result # Set definition in typo_definition.
def read_definitions():
'''Read all definitions from typo_file'''
global typo_file
global remains
typo_file = open(mmfile)
while True:
read_fill_line()
if remains == '': break # We have reached end of file.
stripped = remains.lstrip()
if stripped.startswith('/*'): # Skip comments
if '*/' in stripped:
comment, junk, remains = remains.partition('*/')
else:
read_comment()
if stripped.startswith(required_start): # Found our definition?
remains = stripped[len(required_start):]
read_definition()
else:
remains = '' # clear out line so we'll read next one
# This searches for backquoted text in a line.
backquoted = re.compile(r'(?:^|(?<= ))` +(([^`]|``)*) +`(?=$| )')
def replace_typographically(m):
'''
Given a match item, return string;
each word is replaced using typo_definition.
If something isn't in the list, return the original list surrounded
by backquotes (presumably we weren't supposed to get this).
'''
inner = m.group(1) # The text to change
translated_list = list(map(lambda i: typo_definition.get(i, None),
inner.split()))
if None in translated_list:
return '` ' + inner + ' `' # Return untransformed version
else:
translation = ' '.join(translated_list)
return translation.strip()
# Set up option handling
my_parser = argparse.ArgumentParser()
my_parser.add_argument('--html', help='Use older HTML format',
action="store_true")
my_parser.add_argument('--althtml', help='Use ALTHTML format (default)',
action="store_true")
my_parser.add_argument('--latex', help='Use LaTex format',
action="store_true")
my_parser.add_argument('--mmfile', help='Use this mmfile (default set.mm)')
my_parser.add_argument('--list',
help='List symbols and their results (in tab separated value format). ' +
'This is sorted in $t order (this may differ from $c declaration order)',
action="store_true")
args = my_parser.parse_args()
# Handle command-line options
if args.html:
required_start = 'htmldef '
if args.althtml:
required_start = 'althtmldef '
if args.latex:
required_start = 'latexdef '
if args.mmfile:
mmfile = args.mmfile
# Read in the typographic definitions.
read_definitions()
# print(typo_definition)
if args.list:
print('symbol\ttranslation')
for sym, translation in typo_definition.items():
print('{}\t{}'.format(sym, translation))
sys.exit(0)
# Now translate stdin using those definitions.
for line in sys.stdin:
s = backquoted.search(line)
new_line = re.sub(backquoted, replace_typographically, line)
print(new_line, end='')