forked from vlfeat/matconvnet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
matdocparser.py
326 lines (277 loc) · 9.78 KB
/
matdocparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
#!/usr/bin/python
# file: matdocparser.py
# author: Andrea Vedaldi
# description: Utility to format MATLAB comments.
# Copyright (C) 2014-15 Andrea Vedaldi.
# All rights reserved.
#
# This file is part of the VLFeat library and is made available under
# the terms of the BSD license (see the COPYING file).
"""
MatDocParser is an interpreter for the MatDoc format. This is a simplified and
stricter version of Markdown suitable for commenting MATLAB functions. The format
is easily understood from an example:

A paragraph starts on a new line.
And continues on following lines.

Indenting with a whitespace introduces a verbatim code section:

   Like this
    This continues it

Different paragraphs are separated by blank lines.

* The *, -, + symbols at the beginning of a line introduce a list.
  Which can be continued on following paragraphs by proper indentation.

  Multiple paragraphs in a list item are also supported.

* This is the second item of the same list.

It is also possible to have definition lists such as

Term1:: Short description 1
   Longer explanation.

   Behaves like a list item.

Term2:: Short description 2
Term3:: Short description 3
  Longer explanations are optional.
"""
import sys
import os
import re
__mpname__ = 'MatDocParser'
__version__ = '1.0-beta1'
__date__ = '2014-12-29'
__description__ = 'MatDoc MATLAB inline function description interpreter.'
__long_description__ = __doc__
__license__ = 'BSD'
__author__ = 'Andrea Vedaldi'
# --------------------------------------------------------------------
# Input line types (terminal symbols)
# --------------------------------------------------------------------
# Terminal symbols are organized in a hierarchy. Each line in the
# input document is mapped to a leaf in this hierarchy, representing
# the type of line detected.
class Symbol(object):
    """Base class shared by terminal and non-terminal symbols.

    ``indent`` holds the indentation attached to the symbol, or ``None``
    when no indentation has been assigned to it.
    """

    indent = None

    def isa(self, classinfo, indent=None):
        """Tell whether this symbol is a ``classinfo``, optionally at ``indent``.

        When ``indent`` is ``None`` only the type is tested; otherwise the
        symbol's own indentation must also be equal to ``indent``.
        """
        if not isinstance(self, classinfo):
            return False
        if indent is None:
            return True
        return self.indent == indent

    def __str__(self, indent=0):
        """Return ``ClassName(<indent>)`` preceded by ``indent`` spaces.

        A symbol whose indentation is ``None`` is shown as ``ClassName(*)``.
        """
        if self.indent is None:
            shown = "*"
        else:
            shown = "%d" % self.indent
        return "%s%s(%s)" % (" " * indent, self.__class__.__name__, shown)
# Terminal symbols
# Note that PL, BH, DH are all subclasses of L; the fields .text and .indent
# have the same meaning for all of them.
class Terminal(Symbol):
    """Common ancestor of the symbols produced by the lexer."""


class EOF(Terminal):
    """End-of-file marker."""


class B(Terminal):
    """Blank line."""


class L(Terminal):
    """Non-empty line of the form '<" "*indent><text>'."""

    text = ""

    def __str__(self, indent=0):
        """Return the base description followed by ``: `` and the line text."""
        head = super(L, self).__str__(indent)
        return "%s: %s" % (head, self.text)


class PL(L):
    """Regular line."""


class BH(L):
    """Bullet header: a line of type ' * <inner_text>'."""

    bullet = None        # bullet character plus the whitespace following it
    inner_indent = None  # column at which the item text starts
    inner_text = None    # item text following the bullet


class DH(L):
    """Description header: a line of type ' <description>::<inner_text>'."""

    description = None   # text found before the '::' separator
    inner_text = None    # text found after the '::' separator
# A lexer object: parse lines of the input document into terminal symbols
class Lexer(object):
    """Map the lines of an input document to terminal symbols.

    ``lines`` is an indexable sequence of strings; each call to ``next``
    classifies the following line and returns the matching symbol.
    """

    # The patterns are tried in the order listed here; the first hit wins.
    _BLANK = re.compile(r"\s*\n?$")
    _DESCRIPTION = re.compile(r"(\s*)(.*)::(.*)\n?$")
    _BULLET = re.compile(r"(\s*)([-\*+]\s*)(\S.*)\n?$")
    _REGULAR = re.compile(r"(\s*)(\S.*)\n?$")

    def __init__(self, lines):
        self.lines = lines
        # Index of the last line handed out; -1 means nothing was read yet.
        self.pos = -1

    def next(self):
        """Advance by one line and return its terminal symbol.

        Returns an ``EOF`` once the lines are exhausted (and on every later
        call), otherwise a ``B``, ``DH``, ``BH`` or ``PL`` instance.
        """
        self.pos += 1
        if self.pos >= len(self.lines):
            return EOF()
        current = self.lines[self.pos]

        # Blank line.
        if self._BLANK.match(current):
            return B()

        # Line of type '  <description>::<inner_text>'.
        found = self._DESCRIPTION.match(current)
        if found:
            lead, description, inner = found.groups()
            header = DH()
            header.indent = len(lead)
            header.description = description
            header.inner_text = inner
            header.text = header.description + "::" + header.inner_text
            return header

        # Line of type '  * <inner_text>'.
        found = self._BULLET.match(current)
        if found:
            lead, bullet, inner = found.groups()
            item = BH()
            item.indent = len(lead)
            item.bullet = bullet
            # The item text starts right after the bullet and its spacing.
            item.inner_indent = item.indent + len(item.bullet)
            item.inner_text = inner
            item.text = item.bullet + item.inner_text
            return item

        # Line of type '  <content>'.
        found = self._REGULAR.match(current)
        if found:
            lead, content = found.groups()
            regular = PL()
            regular.indent = len(lead)
            regular.text = content
            return regular

        # No pattern matched (e.g. a newline embedded in the middle of the
        # line): no symbol is produced.
        return None
# --------------------------------------------------------------------
# Non-terminal
# --------------------------------------------------------------------
# DIVL is a consecutive list of blocks with the same indent and/or blank
# lines.
#
# DIVL(indent) -> (B | P(indent) | V(indent) | BL(indent) | DL(indent))+
#
# A P(indent) is a paragraph, a list of regular lines indented by the
# same amount.
#
# P(indent) -> PL(indent)+
#
# A V(indent) is a verbatim (code) block. It contains text lines and blank
# lines that have indentation strictly larger than indent:
#
# V(indent) -> L(i) (B | L(j), j > indent)+, for all i > indent
#
# A DL(indent) is a description list:
#
# DL(indent) -> DH(indent) DIVL(i)*, i > indent
#
# A BL(indent) is a bullet list. It contains bullet list items, namely
# a sequence of special DIVL_BH(indent,inner_indent) whose first block
# is a paragraph P_BH(indent,inner_indent) whose first line is a
# bullet header BH(indent,inner_indent). Here the bullet indentation
# inner_indent is obtained as the inner_indent of the
# BH(indent,inner_indent) symbol. Formalising this with grammar rules
# is verbose; instead we use the simple `hack' of defining
#
# BL(indent) -> (DIVL(inner_indent))+
#
# where DIVL(inner_indent) are regular DIVL, obtained after replacing
# the bullet header line BH with a standard paragraph line PL.
class NonTerminal(Symbol):
    """Inner node of the parse tree, holding an ordered list of children."""

    children = []

    def __init__(self, *args):
        # Every positional argument becomes a child, in the order given.
        self.children = list(args)

    def __str__(self, indent=0):
        """Return this node on one line, then each child two columns deeper."""
        rows = [" " * indent + super(NonTerminal, self).__str__()]
        rows.extend(child.__str__(indent + 2) for child in self.children)
        return "\n".join(rows)
class DIVL(NonTerminal):
    """List of consecutive blocks and blank lines sharing one indentation."""


class DIV(NonTerminal):
    """Base class of the plain text blocks (see P and V)."""


class BL(NonTerminal):
    """Bullet list."""


class DL(NonTerminal):
    """Description list."""


class DI(NonTerminal):
    """Description list item."""


class P(DIV):
    """Paragraph."""


class V(DIV):
    """Verbatim (code) block."""
# --------------------------------------------------------------------
class Parser(object):
    """Build a tree of non-terminal symbols from the output of a lexer.

    The parser keeps one symbol of lookahead and a stack of symbols.
    ``shift`` moves the lookahead onto the stack and ``reduce`` replaces the
    topmost stack entries with a single non-terminal wrapping them.

    NOTE: ``parse`` builds a single DIVL at the indentation of the first
    non-blank line. A later line indented less than that ends the DIVL, and
    whatever follows it is not consumed.
    """

    lexer = None
    stack = []
    lookahead = None

    def __init__(self):
        # Per-instance state, so that two parsers never share the mutable
        # class-level ``stack`` list.
        self.lexer = None
        self.stack = []
        self.lookahead = None

    def _fail(self, expected):
        """Report a parsing error on stderr and exit with status 1."""
        sys.stderr.write("Error: expected %s\n" % expected)
        sys.exit(1)

    def shift(self):
        """Push the lookahead (if any) on the stack and read the next symbol."""
        if self.lookahead is not None:
            self.stack.append(self.lookahead)
        self.lookahead = self.lexer.next()

    def reduce(self, S, n, indent=None):
        """Replace the top ``n`` stack entries with ``S(*entries)``.

        The new symbol gets ``indent`` as its indentation, is pushed on the
        stack and returned. ``n == 0`` creates a symbol without children and
        leaves the existing stack entries untouched.
        """
        # Slice from an explicit start index: ``stack[-n:]`` with n == 0 would
        # select the whole stack instead of nothing.
        start = max(len(self.stack) - n, 0)
        s = S(*self.stack[start:])
        del self.stack[start:]
        s.indent = indent
        self.stack.append(s)
        return s

    def parse(self, lexer):
        """Parse the symbols supplied by ``lexer`` and return the root DIVL."""
        self.lexer = lexer
        self.stack = []
        # Leading blank lines are skipped without being pushed on the stack.
        self.lookahead = self.lexer.next()
        while self.lookahead.isa(B):
            self.lookahead = self.lexer.next()
        self.parse_DIVL(self.lookahead.indent)
        return self.stack[0]

    def parse_P(self, indent):
        """Shift consecutive PL(indent) lines and reduce them to a P(indent).

        When ``indent`` is ``None`` the indentation of the lookahead is used.
        """
        i = 0
        if indent is None:
            indent = self.lookahead.indent
        while self.lookahead.isa(PL, indent):
            self.shift()
            i = i + 1
        self.reduce(P, i, indent)

    def parse_V(self, indent):
        """Shift lines indented more than ``indent``, and any blank lines,
        then reduce them to a V(indent)."""
        i = 0
        while (self.lookahead.isa(L) and self.lookahead.indent > indent) or \
                self.lookahead.isa(B):
            self.shift()
            i = i + 1
        self.reduce(V, i, indent)

    def parse_DIV_helper(self, indent):
        """Parse one element of a DIVL(indent).

        Returns True after leaving a B, P(indent), V(indent), BL(indent) or
        DL(indent) on the stack, and False when the lookahead cannot start
        any of them.
        """
        if self.lookahead.isa(PL, indent):
            self.parse_P(indent)
        elif self.lookahead.isa(L) and (self.lookahead.indent > indent):
            # Any kind of line that is indented deeper starts a verbatim block.
            self.parse_V(indent)
        elif self.lookahead.isa(BH, indent):
            self.parse_BL(indent)
        elif self.lookahead.isa(DH, indent):
            self.parse_DL(indent)
        elif self.lookahead.isa(B):
            self.shift()
        else:
            return False
        return True

    def parse_BI_helper(self, indent):
        """Parse one bullet item introduced by a BH(indent) lookahead.

        Returns False when the lookahead is not such a header. Otherwise the
        header is replaced by a regular line carrying its inner text at its
        inner indentation, and the item is parsed as a DIVL(inner_indent)
        which is left on the stack.
        """
        x = self.lookahead
        if not x.isa(BH, indent):
            return False
        indent = x.inner_indent
        self.lookahead = PL()
        self.lookahead.text = x.inner_text
        self.lookahead.indent = indent
        self.parse_DIVL(indent)
        return True

    def parse_BL(self, indent):
        """Parse consecutive bullet items and reduce them to a BL(indent).

        Exits with status 1 if the lookahead does not start a bullet item.
        """
        i = 0
        while self.parse_BI_helper(indent):
            i = i + 1
        if i == 0:
            self._fail("a bullet list item")
        self.reduce(BL, i, indent)

    def parse_DI_helper(self, indent):
        """Parse one description item introduced by a DH(indent) lookahead.

        Returns False when the lookahead is not such a header. The header is
        shifted; if the next line is indented deeper, it is parsed as a DIVL
        at its own indentation and both are reduced to a DI, otherwise the DI
        contains the header alone.
        """
        if not self.lookahead.isa(DH, indent):
            return False
        self.shift()
        # Blank lines and EOF carry no indentation (None), so make sure the
        # lookahead is a line before comparing indentations.
        if self.lookahead.isa(L) and self.lookahead.indent > indent:
            self.parse_DIVL(self.lookahead.indent)
            self.reduce(DI, 2, indent)
        else:
            self.reduce(DI, 1, indent)
        return True

    def parse_DL(self, indent):
        """Parse consecutive description items and reduce them to a DL(indent).

        Exits with status 1 if the lookahead does not start a description
        item.
        """
        i = 0
        while self.parse_DI_helper(indent):
            i = i + 1
        if i == 0:
            self._fail("a description list item")
        self.reduce(DL, i, indent)

    def parse_DIVL(self, indent=None):
        """Parse a list of blocks and blank lines and reduce it to a DIVL.

        When ``indent`` is ``None`` it is taken from the first non-blank line
        met while parsing.
        """
        i = 0
        while True:
            # Settle the indentation before classifying the line, so that it
            # is never compared against None.
            if indent is None and self.lookahead.isa(L):
                indent = self.lookahead.indent
            if not self.parse_DIV_helper(indent):
                break
            i = i + 1
        self.reduce(DIVL, i, indent)
if __name__ == '__main__':
    # Small demonstration: parse a sample description and print the tree.
    # The sample is bound to ``text`` so the builtin ``str`` is not shadowed.
    text = """
Some text describing a MATLAB function F().
The function F() does nothing.
It has the following options:
CarryOn:: True
Keep doing nothing for the time being.
Stop:: 'here'
Stop doing whathever here. Example:
% call the function
f('stop', 'there')
% contemplate the results
So in short we conclude that:
* This does nothing
* It could do something,
but still does not.
See also: hope for the best.
"""
    parser = Parser()
    lexer = Lexer(text.split('\n'))
    tree = parser.parse(lexer)
    # Single-argument call form: valid under both Python 2 and Python 3.
    print(tree)