-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert.py
174 lines (123 loc) · 5.05 KB
/
convert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# Goal: convert a labelled-bracket string such as
# [XP [X' [YP [Y' [Y your] [ZP [Z' [Z word]]]]]]]
# into a tree of Node objects.
import sys
from parse import Parse as p
from node import Node, settings
# Okay, now link up Node and ParseResults.
# Cases to handle:
def leaf(text):
    """Return a Node built from ``text`` alone, i.e. with no children passed."""
    childless = Node(text)
    return childless
# the issue:
# it's the case that any text item in a list _must_ be a triangle
# but it was only getting handled as a leaf
def triangle(text):
    """Build a triangle node from a ``"<category> <words ...>"`` string.

    Everything before the first space becomes the category; everything
    after it is kept as one string and wrapped in a single child node.
    Both nodes are created with ``True`` as the third positional argument
    (presumably Node's ``is_triangle`` flag -- confirm against node.py).

    Assumes ``text`` contains at least one space; without one the child
    node is built from the empty string.
    """
    category, _, collapsed_words = text.partition(' ')
    collapsed_child = Node(collapsed_words, [], True)
    return Node(category, [collapsed_child], True)
def is_triangle(text):
    """Return True when ``text`` contains a space, i.e. holds more than one piece."""
    # Splitting on ' ' yields more than one piece exactly when a space occurs.
    return ' ' in text
def handle(label, item):
    """Convert one ``label -> item`` map entry by dispatching on ``item``'s exact type.

    There is always a LHS and a RHS of a map entry. The LHS (``label``) is
    always text; the RHS (``item``) is either text, a list, or a one-entry
    dictionary.

    Raises:
        KeyError: if ``item`` is not exactly a ``str``, ``dict`` or ``list``.
    """
    kind = type(item)
    if kind is str:
        return handle_text(label, item)
    if kind is dict:
        return handle_dict(label, item)
    if kind is list:
        return handle_list(label, item)
    # Same failure the lookup-table form produces for an unknown type.
    raise KeyError(kind)
def handle_text(label, text):
    """Handle a leaf and its label, whether that leaf is a triangle or not.

    A multi-word ``text`` (e.g. ``"D' John"`` from ``{'DP': "D' John"}``)
    is a triangle that carries its own category, so it is built with
    ``triangle`` and then attached *under* ``label``. A single-word
    ``text`` becomes a plain leaf under ``label``.

    Previously the triangle branch returned the triangle node directly,
    which dropped ``label`` from the resulting tree.

    Returns:
        A Node for ``label`` with exactly one child.
    """
    inner = triangle(text) if is_triangle(text) else leaf(text)
    return Node(label, [inner])
def handle_dict(label, d):
    """Handle a label whose value is a dictionary, i.e. a node with one child.

    The entry is read without modifying ``d``. The earlier ``popitem()``
    call emptied the caller's parse data, so the same parse results could
    not be converted (or inspected) a second time.

    If ``d`` has more than one entry, the last one is used, matching what
    ``popitem()`` would have returned.

    Raises:
        KeyError: if ``d`` is empty (same exception type ``popitem()`` raised).
    """
    entries = list(d.items())
    if not entries:
        raise KeyError('handle_dict(): dictionary is empty')
    category, subtree = entries[-1]
    inner_node = handle(category, subtree)
    return Node(label, [inner_node])
def handle_list(label, l):
    """Handle a label whose value is a list, i.e. a node with several children.

    The only 'edge' case is where regular brackets are used like triangles,
    in which case the list holds plain strings rather than one-entry
    mappings. A string item becomes a triangle when it contains a space and
    a leaf otherwise; any other item is treated as a mapping and its entry
    is converted through ``handle``.

    Mapping items are read without being modified. The earlier
    ``popitem()`` call emptied every dictionary in the caller's parse data.
    If a mapping has more than one entry, the last one is used, matching
    what ``popitem()`` would have returned.

    Raises:
        KeyError: if a mapping item is empty (same type ``popitem()`` raised).
    """
    sub_nodes = []
    for item in l:
        if isinstance(item, str):
            node = triangle(item) if is_triangle(item) else leaf(item)
        else:
            entries = list(item.items())
            if not entries:
                raise KeyError('handle_list(): dictionary is empty')
            category, subtree = entries[-1]
            node = handle(category, subtree)
        sub_nodes.append(node)
    return Node(label, sub_nodes)
class Convert:
    """Turn a bracketed string, or ready-made parse results, into a Node tree.

    Attributes:
        pr: the parse results, either passed in or produced by ``p.parse``.
        root: the single top-level entry of ``pr``, or ``None`` when ``pr``
            does not hold exactly one top-level tree.
    """

    def __init__(self, string=None, parse_results=None):
        """Store the parse results and pick out the root entry.

        Args:
            string: text to parse with ``p.parse``; when given, it takes
                precedence over ``parse_results``.
            parse_results: already-parsed input, used when ``string`` is None.
        """
        if string is not None:
            parse_results = p.parse(string)
        self.pr = parse_results
        self.root = self.to_root()

    def to_root(self):
        """Return the only top-level entry of ``self.pr``, or ``None``."""
        if len(self.pr) != 1:
            return None  # we require *exactly one* top-level tree
        return self.pr[0]

    def to_tree(self):
        """Convert ``self.root`` into a tree via ``handle``.

        The root entry is read without being modified, so this method can
        be called repeatedly and ``self.root`` / ``self.pr`` stay intact.
        (The earlier ``popitem()`` call emptied the root on first use.)
        If the root has more than one entry, the last one is used, matching
        what ``popitem()`` would have returned.

        Raises:
            ValueError: if there is no root, i.e. the parse results did not
                contain exactly one top-level tree.
            KeyError: if the root mapping is empty.
        """
        if self.root is None:
            raise ValueError(
                'cannot build a tree: parse results must contain exactly one '
                'top-level tree'
            )
        entries = list(self.root.items())
        if not entries:
            raise KeyError('to_tree(): root dictionary is empty')
        cat, rest = entries[-1]
        return handle(cat, rest)
# one issue exists: some triangles are not being recognized as triangles
# more specifically, if a would-be triangle has a sister, it isn't recognized as being a triangle
def __nota_bene__():
    """Print a demonstration of how triangles and non-triangles parse.

    Shows the ``root`` that ``Convert`` produces for square-bracket and
    angle-bracket variants of the same phrase, with multiple words and
    with a single word, followed by the conclusion drawn from them.
    """
    # Imported here so the function also works when this module is imported;
    # the module-level import of pprint only happens under the __main__ guard.
    from pprint import pprint

    def bar():
        """Print a horizontal divider."""
        print('- ' * 20 + '-')

    print("N.B. the difference between a non-triangle and a triangle, when taking multiple arguments:")
    pprint(Convert("[DP [D' my dear old friend]]").root)
    # >>> {'DP': {"D'": ['my', 'dear', 'old', 'friend']}}
    pprint(Convert("[DP <D' my dear old friend>]").root)
    # >>> {'DP': "D' my dear old friend"}
    bar()
    print("and when taking a single argument:")
    pprint(Convert("[DP [D' John]]").root)
    # >>> {'DP': {"D'": 'John'}}
    pprint(Convert("[DP <D' John>]").root)
    # >>> {'DP': "D' John"}
    bar()
    print("Therefore, a dictionary whose value is a multi-word string is guaranteed to be a triangle,")
    print("and a dictionary whose value is a list of strings is guaranteed to represent a node with a")
    print("single category and two or more leaf children.")
    print("(The latter has no meaning to my knowledge but is possible.)")
if __name__ == "__main__":
    # Manual smoke test: parse each sample, print it, and draw it.
    from pprint import pprint
    from PIL import Image

    W, H = 2500, 1000
    coord = (W/2, 50)

    triangle_tests = [
        "[NP [D the] [N' <AdjP very big> [N dog]]]",  # passes!
        "[NP [DP [D the] [D 30]] [N' [AdjP very big] [N dogs]]]",  # passes!
        # gets cut off but I think it passes
        "[NP [DP [D the] [D 30]] [N' [AdjP <AdvP very very very> [A big]] [N dogs]]]",
        # "[]", fails but that's expected
        # "<>", fails but that's expected
        # gets cut off but I think it passes
        "[IP [NP [DP [D the] [D 30]] [N' [AdjP very big] [N dogs]]] [I' [I will] [VP [V be] [P here]]]]",
        # "<IP [NP [DP [D the] [D 30]] [N' [AdjP very big] [N dogs]]] [I' [I will] [VP [V be] [P here]]]>", # fails but that's expected
        # it includes the brackets, but it works!
        "[IP <NP [DP [D the] [D 30]] [N' [AdjP very big] [N dogs]]> [I' [I will] <VP [V be] [P here]>]]",
        "[IP <NP the 30 very big dogs> [I' [I will] <VP be here>]]",
    ]

    for sentence in triangle_tests:
        # A fresh white canvas is created for every sample.
        canvas = Image.new("RGBA", (W, H), "white")
        print(sentence)
        parsed = p.parse(sentence)
        pprint(parsed)
        tree = Convert(parse_results=parsed).to_tree()
        tree.display()
        tree.draw_node(canvas, coord=coord)
        canvas.show()

    __nota_bene__()