Skip to content

Commit a18996a

Browse files
author
Timo Schmid
committedDec 11, 2011
Merge branch 'release/0.4'
2 parents 40835c7 + de63139 commit a18996a

19 files changed

+2968
-1272
lines changed
 

‎.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,6 @@ __pycache__
33
*~
44
*.bin
55
*.xml
6+
*.class
7+
*.bak
8+
*.swp

‎LICENSE.txt

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
Copyright (c) 2011, Timo Schmid <tschmid@ernw.de>
2+
All rights reserved.
3+
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions
6+
are met:
7+
8+
* Redistributions of source code must retain the above copyright
9+
notice, this list of conditions and the following disclaimer.
10+
* Redistributions in binary form must reproduce the above copyright
11+
notice, this list of conditions and the following disclaimer in the
12+
documentation and/or other materials provided with the distribution.
13+
* Neither the name of the ERNW GmbH nor the names of its contributors
14+
may be used to endorse or promote products derived from this software
15+
without specific prior written permission.
16+
17+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

‎WcfPlugin.py

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# vim: set ts=4 sw=4 tw=79 fileencoding=utf-8:
2+
from __future__ import absolute_import
3+
from StringIO import StringIO
4+
import array
5+
6+
from bluec0re import ICallback
7+
8+
from wcf.records import Record, print_records, dump_records
9+
from wcf.xml2records import Parser
10+
11+
12+
def encode_decode(headers, data):
    """Convert a message body between WCF binary and its textual form.

    The direction is chosen from the headers:

    * If an ``X-WCF-Encode`` header is present, ``data`` is fed to
      ``Parser`` and the resulting records are serialized with
      ``dump_records``. The marker header is removed and the content type
      becomes ``application/soap+msbin1``.
    * Otherwise, if the content type is exactly ``application/soap+msbin1``,
      ``data`` is parsed with ``Record.parse`` and rendered with
      ``print_records``. The ``X-WCF-Encode`` marker is added and the content
      type becomes ``text/soap+xml``.
    * In every other case (including an empty body) nothing is changed.

    ``headers`` is modified in place; ``Content-Length`` is recomputed
    whenever the body was converted.

    Returns the ``(headers, data)`` pair.
    """
    if not data:
        return headers, data

    if 'X-WCF-Encode' in headers:
        # Textual form -> binary records.
        parser = Parser()
        parser.feed(data)
        data = dump_records(parser.records)
        del headers['X-WCF-Encode']
        content_type = 'application/soap+msbin1'
    elif headers.get('Content-Type') == 'application/soap+msbin1':
        # Binary records -> textual form.
        source = StringIO(data)
        records = Record.parse(source)
        source.close()

        rendered = StringIO()
        print_records(records, fp=rendered)
        data = rendered.getvalue()
        rendered.close()

        headers['X-WCF-Encode'] = '1'
        content_type = 'text/soap+xml'
    else:
        # Not a message this plugin handles.
        return headers, data

    headers['Content-Type'] = content_type
    headers['Content-Length'] = str(len(data))
    return headers, data
41+
42+
class WcfPlugin(ICallback):
    """Proxy callback that converts WCF binary message bodies on the fly.

    The raw message is split into start line, headers and body; the body is
    passed through ``encode_decode`` and the message is reassembled.
    """

    def __str__(self):
        return type(self).__name__

    def processProxyMessage(self, *args, **kwargs):
        """Rewrite one proxied message.

        The raw message is taken from ``args[10]`` and must expose
        ``tostring()``. Returns an ``array.array('b', ...)`` holding the
        rebuilt message, or the original message object unchanged when it
        cannot be split into a header block and a body.
        """
        raw = args[10]
        message = raw.tostring()

        # Split at the first blank line. Without one there is no reliable
        # header/body boundary, so the message is passed through untouched
        # (the previous code sliced at find() == -1 and corrupted it).
        head, separator, data = message.partition('\r\n\r\n')
        if not separator:
            return raw

        lines = head.split('\r\n')

        headers = {}
        for line in lines[1:]:
            # Split on the first colon only: values may themselves contain
            # ": ", and the space after the colon is optional.
            name, colon, value = line.partition(':')
            if not colon:
                # Malformed header line; do not risk rewriting the message.
                return raw
            headers[name.strip()] = value.strip()

        headers, data = encode_decode(headers, data)

        # lines[0] is the request/status line and is kept verbatim.
        header_lines = [lines[0]]
        header_lines.extend('%s: %s' % (n, v) for n, v in headers.items())
        header = '\r\n'.join(header_lines) + '\r\n\r\n'
        return array.array('b', header + data)
69+

‎parse.py

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/env python2
2+
# Copyright (c) 2011, Timo Schmid <tschmid@ernw.de>
3+
# All rights reserved.
4+
#
5+
# Redistribution and use in source and binary forms, with or without
6+
# modification, are permitted provided that the following conditions
7+
# are met:
8+
#
9+
# * Redistributions of source code must retain the above copyright
10+
# notice, this list of conditions and the following disclaimer.
11+
# * Redistributions in binary form must reproduce the above copyright
12+
# notice, this list of conditions and the following disclaimer in the
13+
# documentation and/or other materials provided with the distribution.
14+
# * Neither the name of the ERNW GmbH nor the names of its contributors
15+
# may be used to endorse or promote products derived from this software
16+
# without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22+
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+
30+
if __name__ == '__main__':
    import sys
    from wcf.records import Record, print_records

    # Read from the file named as first argument, otherwise from stdin.
    source = open(sys.argv[1], 'rb') if len(sys.argv) > 1 else sys.stdin

    with source:
        print_records(Record.parse(source))

‎proxy.py

+435
Large diffs are not rendered by default.

‎records.py

-1,266
This file was deleted.

‎setup.py

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import os
2+
from setuptools import setup
3+
4+
# Package metadata for the wcf-binary <-> XML conversion library.
setup(
    name="wcf-binary parser",
    version="0.4",
    author="Timo Schmid",
    author_email="tschmid@ernw.de",
    description="A library for transforming wcf-binary data from and to xml",
    license="BSD",
    keywords="wcf wcf-binary xml",
    url="",
    packages=['wcf', 'tests'],
    long_description="",
    test_suite="tests.alltests.Suite",
    classifiers=[
        "Development Status :: 4 - Beta",
        "Topic :: Utilities",
        "License :: OSI Approved :: BSD License",
        "Programming Language :: Python :: 2",
        "Topic :: Internet :: WWW/HTTP",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
)

‎tests/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
import alltests

‎tests/alltests.py

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/usr/bin/env python2
2+
# vim: set ts=4 sw=4 tw=79 fileencoding=utf-8:
3+
4+
from __future__ import absolute_import
5+
import logging
6+
import doctest
7+
import unittest
8+
9+
from wcf.records import *
10+
11+
test_bin = (
12+
"56020b0173040b0161065608440a1e0082993a687474703a2f2f646f6373"
13+
"2e6f617369732d6f70656e2e6f72672f77732d73782f77732d7472757374"
14+
"2f3230303531322f5253542f4973737565441aad5db293d4bc0ba547b9dc"
15+
"cb2f140fd0c3442c442aab1401440c1e0082993c687474703a2f2f646377"
16+
"733a35383435312f44617465762f4672616d65776f726b2f52656d6f7465"
17+
"536572766963654d6f64656c2f456e61626c657201560e41057472757374"
18+
"14526571756573745365637572697479546f6b656e0407436f6e74657874"
19+
"982c757569642d34393037636538322d303630372d346263302d62643861"
20+
"2d6531633937663165323862372d32320905747275737430687474703a2f"
21+
"2f646f63732e6f617369732d6f70656e2e6f72672f77732d73782f77732d"
22+
"74727573742f3230303531324105747275737409546f6b656e5479706599"
23+
"41687474703a2f2f646f63732e6f617369732d6f70656e2e6f72672f7773"
24+
"2d73782f77732d736563757265636f6e766572736174696f6e2f32303035"
25+
"31322f736374410574727573740b52657175657374547970659936687474"
26+
"703a2f2f646f63732e6f617369732d6f70656e2e6f72672f77732d73782f"
27+
"77732d74727573742f3230303531322f497373756541057472757374074b"
28+
"657953697a658b0001410574727573740e42696e61727945786368616e67"
29+
"650674aaa60306d402aad8029e364e544c4d5353500001000000b7b218e2"
30+
"0a000a002d00000005000500280000000601b11d0000000f434c57533157"
31+
"45425345525649439f0145010101").decode('hex')
32+
33+
class TransformTest(unittest.TestCase):
    """Round-trip check: parsing ``test_bin`` and dumping it again must
    reproduce the original bytes."""

    def runTest(self):
        from StringIO import StringIO

        parsed = Record.parse(StringIO(test_bin))
        self.assertEqual(test_bin, dump_records(parsed))
41+
42+
class Suite(unittest.TestSuite):
    """Test suite combining the record modules' doctests with the
    binary round-trip test."""

    def __init__(self, *args, **kwargs):
        super(Suite, self).__init__(*args, **kwargs)

        # Doctests first, in the same order as before.
        for module in (base, elements, attributes, text):
            self.addTest(doctest.DocTestSuite(module))
        self.addTest(TransformTest())
52+
53+
if __name__ == '__main__':
54+
unittest.main()

‎wcf/MyHTMLParser.py

+396
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,396 @@
1+
"""A parser for HTML and XHTML."""
2+
# This file is a modified version of the standard library's HTMLParser module.
3+
# In this version, tag and attribute names are not converted to
4+
# lowercase.
5+
6+
# This file is based on sgmllib.py, but the API is slightly different.
7+
8+
# XXX There should be a way to distinguish between PCDATA (parsed
9+
# character data -- the normal case), RCDATA (replaceable character
10+
# data -- only char and entity references and end tags are special)
11+
# and CDATA (character data -- only end tags are special).
12+
13+
14+
import markupbase
15+
import re
16+
17+
# Regular expressions used for parsing
18+
19+
# All patterns are raw strings so that regex escapes such as \s and \Z are
# never interpreted as (invalid) Python string escapes.

# Next character that needs special handling in normal / CDATA mode.
interesting_normal = re.compile(r'[&<]')
interesting_cdata = re.compile(r'<(/|\Z)')
# Start of something that might become an entity or character reference.
incomplete = re.compile(r'&[a-zA-Z#]')

# Named entity reference and numeric character reference, each including the
# single terminating character that follows the name/number.
entityref = re.compile(r'&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
charref = re.compile(r'&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')

starttagopen = re.compile(r'<[a-zA-Z]')
piclose = re.compile(r'>')
commentclose = re.compile(r'--\s*>')
tagfind = re.compile(r'[a-zA-Z][-.a-zA-Z0-9:_]*')
# group 1: attribute name, group 2: "= value" part, group 3: raw value
attrfind = re.compile(
    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')

# Matches a start tag up to (but not including) its closing '>' or '/>'.
locatestarttagend = re.compile(r"""
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
""", re.VERBOSE)
endendtag = re.compile(r'>')
endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
50+
51+
52+
class HTMLParseError(Exception):
    """Exception raised for all parse errors.

    Carries the message plus the (optional) line number and zero-based
    column offset at which the problem was detected.
    """

    def __init__(self, msg, position=(None, None)):
        assert msg
        self.msg = msg
        self.lineno, self.offset = position[0], position[1]

    def __str__(self):
        # Build "<msg>[, at line N][, column M]"; the column is reported
        # one-based.
        pieces = [self.msg]
        if self.lineno is not None:
            pieces.append(", at line %d" % self.lineno)
        if self.offset is not None:
            pieces.append(", column %d" % (self.offset + 1))
        return "".join(pieces)
68+
69+
70+
class HTMLParser(markupbase.ParserBase):
    """Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  Entity references are
    passed by calling self.handle_entityref() with the entity
    reference as the argument.  Numeric character references are
    passed to self.handle_charref() with the string containing the
    reference as the argument.

    Tag and attribute names are passed to the handlers exactly as they
    appear in the input; they are not converted to lowercase.
    """

    # Elements whose content is scanned in CDATA mode (only "</" is special).
    CDATA_CONTENT_ELEMENTS = ("script", "style")

    def __init__(self):
        """Initialize and reset this instance."""
        self.reset()

    def reset(self):
        """Reset this instance.  Loses all unprocessed data."""
        self.rawdata = ''
        self.lasttag = '???'
        self.interesting = interesting_normal
        markupbase.ParserBase.reset(self)

    def feed(self, data):
        r"""Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        """
        self.rawdata = self.rawdata + data
        self.goahead(0)

    def close(self):
        """Handle any buffered data."""
        self.goahead(1)

    def error(self, message):
        """Raise HTMLParseError for ``message`` at the current position."""
        raise HTMLParseError(message, self.getpos())

    # Source text of the most recently parsed start tag, or None.
    __starttag_text = None

    def get_starttag_text(self):
        """Return full source of start tag: '<...>'."""
        return self.__starttag_text

    def set_cdata_mode(self):
        """Switch scanning to CDATA mode."""
        self.interesting = interesting_cdata

    def clear_cdata_mode(self):
        """Switch scanning back to normal mode."""
        self.interesting = interesting_normal

    # Internal -- handle data as far as reasonable.  May leave state
    # and data to be processed by a subsequent call.  If 'end' is
    # true, force handling all data as if followed by EOF marker.
    def goahead(self, end):
        rawdata = self.rawdata
        i = 0
        n = len(rawdata)
        while i < n:
            match = self.interesting.search(rawdata, i)  # < or &
            if match:
                j = match.start()
            else:
                j = n
            if i < j:
                self.handle_data(rawdata[i:j])
            i = self.updatepos(i, j)
            if i == n:
                break
            startswith = rawdata.startswith
            if startswith('<', i):
                if starttagopen.match(rawdata, i):  # < + letter
                    k = self.parse_starttag(i)
                elif startswith("</", i):
                    k = self.parse_endtag(i)
                elif startswith("<!--", i):
                    k = self.parse_comment(i)
                elif startswith("<?", i):
                    k = self.parse_pi(i)
                elif startswith("<!", i):
                    k = self.parse_declaration(i)
                elif (i + 1) < n:
                    self.handle_data("<")
                    k = i + 1
                else:
                    break
                if k < 0:
                    # Construct is not complete yet.
                    if end:
                        self.error("EOF in middle of construct")
                    break
                i = self.updatepos(i, k)
            elif startswith("&#", i):
                match = charref.match(rawdata, i)
                if match:
                    name = match.group()[2:-1]
                    self.handle_charref(name)
                    k = match.end()
                    # The pattern consumes one terminating character; give
                    # it back unless it is the ';' that ends the reference.
                    if not startswith(';', k-1):
                        k = k - 1
                    i = self.updatepos(i, k)
                    continue
                else:
                    if ";" in rawdata[i:]:  # bail by consuming '&#'
                        # Consume the two characters at the current
                        # position.  Slicing from 0 / jumping to offset 2
                        # would emit the wrong text and move the position
                        # backwards, duplicating already-handled data.
                        self.handle_data(rawdata[i:i+2])
                        i = self.updatepos(i, i + 2)
                    break
            elif startswith('&', i):
                match = entityref.match(rawdata, i)
                if match:
                    name = match.group(1)
                    self.handle_entityref(name)
                    k = match.end()
                    if not startswith(';', k-1):
                        k = k - 1
                    i = self.updatepos(i, k)
                    continue
                match = incomplete.match(rawdata, i)
                if match:
                    # match.group() will contain at least 2 chars
                    if end and match.group() == rawdata[i:]:
                        self.error("EOF in middle of entity or char ref")
                    # incomplete
                    break
                elif (i + 1) < n:
                    # not the end of the buffer, and can't be confused
                    # with some other construct
                    self.handle_data("&")
                    i = self.updatepos(i, i + 1)
                else:
                    break
            else:
                assert 0, "interesting.search() lied"
        # end while
        if end and i < n:
            self.handle_data(rawdata[i:n])
            i = self.updatepos(i, n)
        # Keep whatever could not be processed for the next call.
        self.rawdata = rawdata[i:]

    # Internal -- parse processing instr, return end or -1 if not terminated
    def parse_pi(self, i):
        rawdata = self.rawdata
        assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
        match = piclose.search(rawdata, i+2)  # >
        if not match:
            return -1
        j = match.start()
        self.handle_pi(rawdata[i+2: j])
        j = match.end()
        return j

    # Internal -- handle starttag, return end or -1 if not terminated
    def parse_starttag(self, i):
        self.__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self.__starttag_text = rawdata[i:endpos]

        # Now parse the data between i+1 and j into a tag and attrs
        attrs = []
        match = tagfind.match(rawdata, i+1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        # Case is preserved on purpose (no .lower()).
        self.lasttag = tag = rawdata[i+1:k]

        while k < endpos:
            m = attrfind.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                attrvalue = None
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                 attrvalue[:1] == '"' == attrvalue[-1:]:
                # Quoted value: strip the quotes and resolve references.
                attrvalue = attrvalue[1:-1]
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname, attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            # NOTE: lineno/offset are computed but not passed on; error()
            # reports the position from getpos().
            lineno, offset = self.getpos()
            if "\n" in self.__starttag_text:
                lineno = lineno + self.__starttag_text.count("\n")
                offset = len(self.__starttag_text) \
                         - self.__starttag_text.rfind("\n")
            else:
                offset = offset + len(self.__starttag_text)
            self.error("junk characters in start tag: %r"
                       % (rawdata[k:endpos][:20],))
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                self.set_cdata_mode()
        return endpos

    # Internal -- check to see if we have a complete starttag; return end
    # or -1 if incomplete.
    def check_for_whole_start_tag(self, i):
        rawdata = self.rawdata
        m = locatestarttagend.match(rawdata, i)
        if m:
            j = m.end()
            next_char = rawdata[j:j+1]
            if next_char == ">":
                return j + 1
            if next_char == "/":
                if rawdata.startswith("/>", j):
                    return j + 2
                if rawdata.startswith("/", j):
                    # buffer boundary
                    return -1
                # else bogus input
                self.updatepos(i, j + 1)
                self.error("malformed empty start tag")
            if next_char == "":
                # end of input
                return -1
            if next_char in ("abcdefghijklmnopqrstuvwxyz=/"
                             "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
                # end of input in or before attribute value, or we have the
                # '/' from a '/>' ending
                return -1
            self.updatepos(i, j)
            self.error("malformed start tag")
        raise AssertionError("we should not get here!")

    # Internal -- parse endtag, return end or -1 if incomplete
    def parse_endtag(self, i):
        rawdata = self.rawdata
        assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
        match = endendtag.search(rawdata, i+1)  # >
        if not match:
            return -1
        j = match.end()
        match = endtagfind.match(rawdata, i)  # </ + tag + >
        if not match:
            self.error("bad end tag: %r" % (rawdata[i:j],))
        # Case is preserved on purpose (no .lower()).
        tag = match.group(1)
        self.handle_endtag(tag)
        self.clear_cdata_mode()
        return j

    # Overridable -- finish processing of start+end tag: <tag.../>
    def handle_startendtag(self, tag, attrs):
        self.handle_starttag(tag, attrs)
        self.handle_endtag(tag)

    # Overridable -- handle start tag
    def handle_starttag(self, tag, attrs):
        pass

    # Overridable -- handle end tag
    def handle_endtag(self, tag):
        pass

    # Overridable -- handle character reference
    def handle_charref(self, name):
        pass

    # Overridable -- handle entity reference
    def handle_entityref(self, name):
        pass

    # Overridable -- handle data
    def handle_data(self, data):
        pass

    # Overridable -- handle comment
    def handle_comment(self, data):
        pass

    # Overridable -- handle declaration
    def handle_decl(self, decl):
        pass

    # Overridable -- handle processing instruction
    def handle_pi(self, data):
        pass

    def unknown_decl(self, data):
        self.error("unknown declaration: %r" % (data,))

    # Internal -- helper to remove special character quoting
    # Lazily built name -> character table shared by all instances.
    entitydefs = None

    def unescape(self, s):
        """Return ``s`` with entity and character references resolved.

        References that cannot be resolved are left in the text.
        """
        if '&' not in s:
            return s

        def replaceEntities(s):
            s = s.groups()[0]
            try:
                if s[0] == "#":
                    s = s[1:]
                    if s[0] in ['x', 'X']:
                        c = int(s[1:], 16)
                    else:
                        c = int(s)
                    return unichr(c)
            except ValueError:
                return '&#'+s+';'
            else:
                # Cannot use name2codepoint directly, because HTMLParser supports apos,
                # which is not part of HTML 4
                import htmlentitydefs
                if HTMLParser.entitydefs is None:
                    entitydefs = HTMLParser.entitydefs = {'apos': u"'"}
                    for k, v in htmlentitydefs.name2codepoint.iteritems():
                        entitydefs[k] = unichr(v)
                try:
                    return self.entitydefs[s]
                except KeyError:
                    return '&'+s+';'

        return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", replaceEntities, s)

‎wcf/__init__.py

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# vim: set ts=4 sw=4 tw=79 fileencoding=utf-8:
2+
# Copyright (c) 2011, Timo Schmid <tschmid@ernw.de>
3+
# All rights reserved.
4+
#
5+
# Redistribution and use in source and binary forms, with or without
6+
# modification, are permitted provided that the following conditions
7+
# are met:
8+
#
9+
# * Redistributions of source code must retain the above copyright
10+
# notice, this list of conditions and the following disclaimer.
11+
# * Redistributions in binary form must reproduce the above copyright
12+
# notice, this list of conditions and the following disclaimer in the
13+
# documentation and/or other materials provided with the distribution.
14+
# * Neither the name of the ERNW GmbH nor the names of its contributors
15+
# may be used to endorse or promote products derived from this software
16+
# without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22+
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+

‎datatypes.py ‎wcf/datatypes.py

+52-6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,32 @@
1+
# vim: set ts=4 sw=4 tw=79 fileencoding=utf-8:
2+
# Copyright (c) 2011, Timo Schmid <tschmid@ernw.de>
3+
# All rights reserved.
4+
#
5+
# Redistribution and use in source and binary forms, with or without
6+
# modification, are permitted provided that the following conditions
7+
# are met:
8+
#
9+
# * Redistributions of source code must retain the above copyright
10+
# notice, this list of conditions and the following disclaimer.
11+
# * Redistributions in binary form must reproduce the above copyright
12+
# notice, this list of conditions and the following disclaimer in the
13+
# documentation and/or other materials provided with the distribution.
14+
# * Neither the name of the ERNW GmbH nor the names of its contributors
15+
# may be used to endorse or promote products derived from this software
16+
# without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22+
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+
130
import struct
231
import logging
332

@@ -50,7 +79,7 @@ def to_bytes(self):
5079
return struct.pack('<BB',
5180
value_a | 0x80,
5281
value_b)
53-
elif value_a != 0:
82+
else:
5483
return struct.pack('<B',
5584
value_a)
5685

@@ -79,21 +108,38 @@ def __init__(self, *args):
79108

80109
def to_bytes(self):
81110
"""
82-
>>> Utf8String("abc").to_bytes()
111+
>>> Utf8String(u"abc").to_bytes()
83112
'\\x03\x61\x62\x63'
113+
>>> Utf8String(u"\xfcber").to_bytes()
114+
'\\x05\\xc3\\xbcber'
115+
>>> Utf8String("\\xc3\\xbcber".decode('utf-8')).to_bytes()
116+
'\\x05\\xc3\\xbcber'
84117
"""
85-
strlen = len(self.value)
118+
data = self.value.encode('utf-8')
119+
strlen = len(data)
86120

87-
return MultiByteInt31(strlen).to_bytes() + self.value
121+
return MultiByteInt31(strlen).to_bytes() + data
88122

89123
def __str__(self):
90-
return str(self.value)
124+
return self.value.decode('latin1')
125+
126+
def __unicode__(self):
127+
return self.value
91128

92129
@classmethod
93130
def parse(cls, fp):
131+
"""
132+
>>> from StringIO import StringIO as io
133+
>>> fp = io("\\x05\\xc3\\xbcber")
134+
>>> s = Utf8String.parse(fp)
135+
>>> s.to_bytes()
136+
'\\x05\\xc3\\xbcber'
137+
>>> print str(s)
138+
'über'
139+
"""
94140
lngth = struct.unpack('<B', fp.read(1))[0]
95141

96-
return cls(fp.read(lngth))
142+
return cls(fp.read(lngth).decode('utf-8'))
97143

98144
class Decimal(object):
99145
def __init__(self, sign, high, low, scale):

‎dictionary.py ‎wcf/dictionary.py

+30
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,31 @@
1+
# vim: set ts=4 sw=4 tw=79 fileencoding=utf-8:
2+
# Copyright (c) 2011, Timo Schmid <tschmid@ernw.de>
3+
# All rights reserved.
4+
#
5+
# Redistribution and use in source and binary forms, with or without
6+
# modification, are permitted provided that the following conditions
7+
# are met:
8+
#
9+
# * Redistributions of source code must retain the above copyright
10+
# notice, this list of conditions and the following disclaimer.
11+
# * Redistributions in binary form must reproduce the above copyright
12+
# notice, this list of conditions and the following disclaimer in the
13+
# documentation and/or other materials provided with the distribution.
14+
# * Neither the name of the ERNW GmbH nor the names of its contributors
15+
# may be used to endorse or promote products derived from this software
16+
# without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22+
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
129
dictionary = {
230
0x00 : 'mustUnderstand',
331
0x02 : 'Envelope',
@@ -486,3 +514,5 @@
486514
0x3CA : 'faultactor',
487515
0x3CC : 'detail',
488516
}
517+
518+
inverted_dict = dict([(v, k) for (k, v) in dictionary.iteritems()])

‎wcf/records/__init__.py

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# vim: set ts=4 sw=4 tw=79 fileencoding=utf-8:
2+
# Copyright (c) 2011, Timo Schmid <tschmid@ernw.de>
3+
# All rights reserved.
4+
#
5+
# Redistribution and use in source and binary forms, with or without
6+
# modification, are permitted provided that the following conditions
7+
# are met:
8+
#
9+
# * Redistributions of source code must retain the above copyright
10+
# notice, this list of conditions and the following disclaimer.
11+
# * Redistributions in binary form must reproduce the above copyright
12+
# notice, this list of conditions and the following disclaimer in the
13+
# documentation and/or other materials provided with the distribution.
14+
# * Neither the name of the ERNW GmbH nor the names of its contributors
15+
# may be used to endorse or promote products derived from this software
16+
# without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22+
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+
from __future__ import absolute_import
30+
import sys
31+
import logging
32+
33+
log = logging.getLogger(__name__)
34+
35+
from wcf.records.base import *
36+
from wcf.records.text import *
37+
from wcf.records.attributes import *
38+
from wcf.records.elements import *
39+
40+
def print_records(records, skip=0, fp=None):
    """Write a textual rendering of ``records`` to ``fp``.

    :param records: iterable of records, or ``None`` (nothing is written).
    :param skip: nesting depth; element start tags are written on a new
        line, indented by ``skip`` spaces.
    :param fp: object with a ``write`` method; defaults to ``sys.stdout``.
    :returns: ``None`` if ``records`` is ``None``; otherwise ``True`` when
        the last record processed was an element and ``False`` when it was
        not (or when there were no records). The caller uses this to decide
        whether its own closing tag goes on a new line.
    """
    if records is None:
        return
    if fp is None:
        fp = sys.stdout

    was_el = False
    for r in records:
        # End markers are implied by the nesting and never printed.
        if isinstance(r, EndElementRecord):
            continue

        is_element = isinstance(r, Element)
        if is_element:
            fp.write('\n' + ' ' * skip + str(r))
        else:
            fp.write(str(r))

        new_line = False
        if hasattr(r, 'childs'):
            new_line = print_records(r.childs, skip+1, fp)

        if is_element:
            # Close the tag; on its own line if the last child was an
            # element itself.
            if new_line:
                fp.write('\n' + ' ' * skip)
            if hasattr(r, 'prefix'):
                fp.write('</%s:%s>' % (r.prefix, r.name))
            else:
                fp.write('</%s>' % r.name)
        was_el = is_element
    return was_el
69+
70+
def repr_records(records, skip=0):
    """Print one line per record to standard output, as an indented tree.

    :param records: iterable of records, or ``None`` (nothing is printed).
    :param skip: nesting depth; each line is prefixed with ``skip`` spaces.

    Records that have a ``childs`` attribute are followed by their children,
    printed one level deeper.
    """
    if records is None:
        return

    for r in records:
        # Same output as the print statement, without relying on
        # Python-2-only syntax.
        sys.stdout.write(' '*skip + str(r) + '\n')
        if hasattr(r, 'childs'):
            repr_records(r.childs, skip+1)
78+
79+
def dump_records(records):
    """Serialize ``records`` (and, recursively, their children) to a string.

    Each record contributes ``r.to_bytes()``. A record with a ``childs``
    attribute is followed by its serialized children. An ``EndElementRecord``
    is appended after an element unless its last child is a ``Text`` record.
    When the last record of a list is a ``Text`` record, it is written with
    ``type + 1`` (logged as "with EndElement").

    The type bump is applied only while the record is being written and is
    undone afterwards, so dumping the same records twice yields the same
    output.

    :param records: sequence of records.
    :returns: the concatenated serialized form (``''`` for no records).
    """
    chunks = []
    last = records[-1] if records else None

    for r in records:
        msg = 'Write %s' % type(r).__name__
        # Identity, not equality: only the record actually in last
        # position is written in the "with EndElement" form.
        bumped = r is last and isinstance(r, Text)
        if bumped:
            r.type = r.type + 1
            msg += ' with EndElement (0x%X)' % r.type
        try:
            log.debug(msg)
            log.debug('Value %s' % str(r))
            if isinstance(r, Element) and not isinstance(r, EndElementRecord) and len(r.attributes):
                log.debug(' Attributes:')
                for a in r.attributes:
                    log.debug(' %s: %s' % (type(a).__name__, str(a)))
            chunks.append(r.to_bytes())
        finally:
            if bumped:
                # Do not leave the record permanently modified.
                r.type = r.type - 1

        if hasattr(r, 'childs'):
            chunks.append(dump_records(r.childs))
            if len(r.childs) == 0 or not isinstance(r.childs[-1], Text):
                log.debug('Write EndElement for %s' % r.name)
                chunks.append(EndElementRecord().to_bytes())
        elif isinstance(r, Element) and not isinstance(r, EndElementRecord):
            log.debug('Write EndElement for %s' % (r.name,))
            chunks.append(EndElementRecord().to_bytes())

    return ''.join(chunks)

‎wcf/records/attributes.py

+333
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,333 @@
1+
# vim: set ts=4 sw=4 tw=79 fileencoding=utf-8:
2+
# Copyright (c) 2011, Timo Schmid <tschmid@ernw.de>
3+
# All rights reserved.
4+
#
5+
# Redistribution and use in source and binary forms, with or without
6+
# modification, are permitted provided that the following conditions
7+
# are met:
8+
#
9+
# * Redistributions of source code must retain the above copyright
10+
# notice, this list of conditions and the following disclaimer.
11+
# * Redistributions in binary form must reproduce the above copyright
12+
# notice, this list of conditions and the following disclaimer in the
13+
# documentation and/or other materials provided with the distribution.
14+
# * Neither the name of the ERMW GmbH nor the names of its contributors
15+
# may be used to endorse or promote products derived from this software
16+
# without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22+
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+
from __future__ import absolute_import
30+
31+
import struct
32+
import logging
33+
34+
log = logging.getLogger(__name__)
35+
logging.basicConfig(level=logging.INFO)
36+
37+
from wcf.datatypes import *
38+
from wcf.records.base import *
39+
from wcf.records.text import *
40+
from wcf.dictionary import dictionary
41+
42+
class ShortAttributeRecord(Attribute):
    """Attribute record (type 0x04): an unprefixed name with a record value."""
    type = 0x04

    def __init__(self, name, value):
        self.name, self.value = name, value

    def to_bytes(self):
        """
        Serialize as type byte, UTF-8 name, then the value record.

        >>> ShortAttributeRecord('test', TrueTextRecord()).to_bytes()
        '\\x04\\x04test\\x86'
        """
        header = super(ShortAttributeRecord, self).to_bytes()
        encoded_name = Utf8String(self.name).to_bytes()
        return header + encoded_name + self.value.to_bytes()

    def __str__(self):
        return '%s="%s"' % (self.name, str(self.value))

    @classmethod
    def parse(cls, fp):
        """Read the name, then one type byte selecting the value record."""
        name = Utf8String.parse(fp).value
        (value_type,) = struct.unpack('<B', fp.read(1))
        return cls(name, Record.records[value_type].parse(fp))
70+
71+
class AttributeRecord(Attribute):
    """Attribute record (type 0x05): prefix, name and a record value."""
    type = 0x05

    def __init__(self, prefix, name, value):
        self.prefix, self.name, self.value = prefix, name, value

    def to_bytes(self):
        """
        Serialize as type byte, UTF-8 prefix, UTF-8 name, then the value.

        >>> AttributeRecord('x', 'test', TrueTextRecord()).to_bytes()
        '\\x05\\x01x\\x04test\\x86'
        """
        header = super(AttributeRecord, self).to_bytes()
        encoded_prefix = Utf8String(self.prefix).to_bytes()
        encoded_name = Utf8String(self.name).to_bytes()
        return header + encoded_prefix + encoded_name + self.value.to_bytes()

    def __str__(self):
        return '%s:%s="%s"' % (self.prefix, self.name, str(self.value))

    @classmethod
    def parse(cls, fp):
        """Read prefix and name, then one type byte selecting the value."""
        prefix = Utf8String.parse(fp).value
        name = Utf8String.parse(fp).value
        (value_type,) = struct.unpack('<B', fp.read(1))
        return cls(prefix, name, Record.records[value_type].parse(fp))
102+
103+
104+
class ShortDictionaryAttributeRecord(Attribute):
    """Attribute record (type 0x06): name given as a dictionary index."""
    type = 0x06

    def __init__(self, index, value):
        self.index, self.value = index, value

    def to_bytes(self):
        """
        Serialize as type byte, MultiByteInt31 index, then the value record.

        >>> ShortDictionaryAttributeRecord(3, TrueTextRecord()).to_bytes()
        '\\x06\\x03\\x86'
        """
        header = super(ShortDictionaryAttributeRecord, self).to_bytes()
        encoded_index = MultiByteInt31(self.index).to_bytes()
        return header + encoded_index + self.value.to_bytes()

    def __str__(self):
        # The readable name is looked up in the shared dictionary.
        return '%s="%s"' % (dictionary[self.index], str(self.value))

    @classmethod
    def parse(cls, fp):
        """Read the index, then one type byte selecting the value record."""
        index = MultiByteInt31.parse(fp).value
        (value_type,) = struct.unpack('<B', fp.read(1))
        return cls(index, Record.records[value_type].parse(fp))
132+
133+
134+
135+
class DictionaryAttributeRecord(Attribute):
    """Attribute record (type 0x07): prefix plus a dictionary-indexed name."""
    type = 0x07

    def __init__(self, prefix, index, value):
        self.prefix, self.index, self.value = prefix, index, value

    def to_bytes(self):
        """
        Serialize as type byte, UTF-8 prefix, MultiByteInt31 index, value.

        >>> DictionaryAttributeRecord('x', 2, TrueTextRecord()).to_bytes()
        '\\x07\\x01x\\x02\\x86'
        """
        header = super(DictionaryAttributeRecord, self).to_bytes()
        encoded_prefix = Utf8String(self.prefix).to_bytes()
        encoded_index = MultiByteInt31(self.index).to_bytes()
        return header + encoded_prefix + encoded_index + self.value.to_bytes()

    def __str__(self):
        return '%s:%s="%s"' % (self.prefix, dictionary[self.index],
                               str(self.value))

    @classmethod
    def parse(cls, fp):
        """Read prefix and index, then one type byte selecting the value."""
        prefix = Utf8String.parse(fp).value
        index = MultiByteInt31.parse(fp).value
        (value_type,) = struct.unpack('<B', fp.read(1))
        return cls(prefix, index, Record.records[value_type].parse(fp))
166+
167+
class ShortDictionaryXmlnsAttributeRecord(Attribute):
    """Default-namespace declaration (type 0x0A) with a dictionary-indexed URI."""
    type = 0x0A

    def __init__(self, index):
        self.index = index

    def __str__(self):
        return 'xmlns="%s"' % (dictionary[self.index],)

    def to_bytes(self):
        """
        Serialize as type byte followed by the MultiByteInt31 index.

        >>> ShortDictionaryXmlnsAttributeRecord( 6).to_bytes()
        '\\n\\x06'
        """
        type_byte = struct.pack('<B', self.type)
        return type_byte + MultiByteInt31(self.index).to_bytes()

    @classmethod
    def parse(cls, fp):
        """Read the dictionary index of the namespace URI."""
        return cls(MultiByteInt31.parse(fp).value)
190+
191+
192+
class DictionaryXmlnsAttributeRecord(Attribute):
    """Prefixed namespace declaration (type 0x0B) with a dictionary-indexed URI."""
    type = 0x0B

    def __init__(self, prefix, index):
        self.prefix, self.index = prefix, index

    def __str__(self):
        return 'xmlns:%s="%s"' % (self.prefix, dictionary[self.index])

    def to_bytes(self):
        """
        Serialize as type byte, UTF-8 prefix, then the MultiByteInt31 index.

        >>> DictionaryXmlnsAttributeRecord('a', 6).to_bytes()
        '\\x0b\\x01\x61\\x06'
        """
        type_byte = struct.pack('<B', self.type)
        encoded_prefix = Utf8String(self.prefix).to_bytes()
        return type_byte + encoded_prefix + MultiByteInt31(self.index).to_bytes()

    @classmethod
    def parse(cls, fp):
        """Read the prefix, then the dictionary index of the namespace URI."""
        prefix = Utf8String.parse(fp).value
        return cls(prefix, MultiByteInt31.parse(fp).value)
218+
219+
class ShortXmlnsAttributeRecord(Attribute):
    """Default-namespace declaration (type 0x08) with a literal URI string."""
    type = 0x08

    def __init__(self, value, *args, **kwargs):
        # Remaining arguments are forwarded to the base record constructor.
        super(ShortXmlnsAttributeRecord, self).__init__(*args, **kwargs)
        self.value = value

    def to_bytes(self):
        """Serialize as type byte followed by the UTF-8 encoded value."""
        type_byte = struct.pack('<B', self.type)
        return type_byte + Utf8String(self.value).to_bytes()

    def __str__(self):
        return 'xmlns="%s"' % (self.value,)

    @classmethod
    def parse(cls, fp):
        """Read the namespace value as a Utf8String."""
        return cls(Utf8String.parse(fp).value)
238+
239+
240+
class XmlnsAttributeRecord(Attribute):
    """Prefixed namespace declaration (type 0x09) with literal name and URI."""
    type = 0x09

    def __init__(self, name, value, *args, **kwargs):
        # Remaining arguments are forwarded to the base record constructor.
        super(XmlnsAttributeRecord, self).__init__(*args, **kwargs)
        self.name, self.value = name, value

    def to_bytes(self):
        """Serialize as type byte, UTF-8 name, then UTF-8 value."""
        type_byte = struct.pack('<B', self.type)
        encoded_name = Utf8String(self.name).to_bytes()
        return type_byte + encoded_name + Utf8String(self.value).to_bytes()

    def __str__(self):
        return 'xmlns:%s="%s"' % (self.name, self.value)

    @classmethod
    def parse(cls, fp):
        """Read the declared prefix name, then the namespace value."""
        name = Utf8String.parse(fp).value
        return cls(name, Utf8String.parse(fp).value)
262+
263+
class PrefixAttributeRecord(AttributeRecord):
    """
    Base for the generated one-letter-prefix attribute records.

    Concrete subclasses supply ``type`` and ``char``; the prefix is implied
    by the type byte and therefore not written to the stream.
    """

    def __init__(self, name, value):
        super(PrefixAttributeRecord, self).__init__(self.char, name, value)

    def to_bytes(self):
        """Serialize as type byte, UTF-8 name, then the value record."""
        encoded_name = Utf8String(self.name)
        type_byte = struct.pack('<B', self.type)
        return type_byte + encoded_name.to_bytes() + self.value.to_bytes()

    @classmethod
    def parse(cls, fp):
        """Read the name, then one type byte selecting the value record."""
        name = Utf8String.parse(fp).value
        (value_type,) = struct.unpack('<B', fp.read(1))
        return cls(name, Record.records[value_type].parse(fp))
277+
278+
class PrefixDictionaryAttributeRecord(DictionaryAttributeRecord):
    """
    Base for the generated one-letter-prefix dictionary attribute records.

    Concrete subclasses supply ``type`` and ``char``; the prefix is implied
    by the type byte and therefore not written to the stream.
    """

    def __init__(self, index, value):
        super(PrefixDictionaryAttributeRecord, self).__init__(
            self.char, index, value)

    def to_bytes(self):
        """Serialize as type byte, MultiByteInt31 index, then the value."""
        encoded_index = MultiByteInt31(self.index)
        type_byte = struct.pack('<B', self.type)
        return type_byte + encoded_index.to_bytes() + self.value.to_bytes()

    @classmethod
    def parse(cls, fp):
        """Read the index, then one type byte selecting the value record."""
        index = MultiByteInt31.parse(fp).value
        (value_type,) = struct.unpack('<B', fp.read(1))
        return cls(index, Record.records[value_type].parse(fp))
292+
293+
294+
# Register the attribute records that have a fixed type byte.
Record.add_records((
    ShortAttributeRecord,
    AttributeRecord,
    ShortDictionaryAttributeRecord,
    DictionaryAttributeRecord,
    ShortDictionaryXmlnsAttributeRecord,
    DictionaryXmlnsAttributeRecord,
    ShortXmlnsAttributeRecord,
    XmlnsAttributeRecord,
))


__records__ = []

# Type bytes 0x0C..0x25 map to prefixes 'a'..'z' with a dictionary name.
for c in range(0x0C, 0x25 + 1):
    char = chr(ord('a') + (c - 0x0C))
    cls = type(
        'PrefixDictionaryAttribute%sRecord' % char.upper(),
        (PrefixDictionaryAttributeRecord,),
        {'type': c, 'char': char},
    )
    __records__.append(cls)

# Type bytes 0x26..0x3F map to prefixes 'a'..'z' with a literal name.
for c in range(0x26, 0x3F + 1):
    char = chr(ord('a') + (c - 0x26))
    cls = type(
        'PrefixAttribute%sRecord' % char.upper(),
        (PrefixAttributeRecord,),
        {'type': c, 'char': char},
    )
    __records__.append(cls)

Record.add_records(__records__)
del __records__

‎wcf/records/base.py

+212
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
# vim: set ts=4 sw=4 tw=79 fileencoding=utf-8:
2+
# Copyright (c) 2011, Timo Schmid <tschmid@ernw.de>
3+
# All rights reserved.
4+
#
5+
# Redistribution and use in source and binary forms, with or without
6+
# modification, are permitted provided that the following conditions
7+
# are met:
8+
#
9+
# * Redistributions of source code must retain the above copyright
10+
# notice, this list of conditions and the following disclaimer.
11+
# * Redistributions in binary form must reproduce the above copyright
12+
# notice, this list of conditions and the following disclaimer in the
13+
# documentation and/or other materials provided with the distribution.
14+
# * Neither the name of the ERMW GmbH nor the names of its contributors
15+
# may be used to endorse or promote products derived from this software
16+
# without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22+
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+
from __future__ import absolute_import
30+
31+
import struct
32+
import logging
33+
34+
from wcf.datatypes import *
35+
36+
log = logging.getLogger(__name__)
37+
logging.basicConfig(level=logging.INFO)
38+
39+
class Record(object):
    """
    Base class of all records and registry of record classes by type byte.
    """
    # Shared registry: record type byte -> record class.
    records = dict()

    @classmethod
    def add_records(cls, records):
        """Register every class in *records* under its ``type`` attribute."""
        for r in records:
            Record.records[r.type] = r

    def __init__(self, type=None):
        # Compare against None so an explicit type of 0 is not dropped.
        if type is not None:
            self.type = type

    def to_bytes(self):
        """
        Return the record's type as a single unsigned byte.

        >>> Record(0xff).to_bytes()
        '\\xff'
        """
        return struct.pack('<B', self.type)

    @classmethod
    def parse(cls, fp):
        """
        Parse records from the binary stream *fp*.

        Called on a subclass that does not override it, this simply returns
        ``cls()`` without reading anything. Called on ``Record`` itself, it
        reads the stream to its end and returns the list of top-level
        records; elements get their children in ``childs`` and their
        attributes in ``attributes``.
        """
        if cls != Record:
            return cls()
        root = []
        records = root
        parents = []
        last_el = None
        while True:
            raw = fp.read(1)
            # Stop on end of stream only. Testing the decoded integer would
            # also stop on a 0x00 byte and silently drop the rest.
            if not raw:
                break
            rec_type = struct.unpack('<B', raw)[0]
            if rec_type in Record.records:
                log.debug('%s found' % Record.records[rec_type].__name__)
                obj = Record.records[rec_type].parse(fp)
                if isinstance(obj, EndElementRecord):
                    # Closing an element: continue in the parent's list.
                    if len(parents) > 0:
                        records = parents.pop()
                elif isinstance(obj, Element):
                    last_el = obj
                    records.append(obj)
                    parents.append(records)
                    obj.childs = []
                    records = obj.childs
                elif isinstance(obj, Attribute) and last_el:
                    last_el.attributes.append(obj)
                else:
                    records.append(obj)
                log.debug('Value: %s' % str(obj))
            elif rec_type - 1 in Record.records:
                # type + 1 is the "with end element" form of a record: parse
                # it as the plain record and close the current element.
                log.debug('%s with end element found (0x%x)' %
                          (Record.records[rec_type - 1].__name__, rec_type))
                records.append(Record.records[rec_type - 1].parse(fp))
                last_el = None
                if len(parents) > 0:
                    records = parents.pop()
            else:
                log.warning('type 0x%x not found' % rec_type)

        return root
102+
103+
class Element(Record):
    """Marker base class for element records."""


class Attribute(Record):
    """Marker base class for attribute records."""


class Text(Record):
    """Marker base class for text records."""


class EndElementRecord(Element):
    """Record (type 0x01) that closes the currently open element."""
    type = 0x01
114+
115+
class CommentRecord(Record):
    """Comment record (type 0x02) holding the comment text."""
    type = 0x02

    def __init__(self, comment, *args, **kwargs):
        # Extra arguments are accepted but not used.
        self.comment = comment

    def to_bytes(self):
        """
        Serialize as type byte followed by the UTF-8 encoded comment.

        >>> CommentRecord('test').to_bytes()
        '\\x02\\x04test'
        """
        encoded = Utf8String(self.comment)
        header = super(CommentRecord, self).to_bytes()
        return header + encoded.to_bytes()

    def __str__(self):
        """
        >>> str(CommentRecord('test'))
        '<!-- test -->'
        """
        return '<!-- %s -->' % self.comment

    @classmethod
    def parse(cls, fp):
        """Read the comment text as a Utf8String."""
        return cls(Utf8String.parse(fp).value)
142+
143+
class ArrayRecord(Record):
    """
    Array record (type 0x03): one element repeated for a list of values.

    Wire layout as produced by ``to_bytes``: type byte, the element record
    (with its attributes), an EndElement byte, the value record type, the
    item count, then the raw items.
    """
    type = 0x03

    # Value record type -> (name, item size in bytes, struct format or '').
    datatypes = {
        0xB5: ('BoolTextWithEndElement', 1, '?'),
        0x8B: ('Int16TextWithEndElement', 2, 'h'),
        0x8D: ('Int32TextWithEndElement', 4, 'i'),
        0x8F: ('Int64TextWithEndElement', 8, 'q'),
        0x91: ('FloatTextWithEndElement', 4, 'f'),
        0x93: ('DoubleTextWithEndElement', 8, 'd'),
        0x95: ('DecimalTextWithEndElement', 16, ''),
        0x97: ('DateTimeTextWithEndElement', 8, ''),
        0xAF: ('TimeSpanTextWithEndElement', 8, ''),
        0xB1: ('UuidTextWithEndElement', 16, ''),
    }

    def __init__(self, element, recordtype, data):
        self.element = element
        self.recordtype = recordtype
        self.count = len(data)
        self.data = data

    def to_bytes(self):
        """
        >>> from wcf.records.elements import ShortElementRecord
        >>> ArrayRecord(ShortElementRecord('item'), 0x8D, ['\\x01\\x00\\x00\\x00', '\\x02\\x00\\x00\\x00', '\\x03\\x00\\x00\\x00']).to_bytes()
        '\\x03@\\x04item\\x01\\x8d\\x03\\x01\\x00\\x00\\x00\\x02\\x00\\x00\\x00\\x03\\x00\\x00\\x00'
        """
        out = super(ArrayRecord, self).to_bytes()
        out += self.element.to_bytes()
        out += EndElementRecord().to_bytes()
        # Keep the packed byte string as is; indexing it with [0] yields an
        # int on Python 3.
        out += struct.pack('<B', self.recordtype)
        out += MultiByteInt31(self.count).to_bytes()
        for item in self.data:
            # Items are either pre-encoded byte strings or records.
            if isinstance(item, bytes):
                out += item
            else:
                out += item.to_bytes()

        return out

    @classmethod
    def parse(cls, fp):
        """
        Parse an array record body (the 0x03 type byte is already consumed).

        Mirrors ``to_bytes``: element record, optional attribute records,
        EndElement, value record type, count, then ``count`` items.

        :raises ValueError: if something other than an attribute or the
                            EndElement follows the element record
        """
        element_type = struct.unpack('<B', fp.read(1))[0]
        # Look classes up in the shared registry; a module-level
        # ``__records__`` does not exist in this module.
        element = Record.records[element_type].parse(fp)

        # Consume the element's attributes up to the EndElement that
        # to_bytes() writes before the value record type.
        while True:
            next_type = struct.unpack('<B', fp.read(1))[0]
            obj = Record.records[next_type].parse(fp)
            if isinstance(obj, EndElementRecord):
                break
            if not isinstance(obj, Attribute):
                raise ValueError('unexpected record type 0x%x in array '
                                 'element' % next_type)
            element.attributes.append(obj)

        recordtype = struct.unpack('<B', fp.read(1))[0]
        count = MultiByteInt31.parse(fp).value
        # The array stores the "with end element" type; items are parsed
        # with the plain record registered one below it.
        item_cls = Record.records[recordtype - 1]
        data = [item_cls.parse(fp) for _ in range(count)]
        return cls(element, recordtype, data)

    def __str__(self):
        """
        >>> from wcf.records.elements import ShortElementRecord
        >>> from wcf.records.text import Int32TextRecord
        >>> str(ArrayRecord(ShortElementRecord('item'), 0x8D, [Int32TextRecord(1),Int32TextRecord(2),Int32TextRecord(3)]))
        '<item >1</item><item >2</item><item >3</item>'
        """
        parts = []
        for item in self.data:
            parts.append(str(self.element))
            parts.append(str(item))
            parts.append('</%s>' % self.element.name)

        return ''.join(parts)
209+
210+
# Register the record classes defined in this module.
Record.add_records((
    EndElementRecord,
    CommentRecord,
    ArrayRecord,
))

‎wcf/records/elements.py

+243
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
# vim: set ts=4 sw=4 tw=79 fileencoding=utf-8:
2+
# Copyright (c) 2011, Timo Schmid <tschmid@ernw.de>
3+
# All rights reserved.
4+
#
5+
# Redistribution and use in source and binary forms, with or without
6+
# modification, are permitted provided that the following conditions
7+
# are met:
8+
#
9+
# * Redistributions of source code must retain the above copyright
10+
# notice, this list of conditions and the following disclaimer.
11+
# * Redistributions in binary form must reproduce the above copyright
12+
# notice, this list of conditions and the following disclaimer in the
13+
# documentation and/or other materials provided with the distribution.
14+
# * Neither the name of the ERMW GmbH nor the names of its contributors
15+
# may be used to endorse or promote products derived from this software
16+
# without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22+
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+
from __future__ import absolute_import
30+
31+
import struct
32+
import logging
33+
34+
log = logging.getLogger(__name__)
35+
logging.basicConfig(level=logging.INFO)
36+
37+
from wcf.datatypes import *
38+
from wcf.records.base import *
39+
from wcf.dictionary import *
40+
41+
class ShortElementRecord(Element):
    """Element record (type 0x40): unprefixed element with a literal name."""
    type = 0x40

    def __init__(self, name, *args, **kwargs):
        # Extra arguments are accepted but not used.
        self.childs = []
        self.name = name
        self.attributes = []

    def to_bytes(self):
        """
        Serialize as type byte, UTF-8 name, then every attribute record.

        >>> ShortElementRecord('Envelope').to_bytes()
        '@\\x08Envelope'
        """
        encoded_name = Utf8String(self.name)
        parts = [super(ShortElementRecord, self).to_bytes(),
                 encoded_name.to_bytes()]
        for attr in self.attributes:
            parts.append(attr.to_bytes())
        return ''.join(parts)

    def __str__(self):
        rendered = ' '.join(str(a) for a in self.attributes)
        return '<%s %s>' % (self.name, rendered)

    @classmethod
    def parse(cls, fp):
        """Read the element name as a Utf8String."""
        return cls(Utf8String.parse(fp).value)
71+
72+
class ElementRecord(ShortElementRecord):
    """Element record (type 0x41): literal prefix and literal name."""
    type = 0x41

    def __init__(self, prefix, name, *args, **kwargs):
        super(ElementRecord, self).__init__(name)
        self.prefix = prefix

    def to_bytes(self):
        """
        Serialize like the short form, with the prefix after the type byte.

        >>> ElementRecord('x', 'Envelope').to_bytes()
        'A\\x01x\\x08Envelope'
        """
        encoded_prefix = Utf8String(self.prefix)
        short_form = super(ElementRecord, self).to_bytes()
        # Splice the prefix in between the type byte and the rest.
        type_byte, remainder = short_form[:1], short_form[1:]
        return type_byte + encoded_prefix.to_bytes() + remainder

    def __str__(self):
        rendered = ' '.join(str(a) for a in self.attributes)
        return '<%s:%s %s>' % (self.prefix, self.name, rendered)

    @classmethod
    def parse(cls, fp):
        """Read the prefix, then the element name."""
        prefix = Utf8String.parse(fp).value
        return cls(prefix, Utf8String.parse(fp).value)
97+
98+
class ShortDictionaryElementRecord(Element):
    """Element record (type 0x42): unprefixed, name is a dictionary index."""
    type = 0x42

    def __init__(self, index, *args, **kwargs):
        # Extra arguments are accepted but not used.
        self.childs = []
        self.index = index
        self.attributes = []
        # Resolve the readable name once, from the shared dictionary.
        self.name = dictionary[self.index]

    def __str__(self):
        rendered = ' '.join(str(a) for a in self.attributes)
        return '<%s %s>' % (self.name, rendered)

    def to_bytes(self):
        """
        Serialize as type byte, MultiByteInt31 index, then the attributes.

        >>> ShortDictionaryElementRecord(2).to_bytes()
        'B\\x02'
        """
        encoded_index = MultiByteInt31(self.index)
        parts = [super(ShortDictionaryElementRecord, self).to_bytes(),
                 encoded_index.to_bytes()]
        for attr in self.attributes:
            parts.append(attr.to_bytes())
        return ''.join(parts)

    @classmethod
    def parse(cls, fp):
        """Read the dictionary index of the element name."""
        return cls(MultiByteInt31.parse(fp).value)
129+
130+
class DictionaryElementRecord(Element):
    """Element record (type 0x43): literal prefix, dictionary-indexed name."""
    type = 0x43

    def __init__(self, prefix, index, *args, **kwargs):
        # Extra arguments are accepted but not used.
        self.childs = []
        self.prefix = prefix
        self.index = index
        self.attributes = []
        # Resolve the readable name once, from the shared dictionary.
        self.name = dictionary[self.index]

    def __str__(self):
        """
        >>> str(DictionaryElementRecord('x', 2))
        '<x:Envelope >'
        """
        rendered = ' '.join(str(a) for a in self.attributes)
        return '<%s:%s %s>' % (self.prefix, self.name, rendered)

    def to_bytes(self):
        """
        Serialize as type byte, UTF-8 prefix, index, then the attributes.

        >>> DictionaryElementRecord('x', 2).to_bytes()
        'C\\x01x\\x02'
        """
        encoded_prefix = Utf8String(self.prefix)
        encoded_index = MultiByteInt31(self.index)
        parts = [super(DictionaryElementRecord, self).to_bytes(),
                 encoded_prefix.to_bytes(),
                 encoded_index.to_bytes()]
        for attr in self.attributes:
            parts.append(attr.to_bytes())
        return ''.join(parts)

    @classmethod
    def parse(cls, fp):
        """Read the prefix, then the dictionary index of the name."""
        prefix = Utf8String.parse(fp).value
        return cls(prefix, MultiByteInt31.parse(fp).value)
168+
169+
class PrefixElementRecord(ElementRecord):
    """
    Base for the generated one-letter-prefix element records.

    Concrete subclasses supply ``type`` and ``char``; the prefix is implied
    by the type byte and therefore not written to the stream.
    """

    def __init__(self, name):
        super(PrefixElementRecord, self).__init__(self.char, name)

    def to_bytes(self):
        """Serialize as type byte, UTF-8 name, then every attribute."""
        encoded_name = Utf8String(self.name)
        parts = [struct.pack('<B', self.type), encoded_name.to_bytes()]
        for attr in self.attributes:
            parts.append(attr.to_bytes())
        return ''.join(parts)

    @classmethod
    def parse(cls, fp):
        """Read the element name as a Utf8String."""
        return cls(Utf8String.parse(fp).value)
187+
188+
class PrefixDictionaryElementRecord(DictionaryElementRecord):
    """
    Base for the generated one-letter-prefix dictionary element records.

    Concrete subclasses supply ``type`` and ``char``; the prefix is implied
    by the type byte and therefore not written to the stream.
    """

    def __init__(self, index):
        super(PrefixDictionaryElementRecord, self).__init__(self.char, index)

    def to_bytes(self):
        """Serialize as type byte, MultiByteInt31 index, then attributes."""
        encoded_index = MultiByteInt31(self.index)
        parts = [struct.pack('<B', self.type), encoded_index.to_bytes()]
        for attr in self.attributes:
            parts.append(attr.to_bytes())
        return ''.join(parts)

    @classmethod
    def parse(cls, fp):
        """Read the dictionary index of the element name."""
        return cls(MultiByteInt31.parse(fp).value)
206+
207+
208+
209+
# Register the element records that have a fixed type byte.
Record.add_records((
    ShortElementRecord,
    ElementRecord,
    ShortDictionaryElementRecord,
    DictionaryElementRecord,
))

__records__ = []

# Type bytes 0x44..0x5D map to prefixes 'a'..'z' with a dictionary name.
for c in range(0x44, 0x5D + 1):
    char = chr(ord('a') + (c - 0x44))
    cls = type(
        'PrefixDictionaryElement%sRecord' % char.upper(),
        (PrefixDictionaryElementRecord,),
        {'type': c, 'char': char},
    )
    __records__.append(cls)

# Type bytes 0x5E..0x77 map to prefixes 'a'..'z' with a literal name.
for c in range(0x5E, 0x77 + 1):
    char = chr(ord('a') + (c - 0x5E))
    cls = type(
        'PrefixElement%sRecord' % char.upper(),
        (PrefixElementRecord,),
        {'type': c, 'char': char},
    )
    __records__.append(cls)

Record.add_records(__records__)
del __records__

‎wcf/records/text.py

+645
Large diffs are not rendered by default.

‎wcf/xml2records.py

+269
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
#!/usr/bin/env python2
2+
# vim: set ts=4 sw=4 tw=79 fileencoding=utf-8:
3+
4+
from wcf.MyHTMLParser import HTMLParser
5+
from htmlentitydefs import name2codepoint
6+
import re
7+
import base64
8+
import logging
9+
10+
log = logging.getLogger(__name__)
11+
12+
from wcf.records import *
13+
from wcf.dictionary import inverted_dict
14+
15+
# Lookup table: record class name -> record class, for every registered record.
classes = dict((c.__name__, c) for c in Record.records.values())
17+
#inverted_dict = dict([(n,v) for n,v in inverted_dict.iteritems()])
18+
19+
20+
def unescape(s):
    """
    Resolve the named HTML entity *s* (without ``&`` and ``;``).

    :returns: the character for a known entity name, otherwise the original
              reference text ``&name;`` unchanged
    """
    try:
        codepoint = name2codepoint[s]
    except KeyError:
        return "&" + s + ";"
    try:
        return chr(codepoint)
    except ValueError:
        # Python 2 chr() only covers 0..255; entities such as 'euro' lie
        # above that. Return them UTF-8 encoded so the result stays a byte
        # string like the other data handled here.
        return unichr(codepoint).encode('utf-8')
22+
23+
# Patterns used to guess the record type of a text value.
int_reg = re.compile(r'^-?\d+$')
uint_reg = re.compile(r'^\d+$')
uuid_reg = re.compile(r'^urn:uuid:(([a-fA-F0-9]{8})-(([a-fA-F0-9]{4})-){3}([a-fA-F0-9]{12}))$')
base64_reg = re.compile(r'^[a-zA-Z0-9/+]*={0,2}$')
# The alternatives are grouped so both anchors apply to each of them.
# Ungrouped, '^' bound only to INF and '$' only to the number, so negative
# numbers never matched while 'NaNabc' or 'INFORMATION' did and then crashed
# float().
float_reg = re.compile(r'^(-?INF|NaN|-?\d+(\.\d+)?)$')
# The sign is grouped with the offset digits; ungrouped, a bare '+' was
# accepted as a complete timezone.
datetime_reg = re.compile(r'^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{1,7})?)?(Z|[+-]\d{2}:\d{2})')
29+
30+
class Parser(HTMLParser):
    """
    Parser that turns XML text into a tree of records.

    After ``feed()``, ``self.records`` holds the top-level records; nested
    elements keep their children in ``childs`` and attributes in
    ``attributes``.
    """

    def reset(self):
        """Reset parser state and start a new, empty record tree."""
        HTMLParser.reset(self)
        self.records = []
        # Synthetic root so every real record has a parent to append to.
        self.last_record = Record()
        self.last_record.childs = self.records
        self.last_record.parent = None
        self.data = None

    @staticmethod
    def _is_short_prefix(prefix):
        """Return True if *prefix* is one of 'a'..'z'.

        Only these have dedicated Prefix*Record classes; other
        single-character prefixes must use the generic records.
        """
        return len(prefix) == 1 and 'a' <= prefix <= 'z'

    def _parse_tag(self, tag):
        """Create the most compact element record for the tag name *tag*."""
        if ':' in tag:
            prefix, name = tag.split(':', 1)

            if self._is_short_prefix(prefix):
                cls_name = 'Element' + prefix.upper() + 'Record'
                if name in inverted_dict:
                    cls_name = 'PrefixDictionary' + cls_name
                    log.debug('New %s: %s' % (cls_name, name))
                    return classes[cls_name](inverted_dict[name])
                cls_name = 'Prefix' + cls_name
                log.debug('New %s: %s' % (cls_name, name))
                return classes[cls_name](name)

            if name in inverted_dict:
                log.debug('New DictionaryElementRecord: %s:%s' %
                          (prefix, name))
                return DictionaryElementRecord(prefix, inverted_dict[name])
            log.debug('New ElementRecord: %s:%s' % (prefix, name))
            return ElementRecord(prefix, name)

        if tag in inverted_dict:
            log.debug('New ShortDictionaryElementRecord: %s' % (tag, ))
            return ShortDictionaryElementRecord(inverted_dict[tag])
        log.debug('New ShortElementRecord: %s' % (tag, ))
        return ShortElementRecord(tag)

    def _store_data(self, data, end=False):
        """Append *data* as a text record to the current element.

        Data that is empty after stripping is dropped. *end* is accepted for
        call compatibility and is not used.
        """
        textrecord = self._parse_data(data)
        if isinstance(textrecord, EmptyTextRecord):
            return
        log.debug('New %s: %s' % (type(textrecord).__name__, data))

        self.last_record.childs.append(textrecord)

    @staticmethod
    def _text_record(data):
        """Wrap *data* in the smallest Char*TextRecord that fits its length."""
        length = len(data)
        if length < 2**8:
            return Char8TextRecord(data)
        elif length < 2**16:
            return Char16TextRecord(data)
        elif length < 2**32:
            return Char32TextRecord(data)

    def _parse_data(self, data):
        """
        Guess the most specific text record for the string *data*.

        Leading and trailing whitespace is stripped first. The checks run in
        a fixed order: 0/1, booleans, dictionary QName, UUID, integer, empty,
        base64, float, dictionary string, and finally plain text.
        """
        data = data.strip()

        # Decode once. None means "not base64"; the decoded payload is only
        # used if no earlier, more specific check matches.
        decoded = None
        if base64_reg.match(data):
            try:
                decoded = base64.b64decode(data)
            except (TypeError, ValueError):
                # Python 2 raises TypeError on bad padding; Python 3 raises
                # binascii.Error, a ValueError subclass.
                decoded = None

        if data == '0':
            return ZeroTextRecord()
        if data == '1':
            return OneTextRecord()
        lowered = data.lower()
        if lowered == 'false':
            return FalseTextRecord()
        if lowered == 'true':
            return TrueTextRecord()
        if len(data) > 3 and data[1] == ':' and data[2:] in inverted_dict:
            return QNameDictionaryTextRecord(data[0], inverted_dict[data[2:]])

        uuid_match = uuid_reg.match(data)
        if uuid_match:
            return UniqueIdTextRecord(uuid_match.group(1))

        if int_reg.match(data):
            val = int(data)
            # Choose the width from signed ranges; checking the upper bound
            # alone sent 200 or -100000 to Int8TextRecord.
            # NOTE(review): assumes IntNTextRecord stores signed N-bit
            # values (text.py is not visible here) - confirm.
            if -2**7 <= val < 2**7:
                return Int8TextRecord(val)
            elif -2**15 <= val < 2**15:
                return Int16TextRecord(val)
            elif -2**31 <= val < 2**31:
                return Int32TextRecord(val)
            elif -2**63 <= val < 2**63:
                return Int64TextRecord(val)
            # Too large for any integer record: keep it as text without
            # loss instead of returning None.
            return self._text_record(data)

        if data == '':
            return EmptyTextRecord()

        if decoded is not None:
            size = len(decoded)
            if size < 2**8:
                return Bytes8TextRecord(decoded)
            elif size < 2**16:
                return Bytes16TextRecord(decoded)
            elif size < 2**32:
                return Bytes32TextRecord(decoded)
            return None

        if float_reg.match(data):
            return DoubleTextRecord(float(data))
        if data in inverted_dict:
            return DictionaryTextRecord(inverted_dict[data])

        # TODO: DateTimeTextRecord is not implemented yet (datetime_reg is
        # reserved for it); date/time strings are stored as plain text.
        return self._text_record(data)

    def _parse_attr(self, name, value):
        """Create the most compact attribute record for *name* = *value*."""
        if ':' in name:
            prefix, name = name.split(':', 1)

            if prefix == 'xmlns':
                if value in inverted_dict:
                    return DictionaryXmlnsAttributeRecord(
                        name, inverted_dict[value])
                return XmlnsAttributeRecord(name, value)

            value = self._parse_data(value)
            if self._is_short_prefix(prefix):
                cls_name = 'Attribute' + prefix.upper() + 'Record'
                if name in inverted_dict:
                    return classes['PrefixDictionary' + cls_name](
                        inverted_dict[name], value)
                return classes['Prefix' + cls_name](name, value)

            if name in inverted_dict:
                return DictionaryAttributeRecord(prefix, inverted_dict[name],
                                                 value)
            return AttributeRecord(prefix, name, value)

        if name == 'xmlns':
            if value in inverted_dict:
                return ShortDictionaryXmlnsAttributeRecord(
                    inverted_dict[value])
            return ShortXmlnsAttributeRecord(value)

        value = self._parse_data(value)
        if name in inverted_dict:
            return ShortDictionaryAttributeRecord(inverted_dict[name], value)
        return ShortAttributeRecord(name, value)

    def _flush_data(self, end=False):
        """Store any buffered character data and clear the buffer."""
        if self.data:
            self._store_data(self.data, end)
            self.data = None

    def _append_element(self, tag, attrs):
        """Build the element for *tag*/*attrs* and append it to the current one."""
        el = self._parse_tag(tag)
        for n, v in attrs:
            el.attributes.append(self._parse_attr(n, v))
        self.last_record.childs.append(el)
        return el

    def handle_starttag(self, tag, attrs):
        """Open a new element and make it the current one."""
        self._flush_data()
        el = self._append_element(tag, attrs)
        el.parent = self.last_record
        self.last_record = el

    def handle_startendtag(self, tag, attrs):
        """Add a self-closing element; the current element is unchanged."""
        self._flush_data()
        self._append_element(tag, attrs)

    def handle_endtag(self, tag):
        """Close the current element and continue in its parent."""
        self._flush_data(True)
        self.last_record = self.last_record.parent

    def handle_data(self, data):
        """Buffer character data until the next tag or comment."""
        if not self.data:
            self.data = data
        else:
            self.data += data

    def handle_charref(self, name):
        """Buffer the character for a numeric reference (``&#62;``, ``&#x3E;``)."""
        # NOTE(review): assumes the parser passes 'x3E' or '62' as the
        # standard HTMLParser does - confirm against wcf.MyHTMLParser.
        if name[:1] in ('x', 'X'):
            codepoint = int(name[1:], 16)
        else:
            codepoint = int(name, 10)
        self.handle_data(chr(codepoint))

    def handle_entityref(self, name):
        """Buffer the character for a named entity reference."""
        self.handle_data(unescape(name))

    # Declarations are kept as plain character data.
    handle_decl = handle_data

    def handle_comment(self, comment):
        """Flush pending text, then append a CommentRecord."""
        # Pending text lives in self.data; a bare ``data`` is undefined here.
        self._flush_data()
        self.last_record.childs.append(CommentRecord(comment))
253+
254+
255+
if __name__ == '__main__':
    # Usage: xml2records.py [FILE]
    # Reads XML from FILE (or stdin) and writes the binary records to stdout.
    import sys

    logging.basicConfig(level=logging.INFO)

    if len(sys.argv) > 1:
        # The context manager closes the file even if reading fails.
        with open(sys.argv[1], 'r') as fp:
            indata = fp.read()
    else:
        # stdin belongs to the interpreter; read it but do not close it.
        indata = sys.stdin.read()

    p = Parser()
    p.feed(indata)
    sys.stdout.write(dump_records(p.records))

0 commit comments

Comments
 (0)
Please sign in to comment.