-
Notifications
You must be signed in to change notification settings - Fork 10
/
checktags.py
99 lines (91 loc) · 4 KB
/
checktags.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
# Copyright (c) 2008-11 Qtrac Ltd. All rights reserved.
# This program or module is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version. It is provided for educational
# purposes and is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
import string
import sys
class InvalidEntityError(Exception):
    """Common base class for the "&...;" entity errors used by parse()."""


class InvalidNumericEntityError(InvalidEntityError):
    """Signals a malformed entity whose text starts with "#"."""


class InvalidAlphaEntityError(InvalidEntityError):
    """Signals a malformed entity whose text does not start with "#"."""


class InvalidTagContentError(Exception):
    """Signals a "<" found inside a tag that has not yet been closed."""
def parse(filename, skip_on_first_error=False):
    """Check the "<...>" tags and "&...;" entities in a UTF-8 text file.

    The file is scanned one character at a time by a small state
    machine (normal text, inside a tag, inside an entity).  Each
    problem is printed as an "ERROR ..." line giving its line and
    column (both counted from 1); a clean file produces
    "OK <filename>".

    Rules enforced:
      * a "<" may not appear inside a tag before its closing ">";
      * an entity beginning "&#" needs at least one character after
        the "#", and every such character must be a hexadecimal digit;
      * any other entity must be non-empty and alphabetic, and every
        character after its first must be an ASCII letter;
      * the file may not end inside a tag or an entity.

    After an error the scanner drops back to normal text and carries
    on with the next character.

    Arguments:
      filename -- path of the file to check.
      skip_on_first_error -- if true, stop checking this file as soon
        as the first tag or entity error has been reported.

    Returns None.  Malformed content, a file that cannot be opened or
    read, and text that is not valid UTF-8 are all reported on standard
    output rather than raised to the caller.
    """
    HEXDIGITS = frozenset("0123456789ABCDEFabcdef")
    NORMAL, PARSING_TAG, PARSING_ENTITY = range(3)
    state = NORMAL
    entity = ""  # Text collected since the most recent "&"
    errors = False
    try:
        # The with statement closes the file on every exit path,
        # including the early return used for skip_on_first_error.
        with open(filename, encoding="utf8") as fh:
            for lino, line in enumerate(fh, start=1):
                for column, c in enumerate(line, start=1):
                    try:
                        if state == NORMAL:
                            if c == "<":
                                state = PARSING_TAG
                            elif c == "&":
                                entity = ""
                                state = PARSING_ENTITY
                        elif state == PARSING_TAG:
                            if c == ">":
                                state = NORMAL
                            elif c == "<":
                                raise InvalidTagContentError()
                        elif state == PARSING_ENTITY:
                            if c == ";":
                                if entity.startswith("#"):
                                    # Every character after the "#" was
                                    # vetted when it was appended, so
                                    # the only thing left to reject is
                                    # the empty reference "&#;".
                                    if entity == "#":
                                        raise InvalidNumericEntityError()
                                elif not entity.isalpha():
                                    raise InvalidAlphaEntityError()
                                entity = ""
                                state = NORMAL
                            else:
                                if entity.startswith("#"):
                                    if c not in HEXDIGITS:
                                        raise InvalidNumericEntityError()
                                elif (entity and
                                      c not in string.ascii_letters):
                                    # The first character is only
                                    # judged at the ";" (via isalpha)
                                    # so that "#" can start an entity.
                                    raise InvalidAlphaEntityError()
                                entity += c
                    except (InvalidEntityError,
                            InvalidTagContentError) as err:
                        if isinstance(err, InvalidNumericEntityError):
                            error = "invalid numeric entity"
                        elif isinstance(err, InvalidAlphaEntityError):
                            error = "invalid alphabetic entity"
                        elif isinstance(err, InvalidTagContentError):
                            error = "invalid tag"
                        else:
                            # Not raised by the code above; keeps
                            # "error" bound whatever arrives here.
                            error = "invalid entity"
                        print("ERROR {0} in {1} on line {2} column {3}"
                              .format(error, filename, lino, column))
                        if skip_on_first_error:
                            return
                        # Resynchronize and keep looking for problems
                        entity = ""
                        state = NORMAL
                        errors = True
        if state == PARSING_TAG:
            print("ERROR unexpected EOF:",
                  "missing '>' at end of " + filename)
        elif state == PARSING_ENTITY:
            print("ERROR unexpected EOF:",
                  "missing ';' at end of " + filename)
        elif not errors:
            print("OK", filename)
    except UnicodeDecodeError as err:
        # A decoding failure is a ValueError, not an EnvironmentError,
        # so it needs its own handler; without it one badly encoded
        # file would abort the checking of every remaining file.
        print("ERROR {0} is not valid UTF-8: {1}".format(filename, err))
    except EnvironmentError as err:
        print(err)
# Command-line driver: check every file named on the command line, in
# the order given, or show a usage reminder and exit when none is named.
if len(sys.argv) >= 2:
    for filename in sys.argv[1:]:
        parse(filename)
else:
    print("usage: checktags.py infile1 [infile2 [... infileN]]")
    sys.exit()