forked from NeuromatchAcademy/course-content
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlint_tutorial.py
192 lines (139 loc) · 5.42 KB
/
lint_tutorial.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
"""Lint tutorial notebooks with pyflakes and pycodestyle (aka flake8).
Running this script on a notebook will print a report of issues flagged by
pyflakes (which checks some aspects of code quality) and pycodestyle (which
checks adherence to the PEP8 stylistic standards).
Note that these checks do not capture all potential issues with a codebase,
and some checks will false-alarm because of deliberate choices we have made
about how to write tutorials. Nevertheless, this can be an easy way to flag
potential issues.
Requires nbformat (part of Jupyter) and flake8.
"""
import os
import io
import re
import sys
import argparse
import tempfile
import subprocess
import collections
import nbformat
from pyflakes.api import check
from pyflakes.reporter import Reporter
def main(arglist):
args = parse_args(arglist)
_, fname = os.path.split(args.path)
script, cell_lines = extract_code(args.path)
warnings, errors = check_code(script)
violations = check_style(script)
if args.brief:
report_brief(fname, warnings, errors, violations)
else:
line_map = remap_line_numbers(cell_lines)
report_verbose(fname, warnings, errors, violations, line_map)
def parse_args(arglist):
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("path", help="Path to notebook file")
parser.add_argument("--brief", action="store_true",
help="Print brief report (useful for aggregating)")
return parser.parse_args(arglist)
def extract_code(nb_fname):
"""Turn code cells from notebook into a script, track cell sizes."""
with open(nb_fname) as f:
nb = nbformat.read(f, nbformat.NO_CONVERT)
script_lines = []
cell_lengths = []
for cell in nb.get("cells", []):
if cell["cell_type"] == "code":
cell_lines = cell.get("source", "").split("\n")
cell_lengths.append(len(cell_lines))
for line in cell_lines:
if line and line[0] in ["!", "%"]: # IPython syntax
line = "# " + line
script_lines.append(line)
script = "\n".join(script_lines)
return script, cell_lengths
def check_code(script):
"""Run pyflakes checks over the script and capture warnings/errors."""
errors = io.StringIO()
warnings = io.StringIO()
reporter = Reporter(warnings, errors)
check(script, "notebook", reporter)
warnings.seek(0)
errors.seek(0)
return warnings, errors
def check_style(script):
"""Write a temporary script and run pycodestyle (PEP8) on it."""
with tempfile.NamedTemporaryFile("w", suffix=".py") as f:
f.write(script)
cmdline = [
"pycodestyle",
"--ignore=E111,E114",
"--max-line-length=88",
f.name,
]
res = subprocess.run(cmdline, capture_output=True)
output = res.stdout.decode().replace(f.name, "f").split("\n")
if not output:
return collections.Counter()
error_classes = []
pat = re.compile(r"^f:\d+:\d+: (\w\d{3}) (.*)$")
for line in output:
m = pat.match(line)
if m is not None:
error_classes.append(f"{m.group(1)} ({m.group(2)})")
return collections.Counter(error_classes)
def remap_line_numbers(cell_lines):
"""Create a mapping from script line number to notebook cell/line."""
line_map = {}
cell_start = 0
for cell, cell_length in enumerate(cell_lines, 1):
for line in range(1, cell_length + 1):
line_map[cell_start + line] = cell, line
cell_start += cell_length
return line_map
def report_brief(fname, warnings, errors, violations):
"""Print a single-line report, suibtable for aggregation."""
n_warnings = len(warnings.read().splitlines())
n_errors = len(errors.read().splitlines())
n_violations = len(list(violations.elements()))
print(f"{fname} {n_warnings + n_errors} {n_violations}")
def report_verbose(fname, warnings, errors, violations, line_map):
"""Report every pyflakes problem and more codestyle information."""
s = f"Code report for {fname}"
print("", s, "=" * len(s), sep="\n")
s = "Quality (pyflakes)"
print("", s, "-" * len(s), "", sep="\n")
warning_lines = reformat_line_problems(warnings, line_map)
error_lines = reformat_line_problems(errors, line_map, "ERROR in ")
issues = warning_lines + error_lines
print(f"Total code issues: {len(issues)}")
if issues:
print()
print("\n".join(warning_lines + error_lines))
s = "Style (pycodestyle)"
print("", s, "-" * len(s), "", sep="\n")
n = len(list(violations.elements()))
print(f"Total PEP8 violations: {n}")
# TODO parametrize n_most_common
if violations:
print()
print("Common problems:")
for code, count in violations.most_common(10):
plural = "" if count == 1 else "s"
print(f"- {count} instance{plural} of {code}")
print("")
def reformat_line_problems(stream, line_map, prefix=""):
"""Reformat a pyflakes output stream for notebook cells."""
pat = re.compile(r"^\w*:(\d+):\d+ (.+)$")
new_lines = []
orig_lines = stream.read().splitlines()
for line in orig_lines:
m = pat.match(line)
if m:
cell, line = line_map[int(m.group(1))]
new_lines.append(
f"{prefix}Cell {cell}, Line {line}: {m.group(2)}"
)
return new_lines
if __name__ == "__main__":
main(sys.argv[1:])