Skip to content

Commit

Permalink
Speed up triage by removing duplicate lines when normalizing.
Browse files: browse the repository at this point in the history
  • Loading branch information
Ryan Hitchman committed Sep 1, 2017
1 parent 420a18d commit c24da4d
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 7 deletions.
17 changes: 11 additions & 6 deletions triage/summarize.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -82,8 +82,12 @@ def repl(m):

s = flakeReasonOrdinalRE.sub(repl, s)

if len(s) > 400000: # ridiculously long test output
s = s[:200000] + '\n...[truncated]...\n' + s[-200000:]
if len(s) > 10000:
# for long strings, remove repeated lines!
s = re.sub(r'(?m)^(.*\n)\1+', r'\1', s)

if len(s) > 200000: # ridiculously long test output
s = s[:100000] + '\n...[truncated]...\n' + s[-100000:]

return s

Expand Down Expand Up @@ -239,8 +243,10 @@ def cluster_local(failed_tests):
"""Cluster together the failures for each test. """
clustered = {}
for test_name, tests in sorted(failed_tests.iteritems(), key=lambda x: len(x[1]), reverse=True):
print len(tests), test_name
print len(tests), test_name,
sys.stdout.flush()
clustered[test_name] = cluster_test(tests)
print len(clustered[test_name])
return clustered


Expand All @@ -264,7 +270,6 @@ def cluster_global(clustered, previous_clustered):
for cluster in previous_clustered:
key = cluster['key']
if key != normalize(key):
print 'WTF'
print key
print normalize(key)
n += 1
Expand Down Expand Up @@ -502,12 +507,13 @@ def parse_args(args):

def main(args):
builds, failed_tests = load_failures(args.builds, args.tests)
clustered_local = cluster_local(failed_tests)

previous_clustered = None
if args.previous:
print 'loading previous'
previous_clustered = json.load(args.previous)['clustered']

clustered_local = cluster_local(failed_tests)
clustered = cluster_global(clustered_local, previous_clustered)

print '%d clusters' % len(clustered)
Expand Down Expand Up @@ -536,6 +542,5 @@ def main(args):
sort_keys=True)



if __name__ == '__main__':
main(parse_args(sys.argv[1:]))
2 changes: 1 addition & 1 deletion triage/summarize_test.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -36,7 +36,7 @@ def test_normalize(self):
('Mon, 12 January 2017 11:34:35 blah blah', 'TIMEblah blah'),
('123.45.68.12:345 abcd1234eeee', 'UNIQ1 UNIQ2'),
('foobarbaz ' * 500000,
'foobarbaz ' * 20000 + '\n...[truncated]...\n' + 'foobarbaz ' * 20000),
'foobarbaz ' * 10000 + '\n...[truncated]...\n' + 'foobarbaz ' * 10000),
]:
self.assertEqual(summarize.normalize(src), dst)

Expand Down

0 comments on commit c24da4d

Please sign in to comment.