Skip to content

Commit

Permalink
[fix][clang][server] Add report hash to the report_path_hash
Browse files Browse the repository at this point in the history
`report_path_hash()` is used for generating a report specific hash for
determining if a bugreport should be saved in the database or not.
We skip duplicate bugreports.

Unfortunately, in some cases clangsa produced plists where an included
file had a context-insensitive bugreport at the exact same
file:row:col:checker, but different bug hash.

Previously, on the first was stored into the database, thus the
associated bug hash was nondeterministically choosen, causing a weird
behavior.

Imagine you store the same report directory different runs to the
server. One would expect that the diff between those runs is empty.
It was not the case!

By adding the report hash to the `report_path_hash` calculation,
seemingly identical reports - with different report hashes - now will be
considered different.

This way if there are multiple reports to a header location, each
occurrences of that bug will be stored to the server if they have
different report hashes.
The users still can specify a non-default report hash generation for
changing this behavior e.g. by using the `--report-hash context-free-v2`.
  • Loading branch information
Balazs Benics committed Apr 28, 2022
1 parent 01a042c commit 23c59c8
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,11 @@ def get_report_path_hash(report: Report) -> str:
for event in report.bug_path_events:
line = str(event.line)
col = str(event.column)

report_path_hash += f"{line}|{col}|{event.message}{event.file.name}"

report_path_hash += report.checker_name
if report.report_hash:
report_path_hash += report.report_hash

if not report_path_hash:
LOG.error('Failed to generate report path hash: %s', report)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ def test_report_path_hash_generation(self):

report_hash_to_path_hash = {
'79e31a6ba028f0b7d9779faf4a6cb9cf':
'acb1d3dc1459f681bd3c743e6c015b37',
'2dc99e0cd54ee4216bd884d49f0bc8c7',
'8714f42d8328bc78d5d7bff6ced918cc':
'dcaaf2905d607a16e3fa330edb8e9f89',
'a316a401913aaef9af56692bef6ba109',
'a6d3464f8aab9eb31a8ea7e167e84322':
'd089a50f34051c68c7bb4c5ac2c4c5d5'
'9653c97c587fd7a365fbe0ce3df00d7e'
}

for report in reports:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def test_gen_report_path_hash(self):

expected_path_hash = {
'f48840093ef89e291fb68a95a5181612':
'93cb93bdcee10434f9cf9f486947c88e',
'b053ba21d4d1f0ad2ef0d5a244c19ea4',
'e4907182b363faf2ec905fc32cc5a4ab':
'71a4dc24bf88af2b13be83d8d15bd6f0'}
'de139052a89686cc13828ae9e1e1cc5f'}

reports = get_reports(test_plist)
for report in reports:
Expand Down
63 changes: 63 additions & 0 deletions web/tests/functional/report_viewer_api/test_files/run.plist
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,69 @@
</dict>
</dict>

<!-- Seventh bug (same as the first one except for the report hash) -->
<dict>
<key>path</key>
<array>
<dict>
<key>kind</key><string>event</string>
<key>location</key>
<dict>
<key>line</key><integer>3</integer>
<key>col</key><integer>7</integer>
<key>file</key><integer>0</integer>
</dict>
<key>ranges</key>
<array>
<array>
<dict>
<key>line</key><integer>3</integer>
<key>col</key><integer>7</integer>
<key>file</key><integer>0</integer>
</dict>
<dict>
<key>line</key><integer>3</integer>
<key>col</key><integer>7</integer>
<key>file</key><integer>0</integer>
</dict>
</array>
<array>
<dict>
<key>line</key><integer>3</integer>
<key>col</key><integer>11</integer>
<key>file</key><integer>0</integer>
</dict>
<dict>
<key>line</key><integer>3</integer>
<key>col</key><integer>15</integer>
<key>file</key><integer>0</integer>
</dict>
</array>
</array>
<key>depth</key><integer>0</integer>
<key>extended_message</key>
<string>checker message</string>
<key>message</key>
<string>checker message</string>
</dict>
</array>
<key>description</key><string>checker message</string>
<key>category</key><string>Dead store</string>
<key>type</key><string>Dead initialization</string>
<key>check_name</key><string>deadcode.DeadStores</string>
<!-- This hash is experimental and going to change! -->
<key>issue_hash_content_of_line_in_context</key><string>22222</string>
<key>issue_context_kind</key><string>function</string>
<key>issue_context</key><string>main</string>
<key>issue_hash_function_offset</key><string>1</string>
<key>location</key>
<dict>
<key>line</key><integer>3</integer>
<key>col</key><integer>7</integer>
<key>file</key><integer>0</integer>
</dict>
</dict>

</array>
</dict>
</plist>
20 changes: 13 additions & 7 deletions web/tests/functional/report_viewer_api/test_hash_clash.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def test_hash_clash(self):
"""
reports = self._reports_for_latest_run()

# The PList file contains six bugs:
# The PList file contains seven bugs:
# 1. A normal bug
# 2. Same as the first one (no new report generated)
# 3. Same as the first one except for line numbers (new report
Expand All @@ -125,6 +125,8 @@ def test_hash_clash(self):
# generated)
# 6. Same as the first one except for the checker message (new report
# generated)
# 7. Same as the first one except for the report hash (new report
# generated)

fileid1 = None
fileid2 = None
Expand All @@ -140,15 +142,19 @@ def test_hash_clash(self):
fileid2 = f.fileId

by_file = defaultdict(int)
by_checker_message = defaultdict(int)
by_bug_report_hash = defaultdict(int)

for report in reports:
by_file[report.fileId] += 1
by_checker_message[report.checkerMsg] += 1
by_bug_report_hash[report.bugHash] += 1

self.assertEqual(by_file[fileid1], 4)
self.assertEqual(by_file[fileid1], 5)
self.assertEqual(by_file[fileid2], 1)

by_checker_message = defaultdict(int)
for report in reports:
by_checker_message[report.checkerMsg] += 1

self.assertEqual(by_checker_message['checker message'], 4)
self.assertEqual(by_checker_message['checker message'], 5)
self.assertEqual(by_checker_message['checker message 2'], 1)

self.assertEqual(by_bug_report_hash['11111'], 5)
self.assertEqual(by_bug_report_hash['22222'], 1)

0 comments on commit 23c59c8

Please sign in to comment.