Skip to content

Commit

Permalink
[cli] Fix source line / file for remote reports
Browse files Browse the repository at this point in the history
Previously if a run is stored to a CodeChecker server where the file paths are
trimmed and this run is given to `CodeChecker cmd diff` command, and
a remote report was in the result set of this command, the plain text output contained
an exception that the client failed to open the source file for reading. This
problem can be easily reproduced with the following steps:
 - Log and analyze a project.
 - Store the same results to two remote runs and use the `--trim-path-prefix` option.
 - Run the following command: `CodeChecker cmd diff -b remote_run1 -n remote_run2 --unresolved`

With this patch if HTML output format is given to the CodeChecker diff command,
we will try to fetch source file contents from the server and use it instead of
trying to get it from the user's local system.

Also if different output format is given (e.g.: plaintext) for performance reason
we will not fetch the whole source file contents but only the necessary source
line conents.
  • Loading branch information
csordasmarton committed Jan 12, 2022
1 parent 26f7f42 commit 78b3f95
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,14 @@ def content(self, content: str):
self.__content = content

def get_line(self, line: int) -> str:
return util.get_line(self.original_path, line)
""" Get content from the given line.
Load file content if it's not loaded yet.
"""
if self.__content is None:
return util.get_line(self.original_path, line)

return self.__content.splitlines(keepends=True)[line - 1]

def trim(self, path_prefixes: Optional[List[str]] = None) -> str:
""" Removes the longest matching leading path from the file paths. """
Expand Down Expand Up @@ -116,7 +123,7 @@ def __hash__(self) -> int:
return builtins.hash(self.id)

def __repr__(self):
return self.to_json()
return json.dumps(self.to_json())


def get_or_create_file(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ def __get_source_file_for_analyzer_result_file(
return None


def format_source_line(event: BugPathEvent) -> str:
def format_main_report(report: Report) -> str:
""" Format bug path event. """
line = event.file.get_line(event.line)
line = report.source_line
if line == '':
return ''

marker_line = line[0:(event.column - 1)]
marker_line = line[0:(report.column - 1)]
marker_line = ' ' * (len(marker_line) + marker_line.count('\t'))

line = line.replace('\t', ' ')
Expand Down Expand Up @@ -158,13 +158,11 @@ def convert(
reports = sorted(source_file_report_map[file_path],
key=lambda report: report.line)
for report in reports:
last_event = report.bug_path_events[-1]

# If file content is changed, do not print the source code comments
# (if available) and instead of the source line, print a warning
# message.
content_is_not_changed = \
last_event.file.original_path not in report.changed_files
report.file.original_path not in report.changed_files

if content_is_not_changed:
report.dump_source_code_comment_warnings()
Expand All @@ -177,7 +175,7 @@ def convert(
if source_code_comment.line:
output.write(f"{source_code_comment.line.rstrip()}\n")

output.write(f"{format_source_line(last_event)}")
output.write(f"{format_main_report(report)}")
else:
output.write(InvalidFileContentMsg)

Expand Down
96 changes: 59 additions & 37 deletions web/client/codechecker_client/cmd_line_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,38 +771,46 @@ def handle_diff_results(args):

context = webserver_context.get_context()

client = None

file_cache: Dict[int, File] = {}

def cached_report_file_lookup(file_id):
def cached_report_file_lookup(file_id: int, file_path: str) -> File:
"""
Get source file data for the given file and caches it in a file cache
if file data is not found in the cache. Finally, it returns the source
file data from the cache.
It will create a file object with the given 'file_path' attribute if
file data is not found in the cache already based on the following
conditions:
- if HTML output is given it will try to get source file data for the
given file id from the server and set the content attribute of the
file object. File content is needed only by the HTML output
converter.
- otherwise it will create a file object where file content will not
be filled automatically but will be loaded from the host machine
when it is needed.
Finally, it returns the source file data from the cache.
"""
nonlocal file_cache

if file_id not in file_cache:
source = client.getSourceFileData(
file_id, True, ttypes.Encoding.BASE64)
content = convert.from_b64(source.fileContent)
file_cache[file_id] = File(file_path, file_id)

file_cache[file_id] = File(source.filePath, file_id, content)
if 'html' in args.output_format:
source = client.getSourceFileData(
file_id, True, ttypes.Encoding.BASE64)
file_cache[file_id].content = convert.from_b64(
source.fileContent)

return file_cache[file_id]

def convert_report_data_to_report(
client,
reports_data: List[ttypes.ReportData]
) -> List[Report]:
""" Convert the given report data list to local reports. """
reports = []

if not reports_data:
return reports

# Get source line contents from the server.
def get_source_line_contents(
reports: List[ttypes.ReportData]
) -> Dict[int, Dict[int, str]]:
"""
Get source line contents from the server for the given report data.
"""
source_lines = defaultdict(set)
for report_data in reports_data:
for report_data in reports:
source_lines[report_data.fileId].add(report_data.line)

lines_in_files_requested = []
Expand All @@ -811,22 +819,40 @@ def convert_report_data_to_report(
ttypes.LinesInFilesRequested(fileId=file_id,
lines=source_lines[file_id]))

source_line_contents = client.getLinesInSourceFileContents(
return client.getLinesInSourceFileContents(
lines_in_files_requested, ttypes.Encoding.BASE64)

def convert_report_data_to_report(
reports_data: List[ttypes.ReportData]
) -> List[Report]:
"""
Convert the given report data list to local reports.
If one of the given output format is HTML it will get full source file
contents from the server otherwise for performance reason it will
get only necessarry line contents.
"""
reports = []

if not reports_data:
return reports

source_line_contents = {}
if 'html' not in args.output_format:
source_line_contents = get_source_line_contents(reports_data)

# Convert reports data to reports.
for report_data in reports_data:
report = report_type_converter.to_report(report_data)

# For HTML output we need to override the file and get content
# from the server.
if 'html' in args.output_format:
report.file = cached_report_file_lookup(report_data.fileId)
report = report_type_converter.to_report(
report_data, cached_report_file_lookup)

report.changed_files = []
report.source_code_comments = []
report.source_line = \
source_line_contents[report_data.fileId][report_data.line]

if source_line_contents:
source_line = convert.from_b64(
source_line_contents[report_data.fileId][report_data.line])
report.source_line = f"{source_line}{os.linesep}"

# TODO: get details
reports.append(report)
Expand Down Expand Up @@ -862,8 +888,7 @@ def get_diff_local_dir_remote_run(
results = get_diff_base_results(
client, args, run_ids, remote_hashes, suppressed_in_code)

filtered_reports.extend(
convert_report_data_to_report(client, results))
filtered_reports.extend(convert_report_data_to_report(results))
elif diff_type == ttypes.DiffType.UNRESOLVED:
# Get remote hashes which can be found in the remote run and in the
# local report directory.
Expand All @@ -889,8 +914,7 @@ def get_diff_local_dir_remote_run(
for result in results:
filtered_report_hashes.discard(result.bugHash)

filtered_reports.extend(
convert_report_data_to_report(client, results))
filtered_reports.extend(convert_report_data_to_report(results))
elif diff_type == ttypes.DiffType.RESOLVED:
# Get remote hashes which can be found in the remote run and in the
# local report directory.
Expand Down Expand Up @@ -952,8 +976,7 @@ def get_diff_remote_run_local_dir(
results = get_diff_base_results(
client, args, run_ids, remote_hashes, suppressed_in_code)

filtered_reports.extend(
convert_report_data_to_report(client, results))
filtered_reports.extend(convert_report_data_to_report(results))

return filtered_reports, filtered_report_hashes, run_names

Expand Down Expand Up @@ -995,7 +1018,7 @@ def get_diff_remote_runs(
client, base_ids, constants.MAX_QUERY_SIZE, 0, sort_mode,
report_filter, cmp_data, False)

reports = convert_report_data_to_report(client, all_results)
reports = convert_report_data_to_report(all_results)
return reports, base_run_names, new_run_names

def get_diff_local_dirs(
Expand Down Expand Up @@ -1187,7 +1210,6 @@ def print_reports(
LOG.info("Matching local baseline files (--newname): %s",
', '.join(newname_baseline_files))

client = None
# We set up the client if we are not comparing two local report directories
# or baseline files.
if basename_run_names or newname_run_names:
Expand Down
8 changes: 6 additions & 2 deletions web/client/codechecker_client/report_type_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,22 @@

""" Convert between Report type and thrift ReportData type. """

from typing import Callable
from codechecker_api.codeCheckerDBAccess_v6.ttypes import ReportData, Severity

from codechecker_report_converter.report import File, Report


def to_report(report: ReportData) -> Report:
def to_report(
report: ReportData,
get_file: Callable[[int, str], File]
) -> Report:
""" Create a Report object from the given thrift report data. """
severity = Severity._VALUES_TO_NAMES[report.severity] \
if report.severity else 'UNSPECIFIED'

return Report(
File(report.checkedFile),
get_file(report.fileId, report.checkedFile),
report.line,
report.column,
report.checkerMsg,
Expand Down
6 changes: 5 additions & 1 deletion web/client/tests/unit/test_report_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@ def test_ReportData_to_Report(self):
bugPathLength=5,
)

report = report_type_converter.to_report(rep_data)
def get_file(file_id: int, file_path: str) -> File:
""" Get file object for the given report. """
return File(file_path, file_id)

report = report_type_converter.to_report(rep_data, get_file)
self.assertEqual(report.checker_name, rep_data.checkerId)
self.assertEqual(report.report_hash, rep_data.bugHash)
self.assertEqual(report.file.path, rep_data.checkedFile)
Expand Down
3 changes: 2 additions & 1 deletion web/tests/functional/diff_remote/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ def setup_package():
'skip_list_file': skip_list_file,
'check_env': test_env,
'workspace': TEST_WORKSPACE,
'checkers': []
'checkers': [],
'trim_path_prefix': TEST_WORKSPACE
}

# Start or connect to the running CodeChecker server and get connection
Expand Down
51 changes: 45 additions & 6 deletions web/tests/functional/diff_remote/test_diff_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,16 @@

import os
import re
import shutil
import unittest
from datetime import datetime, timedelta

from codechecker_api.codeCheckerDBAccess_v6.ttypes import CompareData, \
DiffType, Order, ReportFilter, ReviewStatus, RunHistoryFilter, \
RunSortMode, RunSortType, Severity

from codechecker_report_converter.report import InvalidFileContentMsg

from libtest import env
from libtest.codechecker import get_diff_results
from libtest.debug_printer import print_run_results
Expand Down Expand Up @@ -48,31 +51,31 @@ class DiffRemote(unittest.TestCase):
def setUp(self):

# TEST_WORKSPACE is automatically set by test package __init__.py .
test_workspace = os.environ['TEST_WORKSPACE']
self.test_workspace = os.environ['TEST_WORKSPACE']

test_class = self.__class__.__name__
print('Running ' + test_class + ' tests in ' + test_workspace)
print('Running ' + test_class + ' tests in ' + self.test_workspace)

# Get the test configuration from the prepared int the test workspace.
self.test_cfg = env.import_test_cfg(test_workspace)
self.test_cfg = env.import_test_cfg(self.test_workspace)

# Get the clang version which is tested.
self._clang_to_test = env.clang_to_test()

# Get the test project configuration from the prepared test workspace.
self._testproject_data = env.setup_test_proj_cfg(test_workspace)
self._testproject_data = env.setup_test_proj_cfg(self.test_workspace)
self.assertIsNotNone(self._testproject_data)

# Setup a viewer client to test viewer API calls.
self._cc_client = env.setup_viewer_client(test_workspace)
self._cc_client = env.setup_viewer_client(self.test_workspace)
self.assertIsNotNone(self._cc_client)

# Get the CodeChecker cmd if needed for the tests.
self._codechecker_cmd = env.codechecker_cmd()

# Get the run names which belong to this test.
# Name order matters from __init__ !
run_names = env.get_run_names(test_workspace)
run_names = env.get_run_names(self.test_workspace)

sort_mode = RunSortMode(RunSortType.DATE, Order.ASC)
runs = self._cc_client.getRunData(None, None, 0, sort_mode)
Expand Down Expand Up @@ -706,3 +709,39 @@ def test_multiple_runs(self):
'--unresolved', 'json', ["--url", self._url])

self.assertNotEqual(len(unresolved_results[0]), 0)

def test_source_line_content(self):
"""
Check that line / file contents are set properly for different
output types.
"""
base_run_name = self._test_runs[0].name
new_run_name = self._test_runs[1].name

html_reports = os.path.join(self.test_workspace, "html_reports")

base_run_names = [base_run_name, new_run_name]
new_run_names = [new_run_name, base_run_name]
extra_args = [
"--url", self._url,
"--file", "*/divide_zero.cpp",
"--checker-name", "core.DivideZero",
"--output", "plaintext", "html",
'--export-dir', html_reports]

# Check plain text output.
out, _, _ = \
get_diff_results(base_run_names, new_run_names,
'--unresolved', None, extra_args)

lines = out.split(os.linesep)
for idx, line in enumerate(lines):
if '[core.DivideZero]' in line:
self.assertTrue(lines[idx + 1].strip(), "Invalid line content")

# Check HTML output
for file_path in os.listdir(html_reports):
with open(os.path.join(html_reports, file_path)) as f:
self.assertNotIn(InvalidFileContentMsg, f.read())

shutil.rmtree(html_reports, ignore_errors=True)

0 comments on commit 78b3f95

Please sign in to comment.