[cli] Fix source line / file for remote reports

Previously if a run is stored to a CodeChecker server where the file paths are trimmed and this run is given to `CodeChecker cmd diff` command, and a remote report was in the result set of this command, the plain text output contained an exception that the client failed to open the source file for reading. This problem can be easily reproduced with the following steps: - Log and analyze a project. - Store the same results to two remote runs and use the `--trim-path-prefix` option. - Run the following command: `CodeChecker cmd diff -b remote_run1 -n remote_run2 --unresolved` With this patch if HTML output format is given to the CodeChecker diff command, we will try to fetch source file contents from the server and use it instead of trying to get it from the user's local system. Also if different output format is given (e.g.: plaintext) for performance reason we will not fetch the whole source file contents but only the necessary source line conents.
PeterKaszab · Jan 12, 2022 · 78b3f95 · 78b3f95
1 parent 26f7f42
commit 78b3f95
Show file tree

Hide file tree

Showing 7 changed files with 131 additions and 56 deletions.
diff --git a/tools/report-converter/codechecker_report_converter/report/__init__.py b/tools/report-converter/codechecker_report_converter/report/__init__.py
@@ -87,7 +87,14 @@ def content(self, content: str):
             self.__content = content
 
     def get_line(self, line: int) -> str:
-        return util.get_line(self.original_path, line)
+        """ Get content from the given line.
+
+        Load file content if it's not loaded yet.
+        """
+        if self.__content is None:
+            return util.get_line(self.original_path, line)
+
+        return self.__content.splitlines(keepends=True)[line - 1]
 
     def trim(self, path_prefixes: Optional[List[str]] = None) -> str:
         """ Removes the longest matching leading path from the file paths. """
@@ -116,7 +123,7 @@ def __hash__(self) -> int:
         return builtins.hash(self.id)
 
     def __repr__(self):
-        return self.to_json()
+        return json.dumps(self.to_json())
 
 
 def get_or_create_file(

diff --git a/tools/report-converter/codechecker_report_converter/report/output/plaintext.py b/tools/report-converter/codechecker_report_converter/report/output/plaintext.py
@@ -44,13 +44,13 @@ def __get_source_file_for_analyzer_result_file(
     return None
 
 
-def format_source_line(event: BugPathEvent) -> str:
+def format_main_report(report: Report) -> str:
     """ Format bug path event. """
-    line = event.file.get_line(event.line)
+    line = report.source_line
     if line == '':
         return ''
 
-    marker_line = line[0:(event.column - 1)]
+    marker_line = line[0:(report.column - 1)]
     marker_line = ' ' * (len(marker_line) + marker_line.count('\t'))
 
     line = line.replace('\t', '  ')
@@ -158,13 +158,11 @@ def convert(
         reports = sorted(source_file_report_map[file_path],
                          key=lambda report: report.line)
         for report in reports:
-            last_event = report.bug_path_events[-1]
-
             # If file content is changed, do not print the source code comments
             # (if available) and instead of the source line, print a warning
             # message.
             content_is_not_changed = \
-                last_event.file.original_path not in report.changed_files
+                report.file.original_path not in report.changed_files
 
             if content_is_not_changed:
                 report.dump_source_code_comment_warnings()
@@ -177,7 +175,7 @@ def convert(
                     if source_code_comment.line:
                         output.write(f"{source_code_comment.line.rstrip()}\n")
 
-                output.write(f"{format_source_line(last_event)}")
+                output.write(f"{format_main_report(report)}")
             else:
                 output.write(InvalidFileContentMsg)
 

diff --git a/web/client/codechecker_client/cmd_line_client.py b/web/client/codechecker_client/cmd_line_client.py
@@ -771,38 +771,46 @@ def handle_diff_results(args):
 
     context = webserver_context.get_context()
 
+    client = None
+
     file_cache: Dict[int, File] = {}
 
-    def cached_report_file_lookup(file_id):
+    def cached_report_file_lookup(file_id: int, file_path: str) -> File:
         """
-        Get source file data for the given file and caches it in a file cache
-        if file data is not found in the cache. Finally, it returns the source
-        file data from the cache.
+        It will create a file object with the given 'file_path' attribute if
+        file data is not found in the cache already based on the following
+        conditions:
+          - if HTML output is given it will try to get source file data for the
+            given file id from the server and set the content attribute of the
+            file object. File content is needed only by the HTML output
+            converter.
+          - otherwise it will create a file object where file content will not
+            be filled automatically but will be loaded from the host machine
+            when it is needed.
+
+        Finally, it returns the source file data from the cache.
         """
         nonlocal file_cache
 
         if file_id not in file_cache:
-            source = client.getSourceFileData(
-                file_id, True, ttypes.Encoding.BASE64)
-            content = convert.from_b64(source.fileContent)
+            file_cache[file_id] = File(file_path, file_id)
 
-            file_cache[file_id] = File(source.filePath, file_id, content)
+            if 'html' in args.output_format:
+                source = client.getSourceFileData(
+                    file_id, True, ttypes.Encoding.BASE64)
+                file_cache[file_id].content = convert.from_b64(
+                    source.fileContent)
 
         return file_cache[file_id]
 
-    def convert_report_data_to_report(
-        client,
-        reports_data: List[ttypes.ReportData]
-    ) -> List[Report]:
-        """ Convert the given report data list to local reports. """
-        reports = []
-
-        if not reports_data:
-            return reports
-
-        # Get source line contents from the server.
+    def get_source_line_contents(
+        reports: List[ttypes.ReportData]
+    ) -> Dict[int, Dict[int, str]]:
+        """
+        Get source line contents from the server for the given report data.
+        """
         source_lines = defaultdict(set)
-        for report_data in reports_data:
+        for report_data in reports:
             source_lines[report_data.fileId].add(report_data.line)
 
         lines_in_files_requested = []
@@ -811,22 +819,40 @@ def convert_report_data_to_report(
                 ttypes.LinesInFilesRequested(fileId=file_id,
                                              lines=source_lines[file_id]))
 
-        source_line_contents = client.getLinesInSourceFileContents(
+        return client.getLinesInSourceFileContents(
             lines_in_files_requested, ttypes.Encoding.BASE64)
 
+    def convert_report_data_to_report(
+        reports_data: List[ttypes.ReportData]
+    ) -> List[Report]:
+        """
+        Convert the given report data list to local reports.
+
+        If one of the given output format is HTML it will get full source file
+        contents from the server otherwise for performance reason it will
+        get only necessarry line contents.
+        """
+        reports = []
+
+        if not reports_data:
+            return reports
+
+        source_line_contents = {}
+        if 'html' not in args.output_format:
+            source_line_contents = get_source_line_contents(reports_data)
+
         # Convert reports data to reports.
         for report_data in reports_data:
-            report = report_type_converter.to_report(report_data)
-
-            # For HTML output we need to override the file and get content
-            # from the server.
-            if 'html' in args.output_format:
-                report.file = cached_report_file_lookup(report_data.fileId)
+            report = report_type_converter.to_report(
+                report_data, cached_report_file_lookup)
 
             report.changed_files = []
             report.source_code_comments = []
-            report.source_line = \
-                source_line_contents[report_data.fileId][report_data.line]
+
+            if source_line_contents:
+                source_line = convert.from_b64(
+                    source_line_contents[report_data.fileId][report_data.line])
+                report.source_line = f"{source_line}{os.linesep}"
 
             # TODO: get details
             reports.append(report)
@@ -862,8 +888,7 @@ def get_diff_local_dir_remote_run(
             results = get_diff_base_results(
                 client, args, run_ids, remote_hashes, suppressed_in_code)
 
-            filtered_reports.extend(
-                convert_report_data_to_report(client, results))
+            filtered_reports.extend(convert_report_data_to_report(results))
         elif diff_type == ttypes.DiffType.UNRESOLVED:
             # Get remote hashes which can be found in the remote run and in the
             # local report directory.
@@ -889,8 +914,7 @@ def get_diff_local_dir_remote_run(
                 for result in results:
                     filtered_report_hashes.discard(result.bugHash)
 
-                filtered_reports.extend(
-                    convert_report_data_to_report(client, results))
+                filtered_reports.extend(convert_report_data_to_report(results))
         elif diff_type == ttypes.DiffType.RESOLVED:
             # Get remote hashes which can be found in the remote run and in the
             # local report directory.
@@ -952,8 +976,7 @@ def get_diff_remote_run_local_dir(
             results = get_diff_base_results(
                 client, args, run_ids, remote_hashes, suppressed_in_code)
 
-            filtered_reports.extend(
-                convert_report_data_to_report(client, results))
+            filtered_reports.extend(convert_report_data_to_report(results))
 
         return filtered_reports, filtered_report_hashes, run_names
 
@@ -995,7 +1018,7 @@ def get_diff_remote_runs(
             client, base_ids, constants.MAX_QUERY_SIZE, 0, sort_mode,
             report_filter, cmp_data, False)
 
-        reports = convert_report_data_to_report(client, all_results)
+        reports = convert_report_data_to_report(all_results)
         return reports, base_run_names, new_run_names
 
     def get_diff_local_dirs(
@@ -1187,7 +1210,6 @@ def print_reports(
         LOG.info("Matching local baseline files (--newname): %s",
                  ', '.join(newname_baseline_files))
 
-    client = None
     # We set up the client if we are not comparing two local report directories
     # or baseline files.
     if basename_run_names or newname_run_names:

diff --git a/web/client/codechecker_client/report_type_converter.py b/web/client/codechecker_client/report_type_converter.py
@@ -8,18 +8,22 @@
 
 """ Convert between Report type and thrift ReportData type. """
 
+from typing import Callable
 from codechecker_api.codeCheckerDBAccess_v6.ttypes import ReportData, Severity
 
 from codechecker_report_converter.report import File, Report
 
 
-def to_report(report: ReportData) -> Report:
+def to_report(
+    report: ReportData,
+    get_file: Callable[[int, str], File]
+) -> Report:
     """ Create a Report object from the given thrift report data. """
     severity = Severity._VALUES_TO_NAMES[report.severity] \
         if report.severity else 'UNSPECIFIED'
 
     return Report(
-        File(report.checkedFile),
+        get_file(report.fileId, report.checkedFile),
         report.line,
         report.column,
         report.checkerMsg,

diff --git a/web/client/tests/unit/test_report_converter.py b/web/client/tests/unit/test_report_converter.py
@@ -59,7 +59,11 @@ def test_ReportData_to_Report(self):
             bugPathLength=5,
         )
 
-        report = report_type_converter.to_report(rep_data)
+        def get_file(file_id: int, file_path: str) -> File:
+            """ Get file object for the given report. """
+            return File(file_path, file_id)
+
+        report = report_type_converter.to_report(rep_data, get_file)
         self.assertEqual(report.checker_name, rep_data.checkerId)
         self.assertEqual(report.report_hash, rep_data.bugHash)
         self.assertEqual(report.file.path, rep_data.checkedFile)

diff --git a/web/tests/functional/diff_remote/__init__.py b/web/tests/functional/diff_remote/__init__.py
@@ -75,7 +75,8 @@ def setup_package():
         'skip_list_file': skip_list_file,
         'check_env': test_env,
         'workspace': TEST_WORKSPACE,
-        'checkers': []
+        'checkers': [],
+        'trim_path_prefix': TEST_WORKSPACE
     }
 
     # Start or connect to the running CodeChecker server and get connection

diff --git a/web/tests/functional/diff_remote/test_diff_remote.py b/web/tests/functional/diff_remote/test_diff_remote.py
@@ -14,13 +14,16 @@
 
 import os
 import re
+import shutil
 import unittest
 from datetime import datetime, timedelta
 
 from codechecker_api.codeCheckerDBAccess_v6.ttypes import CompareData, \
     DiffType, Order, ReportFilter, ReviewStatus, RunHistoryFilter, \
     RunSortMode, RunSortType, Severity
 
+from codechecker_report_converter.report import InvalidFileContentMsg
+
 from libtest import env
 from libtest.codechecker import get_diff_results
 from libtest.debug_printer import print_run_results
@@ -48,31 +51,31 @@ class DiffRemote(unittest.TestCase):
     def setUp(self):
 
         # TEST_WORKSPACE is automatically set by test package __init__.py .
-        test_workspace = os.environ['TEST_WORKSPACE']
+        self.test_workspace = os.environ['TEST_WORKSPACE']
 
         test_class = self.__class__.__name__
-        print('Running ' + test_class + ' tests in ' + test_workspace)
+        print('Running ' + test_class + ' tests in ' + self.test_workspace)
 
         # Get the test configuration from the prepared int the test workspace.
-        self.test_cfg = env.import_test_cfg(test_workspace)
+        self.test_cfg = env.import_test_cfg(self.test_workspace)
 
         # Get the clang version which is tested.
         self._clang_to_test = env.clang_to_test()
 
         # Get the test project configuration from the prepared test workspace.
-        self._testproject_data = env.setup_test_proj_cfg(test_workspace)
+        self._testproject_data = env.setup_test_proj_cfg(self.test_workspace)
         self.assertIsNotNone(self._testproject_data)
 
         # Setup a viewer client to test viewer API calls.
-        self._cc_client = env.setup_viewer_client(test_workspace)
+        self._cc_client = env.setup_viewer_client(self.test_workspace)
         self.assertIsNotNone(self._cc_client)
 
         # Get the CodeChecker cmd if needed for the tests.
         self._codechecker_cmd = env.codechecker_cmd()
 
         # Get the run names which belong to this test.
         # Name order matters from __init__ !
-        run_names = env.get_run_names(test_workspace)
+        run_names = env.get_run_names(self.test_workspace)
 
         sort_mode = RunSortMode(RunSortType.DATE, Order.ASC)
         runs = self._cc_client.getRunData(None, None, 0, sort_mode)
@@ -706,3 +709,39 @@ def test_multiple_runs(self):
                              '--unresolved', 'json', ["--url", self._url])
 
         self.assertNotEqual(len(unresolved_results[0]), 0)
+
+    def test_source_line_content(self):
+        """
+        Check that line / file contents are set properly for different
+        output types.
+        """
+        base_run_name = self._test_runs[0].name
+        new_run_name = self._test_runs[1].name
+
+        html_reports = os.path.join(self.test_workspace, "html_reports")
+
+        base_run_names = [base_run_name, new_run_name]
+        new_run_names = [new_run_name, base_run_name]
+        extra_args = [
+            "--url", self._url,
+            "--file", "*/divide_zero.cpp",
+            "--checker-name", "core.DivideZero",
+            "--output", "plaintext", "html",
+            '--export-dir', html_reports]
+
+        # Check plain text output.
+        out, _, _ = \
+            get_diff_results(base_run_names, new_run_names,
+                             '--unresolved', None, extra_args)
+
+        lines = out.split(os.linesep)
+        for idx, line in enumerate(lines):
+            if '[core.DivideZero]' in line:
+                self.assertTrue(lines[idx + 1].strip(), "Invalid line content")
+
+        # Check HTML output
+        for file_path in os.listdir(html_reports):
+            with open(os.path.join(html_reports, file_path)) as f:
+                self.assertNotIn(InvalidFileContentMsg, f.read())
+
+        shutil.rmtree(html_reports, ignore_errors=True)