From d162500e5318b7e0b35fc0cc326c6886d6960c7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20Csord=C3=A1s?= Date: Wed, 4 Aug 2021 14:48:55 +0200 Subject: [PATCH] [cli] Local diff workflow support - Extend the `CodeChecker parse` command with an extra baseline output type which can be used to generate a baseline file which will contain report hashes for legacy reports. - Extend the `CodeChecker cmd diff` to support baseline files. - Add test cases. - Extend the documentation with the recommended usage of this workflow. --- analyzer/codechecker_analyzer/cmd/parse.py | 31 +- .../test_analyze_and_parse.py | 46 +++ codechecker_common/output/baseline.py | 76 +++++ docs/analyzer/user_guide.md | 17 +- docs/usage.md | 19 ++ docs/web/user_guide.md | 37 ++- web/client/codechecker_client/cmd/cmd.py | 35 ++- .../codechecker_client/cmd_line_client.py | 271 +++++++++++------- .../functional/diff_local/test_diff_local.py | 115 +++++++- .../test_diff_local_remote.py | 109 ++++++- web/tests/libtest/codechecker.py | 14 + 11 files changed, 625 insertions(+), 145 deletions(-) create mode 100644 codechecker_common/output/baseline.py diff --git a/analyzer/codechecker_analyzer/cmd/parse.py b/analyzer/codechecker_analyzer/cmd/parse.py index 1f3202f61b..ca7694fb18 100644 --- a/analyzer/codechecker_analyzer/cmd/parse.py +++ b/analyzer/codechecker_analyzer/cmd/parse.py @@ -26,7 +26,7 @@ from codechecker_analyzer import analyzer_context, suppress_handler from codechecker_common import arg, logger, plist_parser, util, cmd_config -from codechecker_common.output import json as out_json, twodim, \ +from codechecker_common.output import baseline, json as out_json, twodim, \ codeclimate, gerrit from codechecker_common.skiplist_handler import SkipListHandler from codechecker_common.source_code_comment_handler import \ @@ -37,7 +37,7 @@ LOG = logger.get_logger('system') -EXPORT_TYPES = ['html', 'json', 'codeclimate', 'gerrit'] +EXPORT_TYPES = ['html', 'json', 'codeclimate', 'gerrit', 'baseline'] _data_files_dir_path = analyzer_context.get_context().data_files_dir_path _severity_map_file = os.path.join(_data_files_dir_path, 'config', @@ -457,7 +457,11 @@ def add_arguments_to_parser(parser): "For more information see:\n" "https://github.com/codeclimate/platform/" "blob/master/spec/analyzers/SPEC.md" - "#data-types") + "#data-types\n" + "'baseline' output can be used to integrate " + "CodeChecker into your local workflow " + "without using a CodeChecker server. 
For " + "more information see our usage guide.") output_opts.add_argument('-o', '--output', dest="output_path", @@ -639,6 +643,9 @@ def _parse_convert_reports( report.trim_path_prefixes(trim_path_prefixes) number_of_reports = len(all_reports) + if out_format == "baseline": + return (baseline.convert(all_reports), number_of_reports) + if out_format == "codeclimate": return (codeclimate.convert(all_reports, severity_map), number_of_reports) @@ -693,11 +700,6 @@ def _generate_json_output( output_text = json.dumps(reports) if output_path: - output_path = os.path.abspath(output_path) - - if not os.path.exists(output_path): - os.mkdir(output_path) - output_file_path = os.path.join(output_path, 'reports.json') with open(output_file_path, mode='w', encoding='utf-8', errors="ignore") as output_f: @@ -793,7 +795,20 @@ def main(args): if 'output_path' in args: output_path = os.path.abspath(args.output_path) + if not os.path.exists(output_path): + os.makedirs(output_path) + if export: + if export == 'baseline': + report_hashes, number_of_reports = _parse_convert_reports( + args.input, export, context.severity_map, trim_path_prefixes, + skip_handler) + + if output_path: + baseline.write(output_path, report_hashes) + + sys.exit(2 if number_of_reports else 0) + # The HTML part will be handled separately below. if export != 'html': sys.exit(_generate_json_output( diff --git a/analyzer/tests/functional/analyze_and_parse/test_analyze_and_parse.py b/analyzer/tests/functional/analyze_and_parse/test_analyze_and_parse.py index 2e78ef33ce..f5ebfa5b72 100644 --- a/analyzer/tests/functional/analyze_and_parse/test_analyze_and_parse.py +++ b/analyzer/tests/functional/analyze_and_parse/test_analyze_and_parse.py @@ -24,6 +24,8 @@ from libtest import project from libtest.codechecker import call_command +from codechecker_common.output import baseline + class AnalyzeParseTestCaseMeta(type): def __new__(mcs, name, bases, test_dict): @@ -544,3 +546,47 @@ def test_html_export_exit_code(self): out, _, result = call_command(extract_cmd, cwd=self.test_dir, env=self.env) self.assertEqual(result, 0, "Parsing should not found any issue.") + + def test_baseline_output(self): + """ Test parse baseline output. """ + output_path = self.test_workspaces['OUTPUT'] + out_file_path = os.path.join(output_path, "reports.baseline") + + # Analyze the first project. + test_project_notes = os.path.join( + self.test_workspaces['NORMAL'], "test_files", "notes") + + extract_cmd = ['CodeChecker', 'parse', + "-e", "baseline", + "-o", output_path, + test_project_notes, + '--trim-path-prefix', test_project_notes] + + _, _, result = call_command( + extract_cmd, cwd=self.test_dir, env=self.env) + self.assertEqual(result, 2, "Parsing not found any issue.") + + report_hashes = baseline.get_report_hashes([out_file_path]) + self.assertEqual( + report_hashes, {'3d15184f38c5fa57e479b744fe3f5035'}) + + # Analyze the second project and see whether the baseline file is + # merged. 
+ test_project_macros = os.path.join( + self.test_workspaces['NORMAL'], "test_files", "macros") + + extract_cmd = ['CodeChecker', 'parse', + "-e", "baseline", + "-o", output_path, + test_project_macros, + '--trim-path-prefix', test_project_macros] + + _, _, result = call_command( + extract_cmd, cwd=self.test_dir, env=self.env) + self.assertEqual(result, 2, "Parsing not found any issue.") + + report_hashes = baseline.get_report_hashes([out_file_path]) + self.assertEqual( + report_hashes, { + '3d15184f38c5fa57e479b744fe3f5035', + 'f8fbc46cc5afbb056d92bd3d3d702781'}) diff --git a/codechecker_common/output/baseline.py b/codechecker_common/output/baseline.py new file mode 100644 index 0000000000..d1dfa2fe22 --- /dev/null +++ b/codechecker_common/output/baseline.py @@ -0,0 +1,76 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" CodeChecker baseline output helpers. """ + +from io import TextIOWrapper +import os +from typing import Iterable, List, Set + +from codechecker_common import logger +from codechecker_common.report import Report + + +LOG = logger.get_logger('system') + + +def __get_report_hashes(f: TextIOWrapper) -> List[str]: + """ Get report hashes from the given file. """ + return [h for h in f.readlines() if h] + + +def get_report_hashes( + baseline_file_paths: Iterable[str] +) -> Set[str]: + """ Get uniqued hashes from baseline files. """ + report_hashes = set() + for file_path in baseline_file_paths: + with open(file_path, mode='r', encoding='utf-8', errors="ignore") as f: + report_hashes.update(__get_report_hashes(f)) + + return report_hashes + + +def convert(reports: Iterable[Report]) -> List[str]: + """ Convert the given reports to CodeChecker baseline format. + + Returns a list of sorted unique report hashes. + """ + return sorted(set(r.report_hash for r in reports)) + + +def write(output_dir_path: str, report_hashes: Iterable[str]): + """ Create a new baseline file or extend an existing one with the given + report hashes in the given output directory. It will remove the duplicates + and also sort the report hashes before writing it to a file. 
+ """ + file_path = os.path.join(output_dir_path, 'reports.baseline') + with open(file_path, mode='a+', encoding='utf-8', errors="ignore") as f: + f.seek(0) + old_report_hashes = __get_report_hashes(f) + new_report_hashes = set(report_hashes) - set(old_report_hashes) + + if not new_report_hashes: + LOG.info("Baseline file (%s) is up-to-date.", file_path) + return + + if old_report_hashes: + LOG.info("Merging existing baseline file: %s", file_path) + else: + LOG.info("Creating new baseline file: %s", file_path) + + LOG.info("Total number of old report hashes: %d", + len(old_report_hashes)) + LOG.info("Total number of new report hashes: %d", + len(new_report_hashes)) + + LOG.debug("New report hashes: %s", sorted(new_report_hashes)) + + f.seek(0) + f.truncate() + f.write("\n".join(sorted( + set([*old_report_hashes, *report_hashes])))) diff --git a/docs/analyzer/user_guide.md b/docs/analyzer/user_guide.md index 1b376bb786..3a2629228e 100644 --- a/docs/analyzer/user_guide.md +++ b/docs/analyzer/user_guide.md @@ -1577,10 +1577,11 @@ Statistics analysis feature arguments: ``` -Usage: CodeChecker parse [-h] [--config CONFIG_FILE] [-t {plist}] - [-e {html,json,codeclimate,gerrit}] [-o OUTPUT_PATH] - [--suppress SUPPRESS] [--export-source-suppress] - [--print-steps] [-i SKIPFILE] +usage: CodeChecker parse [-h] [--config CONFIG_FILE] [-t {plist}] + [-e {html,json,codeclimate,gerrit,baseline}] + [-o OUTPUT_PATH] [--suppress SUPPRESS] + [--export-source-suppress] [--print-steps] + [-i SKIPFILE] [--trim-path-prefix [TRIM_PATH_PREFIX [TRIM_PATH_PREFIX ...]]] [--review-status [REVIEW_STATUS [REVIEW_STATUS ...]]] [--verbose {info,debug_analyzer,debug}] @@ -1643,12 +1644,16 @@ optional arguments: Set verbosity level. export arguments: - -e {html,json,codeclimate,gerrit}, --export {html,json,codeclimate,gerrit} + -e {html,json,codeclimate,gerrit,baseline}, --export {html,json,codeclimate,gerrit,baseline} Specify extra output format type. 'codeclimate' format can be used for Code Climate and for GitLab integration. For more information see: https://github.com/codeclimate/platform/blob/master/sp - ec/analyzers/SPEC.md#data-types (default: None) + ec/analyzers/SPEC.md#data-types + 'baseline' output can be used to integrate CodeChecker + into your local workflow without using a CodeChecker + server. For more information see our usage guide. + (default: None) -o OUTPUT_PATH, --output OUTPUT_PATH Store the output in the given folder. diff --git a/docs/usage.md b/docs/usage.md index 394bbc03ce..d6a4ae95a7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -41,6 +41,7 @@ It invokes Clang Static Analyzer and Clang-Tidy tools to analyze your code. - [Alternative 2: Store each analysis in a new run](#storing-new-runs) - [Gerrit Integration](#gerrit-integration) - [Setting up user authentication](authentication) + - [Step 9: Integrate CodeChecker into your local workflow](#step-9) - [Updating CodeChecker to new version](#upgrade) - [Unique Report Identifier (RI)](#unique-report-identifier) - [Listing and Counting Reports](#listing-reports) @@ -754,6 +755,24 @@ guide. You can set up authentication for your server and (web,command line) clients as described in the [Authentication Guide](web/authentication.md). + +## Step 9: Integrate CodeChecker into your local workflow +If you want to use CodeChecker in your project but you don't want to run a +CodeChecker server and to fix every reports found by CodeChecker on the first +time (legacy findings) you can do the following steps: +1. 
Analyze your project to a report directory (e.g.: `./reports`). For more +information see [Step 2](#step-2). +2. Create a baseline file from the reports which contains the legacy findings: +`CodeChecker parse ./reports -e baseline -o .`. It is recommended to store +this baseline file (`reports.baseline`) in your repository. +3. On source code changes after your project is re-analyzed use the +CodeChecker diff command to get the new reports: +`CodeChecker cmd diff -b ./reports.baseline -n ./reports --new` +4. On configuration changes (new checkers / options are enabled / disabled, +new CodeChecker / clang version is used, etc.) re-generate the baseline file +(step 1-2). + + ## Updating CodeChecker to new version If a new CodeChecker release is available it might be possible that there are some database changes compared to the previous release. If you run into diff --git a/docs/web/user_guide.md b/docs/web/user_guide.md index fe0212793a..5e5cdc6af5 100644 --- a/docs/web/user_guide.md +++ b/docs/web/user_guide.md @@ -1092,26 +1092,29 @@ optional arguments: The 'base' (left) side of the difference: these analysis runs are used as the initial state in the comparison. The parameter can be multiple run names - (on the remote server) or multiple local report - directories (result of the analyze command). In case - of run name the the basename can contain * quantifiers - which matches any number of characters (zero or more). - So if you have run-a-1, run-a-2 and run-b-1 then - "run-a*" selects the first two. In case of run names - tag labels can also be used separated by a colon (:) - character: "run_name:tag_name". - -n NEW_RUNS [NEW_RUNS ...], --newname NEW_RUNS [NEW_RUNS ...] - The 'new' (right) side of the difference: these - analysis runs are compared to the -b/--basename runs. - The parameter can be multiple run names (on the remote - server) or multiple local report directories (result - of the analyze command). In case of run name the - newname can contain * quantifiers which matches any + (on the remote server), multiple local report + directories (result of the analyze command) or + baseline files (generated by the 'CodeChecker parse -e + baseline' command). In case of run name the the + basename can contain * quantifiers which matches any number of characters (zero or more). So if you have run-a-1, run-a-2 and run-b-1 then "run-a*" selects the first two. In case of run names tag labels can also be used separated by a colon (:) character: "run_name:tag_name". + -n NEW_RUNS [NEW_RUNS ...], --newname NEW_RUNS [NEW_RUNS ...] + The 'new' (right) side of the difference: these + analysis runs are compared to the -b/--basename runs. + The parameter can be multiple run names (on the remote + server), multiple local report directories (result of + the analyze command) or baseline files (generated by + the 'CodeChecker parse -e baseline' command). In case + of run name the newname can contain * quantifiers + which matches any number of characters (zero or more). + So if you have run-a-1, run-a-2 and run-b-1 then + "run-a*" selects the first two. In case of run names + tag labels can also be used separated by a colon (:) + character: "run_name:tag_name". -o {plaintext,rows,table,csv,json,html,gerrit,codeclimate} [{plaintext,rows,table,csv,json,html,gerrit,codeclimate} ...], --output {plaintext,rows,table,csv,json,html,gerrit,codeclimate} [{plaintext,rows,table,csv,json,html,gerrit,codeclimate} ...] The output format(s) to use in showing the data. 
- html: multiple html files will be generated in the @@ -1317,6 +1320,10 @@ exist in the remote run 'run1' but appear in the local report directory: Compare two runs and show results that exist in both runs and filter results by multiple severity values: CodeChecker cmd diff -b run1 -n run2 --unresolved --severity high medium + +Compare a baseline file (generated by the 'CodeChecker parse -e baseline' +command) and a local report directory and show new results: + CodeChecker cmd diff -b /reports.baseline -n /my_report_dir --new ``` diff --git a/web/client/codechecker_client/cmd/cmd.py b/web/client/codechecker_client/cmd/cmd.py index 0ff49539a1..94fd781a9e 100644 --- a/web/client/codechecker_client/cmd/cmd.py +++ b/web/client/codechecker_client/cmd/cmd.py @@ -490,16 +490,17 @@ def __register_diff(parser): help="The 'base' (left) side of the difference: these " "analysis runs are used as the initial state in " "the comparison. The parameter can be multiple " - "run names (on the remote server) or multiple " + "run names (on the remote server), multiple " "local report directories (result of the analyze " - "command). In case of run name the the basename " - "can contain * quantifiers which matches any " - "number of characters (zero or more). So if you " - "have run-a-1, run-a-2 and run-b-1 then " - "\"run-a*\" selects the first two. In case of " - "run names tag labels can also be used separated " - "by a colon (:) character: " - "\"run_name:tag_name\".") + "command) or baseline files (generated by the " + "'CodeChecker parse -e baseline' command). In " + "case of run name the the basename can contain * " + "quantifiers which matches any number of " + "characters (zero or more). So if you have " + "run-a-1, run-a-2 and run-b-1 then \"run-a*\" " + "selects the first two. In case of run names tag " + "labels can also be used separated by a colon " + "(:) character: \"run_name:tag_name\".") parser.add_argument('-n', '--newname', type=str, @@ -510,11 +511,13 @@ def __register_diff(parser): help="The 'new' (right) side of the difference: these " "analysis runs are compared to the -b/--basename " "runs. The parameter can be multiple run names " - "(on the remote server) or multiple local " + "(on the remote server), multiple local " "report directories (result of the analyze " - "command). In case of run name the newname can " - "contain * quantifiers which matches any number " - "of characters (zero or more). So if you have " + "command) or baseline files (generated by the " + "'CodeChecker parse -e baseline' command). In " + "case of run name the newname can contain * " + "quantifiers which matches any number of " + "characters (zero or more). So if you have " "run-a-1, run-a-2 and run-b-1 then " "\"run-a*\" selects the first two. 
In case of " "run names tag labels can also be used separated " @@ -1342,7 +1345,11 @@ def add_arguments_to_parser(parser): Compare two runs and show results that exist in both runs and filter results by multiple severity values: - CodeChecker cmd diff -b run1 -n run2 --unresolved --severity high medium''' + CodeChecker cmd diff -b run1 -n run2 --unresolved --severity high medium + +Compare a baseline file (generated by the 'CodeChecker parse -e baseline' +command) and a local report directory and show new results: + CodeChecker cmd diff -b /reports.baseline -n /my_report_dir --new''' ) __register_diff(diff) diff --git a/web/client/codechecker_client/cmd_line_client.py b/web/client/codechecker_client/cmd_line_client.py index 17f4e0b449..9f59989859 100644 --- a/web/client/codechecker_client/cmd_line_client.py +++ b/web/client/codechecker_client/cmd_line_client.py @@ -20,7 +20,7 @@ import sys import shutil import time -from typing import Dict, List, Tuple, Union +from typing import Dict, Iterable, List, Tuple, Union from plist_to_html import PlistToHtml @@ -29,7 +29,7 @@ from codechecker_common import logger, plist_parser, util from codechecker_common.report import Report -from codechecker_common.output import twodim, gerrit, codeclimate +from codechecker_common.output import twodim, gerrit, codeclimate, baseline from codechecker_report_hash.hash import get_report_path_hash from codechecker_web.shared import webserver_context @@ -55,19 +55,26 @@ def init_logger(level, stream=None, logger_name='system'): LOG = logger.get_logger(logger_name) -def filter_localdir_remote_run( - run_args: List[str]) -> Tuple[List[str], List[str]]: - """Filter out arguments which are local directory or remote run names.""" +def filter_local_file_remote_run( + run_args: List[str] +) -> Tuple[List[str], List[str], List[str]]: + """ + Filter out arguments which are local directory, baseline files or remote + run names. + """ local_dirs = [] + baseline_files = [] run_names = [] for r in run_args: if os.path.isdir(r): local_dirs.append(os.path.abspath(r)) + elif os.path.isfile(r) and r.endswith(".baseline"): + baseline_files.append(os.path.abspath(r)) else: run_names.append(r) - return local_dirs, run_names + return local_dirs, baseline_files, run_names def run_sort_type_str(value): @@ -822,68 +829,91 @@ def handle_diff_results(args): context = webserver_context.get_context() source_line_contents = {} - def get_diff_local_dir_remote_run(client, report_dirs, remote_run_names): - """Compare a local report directory with a remote run.""" + def get_diff_local_dir_remote_run( + client, + report_dirs: List[str], + baseline_files: List[str], + remote_run_names: List[str] + ): + """ Compare a local report directory with a remote run. """ filtered_reports = [] - report_dir_results = get_report_dir_results(report_dirs, - args, - context.severity_map) + filtered_report_hashes = set() + + report_dir_results = get_report_dir_results( + report_dirs, args, context.severity_map) suppressed_in_code = get_suppressed_reports(report_dir_results, args) diff_type = get_diff_type(args) run_ids, run_names, tag_ids = \ process_run_args(client, remote_run_names) local_report_hashes = set([r.report_hash for r in report_dir_results]) + local_report_hashes.update(baseline.get_report_hashes(baseline_files)) if diff_type == ttypes.DiffType.NEW: # Get report hashes which can be found only in the remote runs. 
- remote_hashes = \ - client.getDiffResultsHash(run_ids, - local_report_hashes, - ttypes.DiffType.RESOLVED, - None, - tag_ids) - - results = get_diff_base_results(client, args, run_ids, - remote_hashes, - suppressed_in_code) + remote_hashes = client.getDiffResultsHash( + run_ids, local_report_hashes, ttypes.DiffType.RESOLVED, + None, tag_ids) + + results = get_diff_base_results( + client, args, run_ids, remote_hashes, suppressed_in_code) + for result in results: filtered_reports.append(result) elif diff_type == ttypes.DiffType.UNRESOLVED: # Get remote hashes which can be found in the remote run and in the # local report directory. - remote_hashes = \ - client.getDiffResultsHash(run_ids, - local_report_hashes, - ttypes.DiffType.UNRESOLVED, - None, - tag_ids) + remote_hashes = client.getDiffResultsHash( + run_ids, local_report_hashes, ttypes.DiffType.UNRESOLVED, + None, tag_ids) + + filtered_report_hashes = local_report_hashes.copy() for result in report_dir_results: rep_h = result.report_hash + filtered_report_hashes.discard(rep_h) if rep_h in remote_hashes and rep_h not in suppressed_in_code: filtered_reports.append(result) + filtered_report_hashes &= set(remote_hashes) + + # Try to get missing report from the server based on the report + # hashes. + if filtered_report_hashes: + results = get_diff_base_results( + client, args, run_ids, list(filtered_report_hashes), + suppressed_in_code) + + for result in results: + filtered_report_hashes.discard(result.bugHash) + filtered_reports.append(result) elif diff_type == ttypes.DiffType.RESOLVED: # Get remote hashes which can be found in the remote run and in the # local report directory. - remote_hashes = \ - client.getDiffResultsHash(run_ids, - local_report_hashes, - ttypes.DiffType.UNRESOLVED, - None, - tag_ids) + remote_hashes = client.getDiffResultsHash( + run_ids, local_report_hashes, ttypes.DiffType.UNRESOLVED, + None, tag_ids) + + filtered_report_hashes = local_report_hashes.copy() for result in report_dir_results: + filtered_report_hashes.discard(result.report_hash) if result.report_hash not in remote_hashes: filtered_reports.append(result) - return filtered_reports, run_names + filtered_report_hashes -= set(remote_hashes) - def get_diff_remote_run_local_dir(client, remote_run_names, report_dirs): - """ - Compares a remote run with a local report directory. - """ + return filtered_reports, filtered_report_hashes, run_names + + def get_diff_remote_run_local_dir( + client, + remote_run_names: List[str], + report_dirs: List[str], + baseline_files: List[str] + ): + """ Compares a remote run with a local report directory. 
""" filtered_reports = [] - report_dir_results = get_report_dir_results(report_dirs, - args, - context.severity_map) + filtered_report_hashes = [] + + report_dir_results = get_report_dir_results( + report_dirs, args, context.severity_map) + suppressed_in_code = get_suppressed_reports(report_dir_results, args) diff_type = get_diff_type(args) @@ -891,35 +921,36 @@ def get_diff_remote_run_local_dir(client, remote_run_names, report_dirs): process_run_args(client, remote_run_names) local_report_hashes = set([r.report_hash for r in report_dir_results]) - remote_hashes = client.getDiffResultsHash(run_ids, - local_report_hashes, - diff_type, - None, - tag_ids) + local_report_hashes = local_report_hashes.union( + baseline.get_report_hashes(baseline_files)) + + remote_hashes = client.getDiffResultsHash( + run_ids, local_report_hashes, diff_type, None, tag_ids) if not remote_hashes: - return filtered_reports, run_names + return filtered_reports, filtered_report_hashes, run_names if diff_type in [ttypes.DiffType.NEW, ttypes.DiffType.UNRESOLVED]: # Shows reports from the report dir which are not present in # the baseline (NEW reports) or appear in both side (UNRESOLVED # reports) and not suppressed in the code. + filtered_report_hashes = set(remote_hashes) + for result in report_dir_results: rep_h = result.report_hash + filtered_report_hashes.discard(rep_h) if rep_h in remote_hashes and rep_h not in suppressed_in_code: filtered_reports.append(result) elif diff_type == ttypes.DiffType.RESOLVED: # Show bugs in the baseline (server) which are not present in # the report dir or suppressed. - results = get_diff_base_results(client, - args, - run_ids, - remote_hashes, - suppressed_in_code) + results = get_diff_base_results( + client, args, run_ids, remote_hashes, suppressed_in_code) + for result in results: filtered_reports.append(result) - return filtered_reports, run_names + return filtered_reports, filtered_report_hashes, run_names def get_diff_remote_runs(client, remote_base_run_names, remote_new_run_names): @@ -963,17 +994,23 @@ def get_diff_remote_runs(client, remote_base_run_names, return all_results, base_run_names, new_run_names - def get_diff_local_dirs(base_run_names, new_run_names): + def get_diff_local_dirs( + report_dirs: List[str], + baseline_files: List[str], + new_report_dirs: List[str], + new_baseline_files: List[str] + ) -> Tuple[List[Report], List[str]]: """ Compares two report directories and returns the filtered results. """ filtered_reports = [] - base_results = get_report_dir_results(base_run_names, - args, - context.severity_map) - new_results = get_report_dir_results(new_run_names, - args, - context.severity_map) + filtered_report_hashes = [] + + base_results = get_report_dir_results( + report_dirs, args, context.severity_map) + + new_results = get_report_dir_results( + new_report_dirs, args, context.severity_map) new_results = [res for res in new_results if res.check_source_code_comments(args.review_status)] @@ -981,21 +1018,34 @@ def get_diff_local_dirs(base_run_names, new_run_names): base_hashes = set([res.report_hash for res in base_results]) new_hashes = set([res.report_hash for res in new_results]) + # Add hashes from the baseline files. 
+ base_hashes.update(baseline.get_report_hashes(baseline_files)) + new_hashes.update(baseline.get_report_hashes(new_baseline_files)) + diff_type = get_diff_type(args) if diff_type == ttypes.DiffType.NEW: + filtered_report_hashes = new_hashes.copy() for res in new_results: + filtered_report_hashes.discard(res.report_hash) + if res.report_hash not in base_hashes: filtered_reports.append(res) if diff_type == ttypes.DiffType.UNRESOLVED: + filtered_report_hashes = new_hashes.copy() for res in new_results: + filtered_report_hashes.discard(res.report_hash) + if res.report_hash in base_hashes: filtered_reports.append(res) elif diff_type == ttypes.DiffType.RESOLVED: + filtered_report_hashes = base_hashes.copy() for res in base_results: + filtered_report_hashes.discard(res.report_hash) + if res.report_hash not in new_hashes: filtered_reports.append(res) - return filtered_reports + return filtered_reports, filtered_report_hashes def cached_report_file_lookup(file_cache, file_id): """ @@ -1115,9 +1165,15 @@ def report_to_html(client, reports, output_dir): html_builder.create_index_html(output_dir) print_stats(len(reports), file_stats, severity_stats) - def print_reports(client, - reports: List[Report], - output_formats: List[str]): + def print_reports( + client, + reports: List[Report], + report_hashes: Iterable[str], + output_formats: List[str] + ): + if report_hashes: + LOG.info("Couldn't get local reports for the following baseline " + "report hashes: %s", ', '.join(sorted(report_hashes))) selected_output_format_num = len(output_formats) @@ -1288,24 +1344,26 @@ def print_reports(client, "analyze your project again to update the " "reports!", changed_f) - basename_local_dirs, basename_run_names = \ - filter_localdir_remote_run(args.base_run_names) + basename_local_dirs, basename_baseline_files, basename_run_names = \ + filter_local_file_remote_run(args.base_run_names) - newname_local_dirs, newname_run_names = \ - filter_localdir_remote_run(args.new_run_names) + newname_local_dirs, newname_baseline_files, newname_run_names = \ + filter_local_file_remote_run(args.new_run_names) has_different_run_args = False - if basename_local_dirs and basename_run_names: + if (basename_local_dirs or basename_baseline_files) and basename_run_names: LOG.error("All base run names must have the same type: local " - "directory (%s) or run names (%s).", + "directory (%s) / baseline files (%s) or run names (%s).", ', '.join(basename_local_dirs), + ', '.join(basename_baseline_files), ', '.join(basename_run_names)) has_different_run_args = True if newname_local_dirs and newname_run_names: LOG.error("All new run names must have the same type: local " - "directory (%s) or run names (%s).", + "directory (%s) / baseline files (%s) or run names (%s).", ', '.join(newname_local_dirs), + ', '.join(newname_baseline_files), ', '.join(newname_run_names)) has_different_run_args = True @@ -1324,12 +1382,20 @@ def print_reports(client, if basename_local_dirs: LOG.info("Matching local report directories (--baseline): %s", ', '.join(basename_local_dirs)) + if basename_baseline_files: + LOG.info("Matching local baseline files (--baseline): %s", + ', '.join(basename_baseline_files)) + if newname_local_dirs: LOG.info("Matching local report directories (--newname): %s", ', '.join(newname_local_dirs)) + if newname_baseline_files: + LOG.info("Matching local baseline files (--newname): %s", + ', '.join(newname_baseline_files)) client = None - # We set up the client if we are not comparing two local report directory. 
+ # We set up the client if we are not comparing two local report directories + # or baseline files. if basename_run_names or newname_run_names: if basename_run_names: LOG.info("Given remote runs (--baseline): %s", @@ -1347,40 +1413,45 @@ def print_reports(client, args.product_url) raise sexit - if basename_local_dirs and newname_local_dirs: - reports = get_diff_local_dirs(basename_local_dirs, - newname_local_dirs) - print_reports(client, reports, args.output_format) - LOG.info("Compared the following local report directories: %s and %s", - ', '.join(basename_local_dirs), - ', '.join(newname_local_dirs)) - elif newname_local_dirs: - reports, matching_base_run_names = \ - get_diff_remote_run_local_dir(client, - basename_run_names, - newname_local_dirs) - print_reports(client, reports, args.output_format) - LOG.info("Compared remote run(s) %s (matching: %s) and local report " - "directory(s) %s", + report_hashes = [] + if (basename_local_dirs or basename_baseline_files) and \ + (newname_local_dirs or newname_baseline_files): + reports, report_hashes = get_diff_local_dirs( + basename_local_dirs, basename_baseline_files, + newname_local_dirs, newname_baseline_files) + + print_reports(client, reports, report_hashes, args.output_format) + LOG.info("Compared the following local files / directories: %s and %s", + ', '.join([*basename_local_dirs, *basename_baseline_files]), + ', '.join([*newname_local_dirs, *newname_baseline_files])) + elif newname_local_dirs or newname_baseline_files: + reports, report_hashes, matching_base_run_names = \ + get_diff_remote_run_local_dir( + client, basename_run_names, + newname_local_dirs, newname_baseline_files) + + print_reports(client, reports, report_hashes, args.output_format) + LOG.info("Compared remote run(s) %s (matching: %s) and local files / " + "report directory(s) %s", ', '.join(basename_run_names), ', '.join(matching_base_run_names), - ', '.join(newname_local_dirs)) - elif basename_local_dirs: - reports, matching_new_run_names = \ - get_diff_local_dir_remote_run(client, - basename_local_dirs, - newname_run_names) - - print_reports(client, reports, args.output_format) - LOG.info("Compared local report directory(s) %s and remote run(s) %s " - "(matching: %s).", - ', '.join(basename_local_dirs), + ', '.join([*newname_local_dirs, *newname_baseline_files])) + elif (basename_local_dirs or basename_baseline_files): + reports, report_hashes, matching_new_run_names = \ + get_diff_local_dir_remote_run( + client, basename_local_dirs, basename_baseline_files, + newname_run_names) + + print_reports(client, reports, report_hashes, args.output_format) + LOG.info("Compared local files / report directory(s) %s and remote " + "run(s) %s (matching: %s).", + ', '.join([*basename_local_dirs, *basename_baseline_files]), ', '.join(newname_run_names), ', '.join(matching_new_run_names)) else: reports, matching_base_run_names, matching_new_run_names = \ get_diff_remote_runs(client, basename_run_names, newname_run_names) - print_reports(client, reports, args.output_format) + print_reports(client, reports, None, args.output_format) LOG.info("Compared multiple remote runs %s (matching: %s) and %s " "(matching: %s)", ', '.join(basename_run_names), @@ -1388,7 +1459,7 @@ def print_reports(client, ', '.join(newname_run_names), ', '.join(matching_new_run_names)) - if len(reports) != 0: + if len(reports) != 0 or len(report_hashes) != 0: sys.exit(2) diff --git a/web/tests/functional/diff_local/test_diff_local.py b/web/tests/functional/diff_local/test_diff_local.py index 
d1715d8497..d3d1743101 100644 --- a/web/tests/functional/diff_local/test_diff_local.py +++ b/web/tests/functional/diff_local/test_diff_local.py @@ -21,7 +21,7 @@ import unittest from libtest import env, codechecker -from libtest.codechecker import get_diff_results +from libtest.codechecker import create_baseline_file, get_diff_results class DiffLocal(unittest.TestCase): @@ -267,3 +267,116 @@ def test_suppress_reports(self): res, _, _ = get_diff_results( [report_dir_base], [report_dir_new], '--resolved', 'json') self.assertEqual(len(res), 2) + + def test_basename_baseline_file_json(self): + """ + Get reports based on a baseline file given to the basename option. + """ + baseline_file_path = create_baseline_file(self.base_reports) + + # Get new results. + new_results, _, _ = get_diff_results( + [baseline_file_path], [self.new_reports], '--new', 'json') + + print(new_results) + + for new_result in new_results: + self.assertEqual(new_result['checkerId'], "core.NullDereference") + + # Get unresolved results. + unresolved_results, _, _ = get_diff_results( + [baseline_file_path], [self.new_reports], '--unresolved', 'json') + + print(unresolved_results) + + self.assertTrue(any( + r for r in unresolved_results + if r['checkerId'] == 'core.DivideZero')) + + self.assertFalse(any( + r for r in unresolved_results + if r['checkerId'] == 'core.NullDereference' or + r['checkerId'] == 'core.CallAndMessage')) + + # Get resolved results. + resolved_results, err, returncode = get_diff_results( + [baseline_file_path], [self.new_reports], '--resolved', 'json') + + self.assertFalse(resolved_results) + self.assertEqual(returncode, 2) + self.assertIn( + "Couldn't get local reports for the following baseline report " + "hashes: ", + err) + + def test_newname_baseline_file_json(self): + """ + Get reports based on a baseline file given to the newname option. + """ + baseline_file_path = create_baseline_file(self.new_reports) + + # Get new results. + new_results, err, returncode = get_diff_results( + [self.base_reports], [baseline_file_path], '--new', 'json') + + self.assertFalse(new_results) + self.assertEqual(returncode, 2) + self.assertIn( + "Couldn't get local reports for the following baseline report " + "hashes: ", + err) + + # Get unresolved results. + unresolved_results, err, returncode = get_diff_results( + [self.base_reports], [baseline_file_path], '--unresolved', 'json') + + self.assertFalse(unresolved_results) + self.assertEqual(returncode, 2) + self.assertIn( + "Couldn't get local reports for the following baseline report " + "hashes: ", + err) + + # Get resolved results. + resolved_results, _, _ = get_diff_results( + [self.base_reports], [baseline_file_path], '--resolved', 'json') + + for report in resolved_results: + self.assertEqual(report['checkerId'], "core.CallAndMessage") + + def test_multiple_baseline_file_json(self): + """ Test multiple baseline file for basename option. """ + baseline_file_paths = [ + create_baseline_file(self.base_reports), + create_baseline_file(self.new_reports)] + + # Get new results. + new_results, _, returncode = get_diff_results( + baseline_file_paths, [self.new_reports], '--new', 'json') + + print(new_results) + + self.assertFalse(new_results) + self.assertFalse(returncode) + + # Get unresolved results. 
+ unresolved_results, _, returncode = get_diff_results( + baseline_file_paths, [self.new_reports], '--unresolved', 'json') + print(unresolved_results) + + self.assertTrue(any( + r for r in unresolved_results + if r['checkerId'] == 'core.DivideZero')) + + # Get resolved results. + resolved_results, err, returncode = get_diff_results( + baseline_file_paths, [self.new_reports], '--resolved', 'json') + + print(resolved_results) + + self.assertFalse(resolved_results) + self.assertEqual(returncode, 2) + self.assertIn( + "Couldn't get local reports for the following baseline report " + "hashes: ", + err) diff --git a/web/tests/functional/diff_local_remote/test_diff_local_remote.py b/web/tests/functional/diff_local_remote/test_diff_local_remote.py index 323b109fb9..72138c7567 100644 --- a/web/tests/functional/diff_local_remote/test_diff_local_remote.py +++ b/web/tests/functional/diff_local_remote/test_diff_local_remote.py @@ -22,7 +22,7 @@ import unittest from libtest import env -from libtest.codechecker import get_diff_results +from libtest.codechecker import create_baseline_file, get_diff_results class LocalRemote(unittest.TestCase): @@ -559,3 +559,110 @@ def test_diff_remote_local_resolved_same(self): [self._run_names[0]], [self._remote_reports], '--resolved', 'json', ["--url", self._url]) self.assertEqual(out, []) + + def test_local_to_remote_with_baseline_file(self): + """ + Get reports based on a baseline file given to the basename option. + """ + baseline_file_path = create_baseline_file(self._local_reports) + + # Get new reports. + new_results, _, returncode = get_diff_results( + [baseline_file_path], [self._run_names[0]], '--new', 'json', + ["--url", self._url]) + print(new_results) + + for report in new_results: + self.assertEqual(report['checkerId'], "core.NullDereference") + + self.assertEqual(returncode, 2) + + # Get unresolved reports. + unresolved_results, err, returncode = get_diff_results( + [baseline_file_path], [self._run_names[0]], '--unresolved', 'json', + ["--url", self._url]) + print(unresolved_results) + + self.assertTrue(unresolved_results) + self.assertFalse(any( + r for r in unresolved_results + if r['checkerId'] == 'core.CallAndMessage')) + self.assertEqual(returncode, 2) + + # Get resolved reports. + resolved_results, err, returncode = get_diff_results( + [baseline_file_path], [self._run_names[0]], '--resolved', 'json', + ["--url", self._url]) + print(resolved_results) + + self.assertFalse(resolved_results) + self.assertEqual(returncode, 2) + self.assertIn( + "Couldn't get local reports for the following baseline report " + "hashes: ", + err) + + def test_remote_to_local_with_baseline_file(self): + """ + Get reports based on a baseline file given to the newname option. + """ + baseline_file_path = create_baseline_file(self._local_reports) + + # Get new reports. + res, _, _ = get_diff_results( + [self._run_names[0]], [self._local_reports], + '--new', 'json', + ["--url", self._url, + "--review-status", "unreviewed", "confirmed", "false_positive"]) + new_hashes = sorted(set([n['bugHash'] for n in res])) + + new_results, err, returncode = get_diff_results( + [self._run_names[0]], [baseline_file_path], '--new', 'json', + ["--url", self._url]) + print(new_results) + + self.assertFalse(new_results) + self.assertEqual(returncode, 2) + self.assertIn( + "Couldn't get local reports for the following baseline report " + "hashes: " + ', '.join(new_hashes), + err) + + # Get unresolved reports. 
+ res, _, _ = get_diff_results( + [self._run_names[0]], [self._local_reports], + '--unresolved', 'json', + ["--url", self._url, + "--review-status", "unreviewed", "confirmed", "false_positive"]) + unresolved_hashes = sorted(set([n['bugHash'] for n in res])) + + unresolved_results, err, returncode = get_diff_results( + [self._run_names[0]], [baseline_file_path], + '--unresolved', 'json', + ["--url", self._url]) + print(unresolved_results) + + self.assertFalse(unresolved_results) + self.assertEqual(returncode, 2) + self.assertIn( + "Couldn't get local reports for the following baseline report " + "hashes: " + ', '.join(unresolved_hashes), + err) + + # Get resolved reports. + res, _, _ = get_diff_results( + [self._run_names[0]], [self._local_reports], + '--resolved', 'json', + ["--url", self._url, + "--review-status", "unreviewed", "confirmed", "false_positive"]) + resolved_hashes = set([n['bugHash'] for n in res]) + + resolved_results, _, returncode = get_diff_results( + [self._run_names[0]], [baseline_file_path], '--resolved', 'json', + ["--url", self._url]) + print(resolved_results) + + self.assertTrue(resolved_results) + self.assertSetEqual( + {r['bugHash'] for r in resolved_results}, resolved_hashes) + self.assertEqual(returncode, 2) diff --git a/web/tests/libtest/codechecker.py b/web/tests/libtest/codechecker.py index 10e0dd6180..d9e2144eb9 100644 --- a/web/tests/libtest/codechecker.py +++ b/web/tests/libtest/codechecker.py @@ -107,6 +107,20 @@ def get_diff_results(basenames, newnames, diff_type, format_type=None, return out, err, proc.returncode +def create_baseline_file(report_dir: str, cc_env=None) -> str: + """ Create baseline file from the given report directory. """ + parse_cmd = [ + env.codechecker_cmd(), 'parse', report_dir, + '-e', 'baseline', '-o', report_dir] + + proc = subprocess.Popen( + parse_cmd, encoding="utf-8", errors="ignore", env=cc_env, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc.communicate() + + return os.path.join(report_dir, 'reports.baseline') + + def login(codechecker_cfg, test_project_path, username, password, protocol='http'): """
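
Below is a minimal, hedged sketch of how the new `codechecker_common/output/baseline.py` helpers introduced in this patch fit together, assuming the CodeChecker sources are importable in the current environment. The temporary directory is illustrative only, and the hash values are borrowed from the test data above rather than taken from a real analysis.

```python
# Sketch only: exercises baseline.write() and baseline.get_report_hashes()
# from the codechecker_common.output.baseline module added by this patch.
import os
import tempfile

from codechecker_common.output import baseline

with tempfile.TemporaryDirectory() as out_dir:
    # write() creates (or extends) <out_dir>/reports.baseline, storing the
    # given report hashes uniqued and sorted, one hash per line.
    baseline.write(out_dir, ['3d15184f38c5fa57e479b744fe3f5035',
                             'f8fbc46cc5afbb056d92bd3d3d702781'])

    # get_report_hashes() reads the stored hashes back from one or more
    # baseline files, e.g. to compare them against a fresh analysis.
    baseline_file = os.path.join(out_dir, 'reports.baseline')
    print(sorted(baseline.get_report_hashes([baseline_file])))
```

In day-to-day use the same round trip is driven from the command line: `CodeChecker parse ./reports -e baseline -o .` produces `reports.baseline`, and `CodeChecker cmd diff -b ./reports.baseline -n ./reports --new` consumes it, as documented in the `docs/usage.md` hunk above.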