From d162500e5318b7e0b35fc0cc326c6886d6960c7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20Csord=C3=A1s?= Date: Wed, 4 Aug 2021 14:48:55 +0200 Subject: [PATCH] [cli] Local diff workflow support - Extend the `CodeChecker parse` command with an extra baseline output type which can be used to generate a baseline file which will contain report hashes for legacy reports. - Extend the `CodeChecker cmd diff` to support baseline files. - Add test cases. - Extend the documentation with the recommended usage of this workflow. --- analyzer/codechecker_analyzer/cmd/parse.py | 31 +- .../test_analyze_and_parse.py | 46 +++ codechecker_common/output/baseline.py | 76 +++++ docs/analyzer/user_guide.md | 17 +- docs/usage.md | 19 ++ docs/web/user_guide.md | 37 ++- web/client/codechecker_client/cmd/cmd.py | 35 ++- .../codechecker_client/cmd_line_client.py | 271 +++++++++++------- .../functional/diff_local/test_diff_local.py | 115 +++++++- .../test_diff_local_remote.py | 109 ++++++- web/tests/libtest/codechecker.py | 14 + 11 files changed, 625 insertions(+), 145 deletions(-) create mode 100644 codechecker_common/output/baseline.py diff --git a/analyzer/codechecker_analyzer/cmd/parse.py b/analyzer/codechecker_analyzer/cmd/parse.py index 1f3202f61b..ca7694fb18 100644 --- a/analyzer/codechecker_analyzer/cmd/parse.py +++ b/analyzer/codechecker_analyzer/cmd/parse.py @@ -26,7 +26,7 @@ from codechecker_analyzer import analyzer_context, suppress_handler from codechecker_common import arg, logger, plist_parser, util, cmd_config -from codechecker_common.output import json as out_json, twodim, \ +from codechecker_common.output import baseline, json as out_json, twodim, \ codeclimate, gerrit from codechecker_common.skiplist_handler import SkipListHandler from codechecker_common.source_code_comment_handler import \ @@ -37,7 +37,7 @@ LOG = logger.get_logger('system') -EXPORT_TYPES = ['html', 'json', 'codeclimate', 'gerrit'] +EXPORT_TYPES = ['html', 'json', 'codeclimate', 'gerrit', 'baseline'] _data_files_dir_path = analyzer_context.get_context().data_files_dir_path _severity_map_file = os.path.join(_data_files_dir_path, 'config', @@ -457,7 +457,11 @@ def add_arguments_to_parser(parser): "For more information see:\n" "https://github.com/codeclimate/platform/" "blob/master/spec/analyzers/SPEC.md" - "#data-types") + "#data-types\n" + "'baseline' output can be used to integrate " + "CodeChecker into your local workflow " + "without using a CodeChecker server. 
For " + "more information see our usage guide.") output_opts.add_argument('-o', '--output', dest="output_path", @@ -639,6 +643,9 @@ def _parse_convert_reports( report.trim_path_prefixes(trim_path_prefixes) number_of_reports = len(all_reports) + if out_format == "baseline": + return (baseline.convert(all_reports), number_of_reports) + if out_format == "codeclimate": return (codeclimate.convert(all_reports, severity_map), number_of_reports) @@ -693,11 +700,6 @@ def _generate_json_output( output_text = json.dumps(reports) if output_path: - output_path = os.path.abspath(output_path) - - if not os.path.exists(output_path): - os.mkdir(output_path) - output_file_path = os.path.join(output_path, 'reports.json') with open(output_file_path, mode='w', encoding='utf-8', errors="ignore") as output_f: @@ -793,7 +795,20 @@ def main(args): if 'output_path' in args: output_path = os.path.abspath(args.output_path) + if not os.path.exists(output_path): + os.makedirs(output_path) + if export: + if export == 'baseline': + report_hashes, number_of_reports = _parse_convert_reports( + args.input, export, context.severity_map, trim_path_prefixes, + skip_handler) + + if output_path: + baseline.write(output_path, report_hashes) + + sys.exit(2 if number_of_reports else 0) + # The HTML part will be handled separately below. if export != 'html': sys.exit(_generate_json_output( diff --git a/analyzer/tests/functional/analyze_and_parse/test_analyze_and_parse.py b/analyzer/tests/functional/analyze_and_parse/test_analyze_and_parse.py index 2e78ef33ce..f5ebfa5b72 100644 --- a/analyzer/tests/functional/analyze_and_parse/test_analyze_and_parse.py +++ b/analyzer/tests/functional/analyze_and_parse/test_analyze_and_parse.py @@ -24,6 +24,8 @@ from libtest import project from libtest.codechecker import call_command +from codechecker_common.output import baseline + class AnalyzeParseTestCaseMeta(type): def __new__(mcs, name, bases, test_dict): @@ -544,3 +546,47 @@ def test_html_export_exit_code(self): out, _, result = call_command(extract_cmd, cwd=self.test_dir, env=self.env) self.assertEqual(result, 0, "Parsing should not found any issue.") + + def test_baseline_output(self): + """ Test parse baseline output. """ + output_path = self.test_workspaces['OUTPUT'] + out_file_path = os.path.join(output_path, "reports.baseline") + + # Analyze the first project. + test_project_notes = os.path.join( + self.test_workspaces['NORMAL'], "test_files", "notes") + + extract_cmd = ['CodeChecker', 'parse', + "-e", "baseline", + "-o", output_path, + test_project_notes, + '--trim-path-prefix', test_project_notes] + + _, _, result = call_command( + extract_cmd, cwd=self.test_dir, env=self.env) + self.assertEqual(result, 2, "Parsing not found any issue.") + + report_hashes = baseline.get_report_hashes([out_file_path]) + self.assertEqual( + report_hashes, {'3d15184f38c5fa57e479b744fe3f5035'}) + + # Analyze the second project and see whether the baseline file is + # merged. 
+ test_project_macros = os.path.join( + self.test_workspaces['NORMAL'], "test_files", "macros") + + extract_cmd = ['CodeChecker', 'parse', + "-e", "baseline", + "-o", output_path, + test_project_macros, + '--trim-path-prefix', test_project_macros] + + _, _, result = call_command( + extract_cmd, cwd=self.test_dir, env=self.env) + self.assertEqual(result, 2, "Parsing not found any issue.") + + report_hashes = baseline.get_report_hashes([out_file_path]) + self.assertEqual( + report_hashes, { + '3d15184f38c5fa57e479b744fe3f5035', + 'f8fbc46cc5afbb056d92bd3d3d702781'}) diff --git a/codechecker_common/output/baseline.py b/codechecker_common/output/baseline.py new file mode 100644 index 0000000000..d1dfa2fe22 --- /dev/null +++ b/codechecker_common/output/baseline.py @@ -0,0 +1,76 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" CodeChecker baseline output helpers. """ + +from io import TextIOWrapper +import os +from typing import Iterable, List, Set + +from codechecker_common import logger +from codechecker_common.report import Report + + +LOG = logger.get_logger('system') + + +def __get_report_hashes(f: TextIOWrapper) -> List[str]: + """ Get report hashes from the given file. """ + return [h for h in f.readlines() if h] + + +def get_report_hashes( + baseline_file_paths: Iterable[str] +) -> Set[str]: + """ Get uniqued hashes from baseline files. """ + report_hashes = set() + for file_path in baseline_file_paths: + with open(file_path, mode='r', encoding='utf-8', errors="ignore") as f: + report_hashes.update(__get_report_hashes(f)) + + return report_hashes + + +def convert(reports: Iterable[Report]) -> List[str]: + """ Convert the given reports to CodeChecker baseline format. + + Returns a list of sorted unique report hashes. + """ + return sorted(set(r.report_hash for r in reports)) + + +def write(output_dir_path: str, report_hashes: Iterable[str]): + """ Create a new baseline file or extend an existing one with the given + report hashes in the given output directory. It will remove the duplicates + and also sort the report hashes before writing it to a file. 
+ """ + file_path = os.path.join(output_dir_path, 'reports.baseline') + with open(file_path, mode='a+', encoding='utf-8', errors="ignore") as f: + f.seek(0) + old_report_hashes = __get_report_hashes(f) + new_report_hashes = set(report_hashes) - set(old_report_hashes) + + if not new_report_hashes: + LOG.info("Baseline file (%s) is up-to-date.", file_path) + return + + if old_report_hashes: + LOG.info("Merging existing baseline file: %s", file_path) + else: + LOG.info("Creating new baseline file: %s", file_path) + + LOG.info("Total number of old report hashes: %d", + len(old_report_hashes)) + LOG.info("Total number of new report hashes: %d", + len(new_report_hashes)) + + LOG.debug("New report hashes: %s", sorted(new_report_hashes)) + + f.seek(0) + f.truncate() + f.write("\n".join(sorted( + set([*old_report_hashes, *report_hashes])))) diff --git a/docs/analyzer/user_guide.md b/docs/analyzer/user_guide.md index 1b376bb786..3a2629228e 100644 --- a/docs/analyzer/user_guide.md +++ b/docs/analyzer/user_guide.md @@ -1577,10 +1577,11 @@ Statistics analysis feature arguments: ``` -Usage: CodeChecker parse [-h] [--config CONFIG_FILE] [-t {plist}] - [-e {html,json,codeclimate,gerrit}] [-o OUTPUT_PATH] - [--suppress SUPPRESS] [--export-source-suppress] - [--print-steps] [-i SKIPFILE] +usage: CodeChecker parse [-h] [--config CONFIG_FILE] [-t {plist}] + [-e {html,json,codeclimate,gerrit,baseline}] + [-o OUTPUT_PATH] [--suppress SUPPRESS] + [--export-source-suppress] [--print-steps] + [-i SKIPFILE] [--trim-path-prefix [TRIM_PATH_PREFIX [TRIM_PATH_PREFIX ...]]] [--review-status [REVIEW_STATUS [REVIEW_STATUS ...]]] [--verbose {info,debug_analyzer,debug}] @@ -1643,12 +1644,16 @@ optional arguments: Set verbosity level. export arguments: - -e {html,json,codeclimate,gerrit}, --export {html,json,codeclimate,gerrit} + -e {html,json,codeclimate,gerrit,baseline}, --export {html,json,codeclimate,gerrit,baseline} Specify extra output format type. 'codeclimate' format can be used for Code Climate and for GitLab integration. For more information see: https://github.com/codeclimate/platform/blob/master/sp - ec/analyzers/SPEC.md#data-types (default: None) + ec/analyzers/SPEC.md#data-types + 'baseline' output can be used to integrate CodeChecker + into your local workflow without using a CodeChecker + server. For more information see our usage guide. + (default: None) -o OUTPUT_PATH, --output OUTPUT_PATH Store the output in the given folder. diff --git a/docs/usage.md b/docs/usage.md index 394bbc03ce..d6a4ae95a7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -41,6 +41,7 @@ It invokes Clang Static Analyzer and Clang-Tidy tools to analyze your code. - [Alternative 2: Store each analysis in a new run](#storing-new-runs) - [Gerrit Integration](#gerrit-integration) - [Setting up user authentication](authentication) + - [Step 9: Integrate CodeChecker into your local workflow](#step-9) - [Updating CodeChecker to new version](#upgrade) - [Unique Report Identifier (RI)](#unique-report-identifier) - [Listing and Counting Reports](#listing-reports) @@ -754,6 +755,24 @@ guide. You can set up authentication for your server and (web,command line) clients as described in the [Authentication Guide](web/authentication.md). + +## Step 9: Integrate CodeChecker into your local workflow +If you want to use CodeChecker in your project but you don't want to run a +CodeChecker server and to fix every reports found by CodeChecker on the first +time (legacy findings) you can do the following steps: +1. 
Analyze your project to a report directory (e.g.: `./reports`). For more +information see [Step 2](#step-2). +2. Create a baseline file from the reports which contains the legacy findings: +`CodeChecker parse ./reports -e baseline -o .`. It is recommended to store +this baseline file (`reports.baseline`) in your repository. +3. On source code changes after your project is re-analyzed use the +CodeChecker diff command to get the new reports: +`CodeChecker cmd diff -b ./reports.baseline -n ./reports --new` +4. On configuration changes (new checkers / options are enabled / disabled, +new CodeChecker / clang version is used, etc.) re-generate the baseline file +(step 1-2). + + ## Updating CodeChecker to new version If a new CodeChecker release is available it might be possible that there are some database changes compared to the previous release. If you run into diff --git a/docs/web/user_guide.md b/docs/web/user_guide.md index fe0212793a..5e5cdc6af5 100644 --- a/docs/web/user_guide.md +++ b/docs/web/user_guide.md @@ -1092,26 +1092,29 @@ optional arguments: The 'base' (left) side of the difference: these analysis runs are used as the initial state in the comparison. The parameter can be multiple run names - (on the remote server) or multiple local report - directories (result of the analyze command). In case - of run name the the basename can contain * quantifiers - which matches any number of characters (zero or more). - So if you have run-a-1, run-a-2 and run-b-1 then - "run-a*" selects the first two. In case of run names - tag labels can also be used separated by a colon (:) - character: "run_name:tag_name". - -n NEW_RUNS [NEW_RUNS ...], --newname NEW_RUNS [NEW_RUNS ...] - The 'new' (right) side of the difference: these - analysis runs are compared to the -b/--basename runs. - The parameter can be multiple run names (on the remote - server) or multiple local report directories (result - of the analyze command). In case of run name the - newname can contain * quantifiers which matches any + (on the remote server), multiple local report + directories (result of the analyze command) or + baseline files (generated by the 'CodeChecker parse -e + baseline' command). In case of run name the the + basename can contain * quantifiers which matches any number of characters (zero or more). So if you have run-a-1, run-a-2 and run-b-1 then "run-a*" selects the first two. In case of run names tag labels can also be used separated by a colon (:) character: "run_name:tag_name". + -n NEW_RUNS [NEW_RUNS ...], --newname NEW_RUNS [NEW_RUNS ...] + The 'new' (right) side of the difference: these + analysis runs are compared to the -b/--basename runs. + The parameter can be multiple run names (on the remote + server), multiple local report directories (result of + the analyze command) or baseline files (generated by + the 'CodeChecker parse -e baseline' command). In case + of run name the newname can contain * quantifiers + which matches any number of characters (zero or more). + So if you have run-a-1, run-a-2 and run-b-1 then + "run-a*" selects the first two. In case of run names + tag labels can also be used separated by a colon (:) + character: "run_name:tag_name". -o {plaintext,rows,table,csv,json,html,gerrit,codeclimate} [{plaintext,rows,table,csv,json,html,gerrit,codeclimate} ...], --output {plaintext,rows,table,csv,json,html,gerrit,codeclimate} [{plaintext,rows,table,csv,json,html,gerrit,codeclimate} ...] The output format(s) to use in showing the data. 
- html: multiple html files will be generated in the @@ -1317,6 +1320,10 @@ exist in the remote run 'run1' but appear in the local report directory: Compare two runs and show results that exist in both runs and filter results by multiple severity values: CodeChecker cmd diff -b run1 -n run2 --unresolved --severity high medium + +Compare a baseline file (generated by the 'CodeChecker parse -e baseline' +command) and a local report directory and show new results: + CodeChecker cmd diff -b /reports.baseline -n /my_report_dir --new ``` diff --git a/web/client/codechecker_client/cmd/cmd.py b/web/client/codechecker_client/cmd/cmd.py index 0ff49539a1..94fd781a9e 100644 --- a/web/client/codechecker_client/cmd/cmd.py +++ b/web/client/codechecker_client/cmd/cmd.py @@ -490,16 +490,17 @@ def __register_diff(parser): help="The 'base' (left) side of the difference: these " "analysis runs are used as the initial state in " "the comparison. The parameter can be multiple " - "run names (on the remote server) or multiple " + "run names (on the remote server), multiple " "local report directories (result of the analyze " - "command). In case of run name the the basename " - "can contain * quantifiers which matches any " - "number of characters (zero or more). So if you " - "have run-a-1, run-a-2 and run-b-1 then " - "\"run-a*\" selects the first two. In case of " - "run names tag labels can also be used separated " - "by a colon (:) character: " - "\"run_name:tag_name\".") + "command) or baseline files (generated by the " + "'CodeChecker parse -e baseline' command). In " + "case of run name the the basename can contain * " + "quantifiers which matches any number of " + "characters (zero or more). So if you have " + "run-a-1, run-a-2 and run-b-1 then \"run-a*\" " + "selects the first two. In case of run names tag " + "labels can also be used separated by a colon " + "(:) character: \"run_name:tag_name\".") parser.add_argument('-n', '--newname', type=str, @@ -510,11 +511,13 @@ def __register_diff(parser): help="The 'new' (right) side of the difference: these " "analysis runs are compared to the -b/--basename " "runs. The parameter can be multiple run names " - "(on the remote server) or multiple local " + "(on the remote server), multiple local " "report directories (result of the analyze " - "command). In case of run name the newname can " - "contain * quantifiers which matches any number " - "of characters (zero or more). So if you have " + "command) or baseline files (generated by the " + "'CodeChecker parse -e baseline' command). In " + "case of run name the newname can contain * " + "quantifiers which matches any number of " + "characters (zero or more). So if you have " "run-a-1, run-a-2 and run-b-1 then " "\"run-a*\" selects the first two. 
In case of " "run names tag labels can also be used separated " @@ -1342,7 +1345,11 @@ def add_arguments_to_parser(parser): Compare two runs and show results that exist in both runs and filter results by multiple severity values: - CodeChecker cmd diff -b run1 -n run2 --unresolved --severity high medium''' + CodeChecker cmd diff -b run1 -n run2 --unresolved --severity high medium + +Compare a baseline file (generated by the 'CodeChecker parse -e baseline' +command) and a local report directory and show new results: + CodeChecker cmd diff -b /reports.baseline -n /my_report_dir --new''' ) __register_diff(diff) diff --git a/web/client/codechecker_client/cmd_line_client.py b/web/client/codechecker_client/cmd_line_client.py index 17f4e0b449..9f59989859 100644 --- a/web/client/codechecker_client/cmd_line_client.py +++ b/web/client/codechecker_client/cmd_line_client.py @@ -20,7 +20,7 @@ import sys import shutil import time -from typing import Dict, List, Tuple, Union +from typing import Dict, Iterable, List, Tuple, Union from plist_to_html import PlistToHtml @@ -29,7 +29,7 @@ from codechecker_common import logger, plist_parser, util from codechecker_common.report import Report -from codechecker_common.output import twodim, gerrit, codeclimate +from codechecker_common.output import twodim, gerrit, codeclimate, baseline from codechecker_report_hash.hash import get_report_path_hash from codechecker_web.shared import webserver_context @@ -55,19 +55,26 @@ def init_logger(level, stream=None, logger_name='system'): LOG = logger.get_logger(logger_name) -def filter_localdir_remote_run( - run_args: List[str]) -> Tuple[List[str], List[str]]: - """Filter out arguments which are local directory or remote run names.""" +def filter_local_file_remote_run( + run_args: List[str] +) -> Tuple[List[str], List[str], List[str]]: + """ + Filter out arguments which are local directory, baseline files or remote + run names. + """ local_dirs = [] + baseline_files = [] run_names = [] for r in run_args: if os.path.isdir(r): local_dirs.append(os.path.abspath(r)) + elif os.path.isfile(r) and r.endswith(".baseline"): + baseline_files.append(os.path.abspath(r)) else: run_names.append(r) - return local_dirs, run_names + return local_dirs, baseline_files, run_names def run_sort_type_str(value): @@ -822,68 +829,91 @@ def handle_diff_results(args): context = webserver_context.get_context() source_line_contents = {} - def get_diff_local_dir_remote_run(client, report_dirs, remote_run_names): - """Compare a local report directory with a remote run.""" + def get_diff_local_dir_remote_run( + client, + report_dirs: List[str], + baseline_files: List[str], + remote_run_names: List[str] + ): + """ Compare a local report directory with a remote run. """ filtered_reports = [] - report_dir_results = get_report_dir_results(report_dirs, - args, - context.severity_map) + filtered_report_hashes = set() + + report_dir_results = get_report_dir_results( + report_dirs, args, context.severity_map) suppressed_in_code = get_suppressed_reports(report_dir_results, args) diff_type = get_diff_type(args) run_ids, run_names, tag_ids = \ process_run_args(client, remote_run_names) local_report_hashes = set([r.report_hash for r in report_dir_results]) + local_report_hashes.update(baseline.get_report_hashes(baseline_files)) if diff_type == ttypes.DiffType.NEW: # Get report hashes which can be found only in the remote runs. 
- remote_hashes = \ - client.getDiffResultsHash(run_ids, - local_report_hashes, - ttypes.DiffType.RESOLVED, - None, - tag_ids) - - results = get_diff_base_results(client, args, run_ids, - remote_hashes, - suppressed_in_code) + remote_hashes = client.getDiffResultsHash( + run_ids, local_report_hashes, ttypes.DiffType.RESOLVED, + None, tag_ids) + + results = get_diff_base_results( + client, args, run_ids, remote_hashes, suppressed_in_code) + for result in results: filtered_reports.append(result) elif diff_type == ttypes.DiffType.UNRESOLVED: # Get remote hashes which can be found in the remote run and in the # local report directory. - remote_hashes = \ - client.getDiffResultsHash(run_ids, - local_report_hashes, - ttypes.DiffType.UNRESOLVED, - None, - tag_ids) + remote_hashes = client.getDiffResultsHash( + run_ids, local_report_hashes, ttypes.DiffType.UNRESOLVED, + None, tag_ids) + + filtered_report_hashes = local_report_hashes.copy() for result in report_dir_results: rep_h = result.report_hash + filtered_report_hashes.discard(rep_h) if rep_h in remote_hashes and rep_h not in suppressed_in_code: filtered_reports.append(result) + filtered_report_hashes &= set(remote_hashes) + + # Try to get missing report from the server based on the report + # hashes. + if filtered_report_hashes: + results = get_diff_base_results( + client, args, run_ids, list(filtered_report_hashes), + suppressed_in_code) + + for result in results: + filtered_report_hashes.discard(result.bugHash) + filtered_reports.append(result) elif diff_type == ttypes.DiffType.RESOLVED: # Get remote hashes which can be found in the remote run and in the # local report directory. - remote_hashes = \ - client.getDiffResultsHash(run_ids, - local_report_hashes, - ttypes.DiffType.UNRESOLVED, - None, - tag_ids) + remote_hashes = client.getDiffResultsHash( + run_ids, local_report_hashes, ttypes.DiffType.UNRESOLVED, + None, tag_ids) + + filtered_report_hashes = local_report_hashes.copy() for result in report_dir_results: + filtered_report_hashes.discard(result.report_hash) if result.report_hash not in remote_hashes: filtered_reports.append(result) - return filtered_reports, run_names + filtered_report_hashes -= set(remote_hashes) - def get_diff_remote_run_local_dir(client, remote_run_names, report_dirs): - """ - Compares a remote run with a local report directory. - """ + return filtered_reports, filtered_report_hashes, run_names + + def get_diff_remote_run_local_dir( + client, + remote_run_names: List[str], + report_dirs: List[str], + baseline_files: List[str] + ): + """ Compares a remote run with a local report directory. 
""" filtered_reports = [] - report_dir_results = get_report_dir_results(report_dirs, - args, - context.severity_map) + filtered_report_hashes = [] + + report_dir_results = get_report_dir_results( + report_dirs, args, context.severity_map) + suppressed_in_code = get_suppressed_reports(report_dir_results, args) diff_type = get_diff_type(args) @@ -891,35 +921,36 @@ def get_diff_remote_run_local_dir(client, remote_run_names, report_dirs): process_run_args(client, remote_run_names) local_report_hashes = set([r.report_hash for r in report_dir_results]) - remote_hashes = client.getDiffResultsHash(run_ids, - local_report_hashes, - diff_type, - None, - tag_ids) + local_report_hashes = local_report_hashes.union( + baseline.get_report_hashes(baseline_files)) + + remote_hashes = client.getDiffResultsHash( + run_ids, local_report_hashes, diff_type, None, tag_ids) if not remote_hashes: - return filtered_reports, run_names + return filtered_reports, filtered_report_hashes, run_names if diff_type in [ttypes.DiffType.NEW, ttypes.DiffType.UNRESOLVED]: # Shows reports from the report dir which are not present in # the baseline (NEW reports) or appear in both side (UNRESOLVED # reports) and not suppressed in the code. + filtered_report_hashes = set(remote_hashes) + for result in report_dir_results: rep_h = result.report_hash + filtered_report_hashes.discard(rep_h) if rep_h in remote_hashes and rep_h not in suppressed_in_code: filtered_reports.append(result) elif diff_type == ttypes.DiffType.RESOLVED: # Show bugs in the baseline (server) which are not present in # the report dir or suppressed. - results = get_diff_base_results(client, - args, - run_ids, - remote_hashes, - suppressed_in_code) + results = get_diff_base_results( + client, args, run_ids, remote_hashes, suppressed_in_code) + for result in results: filtered_reports.append(result) - return filtered_reports, run_names + return filtered_reports, filtered_report_hashes, run_names def get_diff_remote_runs(client, remote_base_run_names, remote_new_run_names): @@ -963,17 +994,23 @@ def get_diff_remote_runs(client, remote_base_run_names, return all_results, base_run_names, new_run_names - def get_diff_local_dirs(base_run_names, new_run_names): + def get_diff_local_dirs( + report_dirs: List[str], + baseline_files: List[str], + new_report_dirs: List[str], + new_baseline_files: List[str] + ) -> Tuple[List[Report], List[str]]: """ Compares two report directories and returns the filtered results. """ filtered_reports = [] - base_results = get_report_dir_results(base_run_names, - args, - context.severity_map) - new_results = get_report_dir_results(new_run_names, - args, - context.severity_map) + filtered_report_hashes = [] + + base_results = get_report_dir_results( + report_dirs, args, context.severity_map) + + new_results = get_report_dir_results( + new_report_dirs, args, context.severity_map) new_results = [res for res in new_results if res.check_source_code_comments(args.review_status)] @@ -981,21 +1018,34 @@ def get_diff_local_dirs(base_run_names, new_run_names): base_hashes = set([res.report_hash for res in base_results]) new_hashes = set([res.report_hash for res in new_results]) + # Add hashes from the baseline files. 
+ base_hashes.update(baseline.get_report_hashes(baseline_files)) + new_hashes.update(baseline.get_report_hashes(new_baseline_files)) + diff_type = get_diff_type(args) if diff_type == ttypes.DiffType.NEW: + filtered_report_hashes = new_hashes.copy() for res in new_results: + filtered_report_hashes.discard(res.report_hash) + if res.report_hash not in base_hashes: filtered_reports.append(res) if diff_type == ttypes.DiffType.UNRESOLVED: + filtered_report_hashes = new_hashes.copy() for res in new_results: + filtered_report_hashes.discard(res.report_hash) + if res.report_hash in base_hashes: filtered_reports.append(res) elif diff_type == ttypes.DiffType.RESOLVED: + filtered_report_hashes = base_hashes.copy() for res in base_results: + filtered_report_hashes.discard(res.report_hash) + if res.report_hash not in new_hashes: filtered_reports.append(res) - return filtered_reports + return filtered_reports, filtered_report_hashes def cached_report_file_lookup(file_cache, file_id): """ @@ -1115,9 +1165,15 @@ def report_to_html(client, reports, output_dir): html_builder.create_index_html(output_dir) print_stats(len(reports), file_stats, severity_stats) - def print_reports(client, - reports: List[Report], - output_formats: List[str]): + def print_reports( + client, + reports: List[Report], + report_hashes: Iterable[str], + output_formats: List[str] + ): + if report_hashes: + LOG.info("Couldn't get local reports for the following baseline " + "report hashes: %s", ', '.join(sorted(report_hashes))) selected_output_format_num = len(output_formats) @@ -1288,24 +1344,26 @@ def print_reports(client, "analyze your project again to update the " "reports!", changed_f) - basename_local_dirs, basename_run_names = \ - filter_localdir_remote_run(args.base_run_names) + basename_local_dirs, basename_baseline_files, basename_run_names = \ + filter_local_file_remote_run(args.base_run_names) - newname_local_dirs, newname_run_names = \ - filter_localdir_remote_run(args.new_run_names) + newname_local_dirs, newname_baseline_files, newname_run_names = \ + filter_local_file_remote_run(args.new_run_names) has_different_run_args = False - if basename_local_dirs and basename_run_names: + if (basename_local_dirs or basename_baseline_files) and basename_run_names: LOG.error("All base run names must have the same type: local " - "directory (%s) or run names (%s).", + "directory (%s) / baseline files (%s) or run names (%s).", ', '.join(basename_local_dirs), + ', '.join(basename_baseline_files), ', '.join(basename_run_names)) has_different_run_args = True if newname_local_dirs and newname_run_names: LOG.error("All new run names must have the same type: local " - "directory (%s) or run names (%s).", + "directory (%s) / baseline files (%s) or run names (%s).", ', '.join(newname_local_dirs), + ', '.join(newname_baseline_files), ', '.join(newname_run_names)) has_different_run_args = True @@ -1324,12 +1382,20 @@ def print_reports(client, if basename_local_dirs: LOG.info("Matching local report directories (--baseline): %s", ', '.join(basename_local_dirs)) + if basename_baseline_files: + LOG.info("Matching local baseline files (--baseline): %s", + ', '.join(basename_baseline_files)) + if newname_local_dirs: LOG.info("Matching local report directories (--newname): %s", ', '.join(newname_local_dirs)) + if newname_baseline_files: + LOG.info("Matching local baseline files (--newname): %s", + ', '.join(newname_baseline_files)) client = None - # We set up the client if we are not comparing two local report directory. 
+ # We set up the client if we are not comparing two local report directories + # or baseline files. if basename_run_names or newname_run_names: if basename_run_names: LOG.info("Given remote runs (--baseline): %s", @@ -1347,40 +1413,45 @@ def print_reports(client, args.product_url) raise sexit - if basename_local_dirs and newname_local_dirs: - reports = get_diff_local_dirs(basename_local_dirs, - newname_local_dirs) - print_reports(client, reports, args.output_format) - LOG.info("Compared the following local report directories: %s and %s", - ', '.join(basename_local_dirs), - ', '.join(newname_local_dirs)) - elif newname_local_dirs: - reports, matching_base_run_names = \ - get_diff_remote_run_local_dir(client, - basename_run_names, - newname_local_dirs) - print_reports(client, reports, args.output_format) - LOG.info("Compared remote run(s) %s (matching: %s) and local report " - "directory(s) %s", + report_hashes = [] + if (basename_local_dirs or basename_baseline_files) and \ + (newname_local_dirs or newname_baseline_files): + reports, report_hashes = get_diff_local_dirs( + basename_local_dirs, basename_baseline_files, + newname_local_dirs, newname_baseline_files) + + print_reports(client, reports, report_hashes, args.output_format) + LOG.info("Compared the following local files / directories: %s and %s", + ', '.join([*basename_local_dirs, *basename_baseline_files]), + ', '.join([*newname_local_dirs, *newname_baseline_files])) + elif newname_local_dirs or newname_baseline_files: + reports, report_hashes, matching_base_run_names = \ + get_diff_remote_run_local_dir( + client, basename_run_names, + newname_local_dirs, newname_baseline_files) + + print_reports(client, reports, report_hashes, args.output_format) + LOG.info("Compared remote run(s) %s (matching: %s) and local files / " + "report directory(s) %s", ', '.join(basename_run_names), ', '.join(matching_base_run_names), - ', '.join(newname_local_dirs)) - elif basename_local_dirs: - reports, matching_new_run_names = \ - get_diff_local_dir_remote_run(client, - basename_local_dirs, - newname_run_names) - - print_reports(client, reports, args.output_format) - LOG.info("Compared local report directory(s) %s and remote run(s) %s " - "(matching: %s).", - ', '.join(basename_local_dirs), + ', '.join([*newname_local_dirs, *newname_baseline_files])) + elif (basename_local_dirs or basename_baseline_files): + reports, report_hashes, matching_new_run_names = \ + get_diff_local_dir_remote_run( + client, basename_local_dirs, basename_baseline_files, + newname_run_names) + + print_reports(client, reports, report_hashes, args.output_format) + LOG.info("Compared local files / report directory(s) %s and remote " + "run(s) %s (matching: %s).", + ', '.join([*basename_local_dirs, *basename_baseline_files]), ', '.join(newname_run_names), ', '.join(matching_new_run_names)) else: reports, matching_base_run_names, matching_new_run_names = \ get_diff_remote_runs(client, basename_run_names, newname_run_names) - print_reports(client, reports, args.output_format) + print_reports(client, reports, None, args.output_format) LOG.info("Compared multiple remote runs %s (matching: %s) and %s " "(matching: %s)", ', '.join(basename_run_names), @@ -1388,7 +1459,7 @@ def print_reports(client, ', '.join(newname_run_names), ', '.join(matching_new_run_names)) - if len(reports) != 0: + if len(reports) != 0 or len(report_hashes) != 0: sys.exit(2) diff --git a/web/tests/functional/diff_local/test_diff_local.py b/web/tests/functional/diff_local/test_diff_local.py index 
d1715d8497..d3d1743101 100644 --- a/web/tests/functional/diff_local/test_diff_local.py +++ b/web/tests/functional/diff_local/test_diff_local.py @@ -21,7 +21,7 @@ import unittest from libtest import env, codechecker -from libtest.codechecker import get_diff_results +from libtest.codechecker import create_baseline_file, get_diff_results class DiffLocal(unittest.TestCase): @@ -267,3 +267,116 @@ def test_suppress_reports(self): res, _, _ = get_diff_results( [report_dir_base], [report_dir_new], '--resolved', 'json') self.assertEqual(len(res), 2) + + def test_basename_baseline_file_json(self): + """ + Get reports based on a baseline file given to the basename option. + """ + baseline_file_path = create_baseline_file(self.base_reports) + + # Get new results. + new_results, _, _ = get_diff_results( + [baseline_file_path], [self.new_reports], '--new', 'json') + + print(new_results) + + for new_result in new_results: + self.assertEqual(new_result['checkerId'], "core.NullDereference") + + # Get unresolved results. + unresolved_results, _, _ = get_diff_results( + [baseline_file_path], [self.new_reports], '--unresolved', 'json') + + print(unresolved_results) + + self.assertTrue(any( + r for r in unresolved_results + if r['checkerId'] == 'core.DivideZero')) + + self.assertFalse(any( + r for r in unresolved_results + if r['checkerId'] == 'core.NullDereference' or + r['checkerId'] == 'core.CallAndMessage')) + + # Get resolved results. + resolved_results, err, returncode = get_diff_results( + [baseline_file_path], [self.new_reports], '--resolved', 'json') + + self.assertFalse(resolved_results) + self.assertEqual(returncode, 2) + self.assertIn( + "Couldn't get local reports for the following baseline report " + "hashes: ", + err) + + def test_newname_baseline_file_json(self): + """ + Get reports based on a baseline file given to the newname option. + """ + baseline_file_path = create_baseline_file(self.new_reports) + + # Get new results. + new_results, err, returncode = get_diff_results( + [self.base_reports], [baseline_file_path], '--new', 'json') + + self.assertFalse(new_results) + self.assertEqual(returncode, 2) + self.assertIn( + "Couldn't get local reports for the following baseline report " + "hashes: ", + err) + + # Get unresolved results. + unresolved_results, err, returncode = get_diff_results( + [self.base_reports], [baseline_file_path], '--unresolved', 'json') + + self.assertFalse(unresolved_results) + self.assertEqual(returncode, 2) + self.assertIn( + "Couldn't get local reports for the following baseline report " + "hashes: ", + err) + + # Get resolved results. + resolved_results, _, _ = get_diff_results( + [self.base_reports], [baseline_file_path], '--resolved', 'json') + + for report in resolved_results: + self.assertEqual(report['checkerId'], "core.CallAndMessage") + + def test_multiple_baseline_file_json(self): + """ Test multiple baseline file for basename option. """ + baseline_file_paths = [ + create_baseline_file(self.base_reports), + create_baseline_file(self.new_reports)] + + # Get new results. + new_results, _, returncode = get_diff_results( + baseline_file_paths, [self.new_reports], '--new', 'json') + + print(new_results) + + self.assertFalse(new_results) + self.assertFalse(returncode) + + # Get unresolved results. 
+ unresolved_results, _, returncode = get_diff_results( + baseline_file_paths, [self.new_reports], '--unresolved', 'json') + print(unresolved_results) + + self.assertTrue(any( + r for r in unresolved_results + if r['checkerId'] == 'core.DivideZero')) + + # Get resolved results. + resolved_results, err, returncode = get_diff_results( + baseline_file_paths, [self.new_reports], '--resolved', 'json') + + print(resolved_results) + + self.assertFalse(resolved_results) + self.assertEqual(returncode, 2) + self.assertIn( + "Couldn't get local reports for the following baseline report " + "hashes: ", + err) diff --git a/web/tests/functional/diff_local_remote/test_diff_local_remote.py b/web/tests/functional/diff_local_remote/test_diff_local_remote.py index 323b109fb9..72138c7567 100644 --- a/web/tests/functional/diff_local_remote/test_diff_local_remote.py +++ b/web/tests/functional/diff_local_remote/test_diff_local_remote.py @@ -22,7 +22,7 @@ import unittest from libtest import env -from libtest.codechecker import get_diff_results +from libtest.codechecker import create_baseline_file, get_diff_results class LocalRemote(unittest.TestCase): @@ -559,3 +559,110 @@ def test_diff_remote_local_resolved_same(self): [self._run_names[0]], [self._remote_reports], '--resolved', 'json', ["--url", self._url]) self.assertEqual(out, []) + + def test_local_to_remote_with_baseline_file(self): + """ + Get reports based on a baseline file given to the basename option. + """ + baseline_file_path = create_baseline_file(self._local_reports) + + # Get new reports. + new_results, _, returncode = get_diff_results( + [baseline_file_path], [self._run_names[0]], '--new', 'json', + ["--url", self._url]) + print(new_results) + + for report in new_results: + self.assertEqual(report['checkerId'], "core.NullDereference") + + self.assertEqual(returncode, 2) + + # Get unresolved reports. + unresolved_results, err, returncode = get_diff_results( + [baseline_file_path], [self._run_names[0]], '--unresolved', 'json', + ["--url", self._url]) + print(unresolved_results) + + self.assertTrue(unresolved_results) + self.assertFalse(any( + r for r in unresolved_results + if r['checkerId'] == 'core.CallAndMessage')) + self.assertEqual(returncode, 2) + + # Get resolved reports. + resolved_results, err, returncode = get_diff_results( + [baseline_file_path], [self._run_names[0]], '--resolved', 'json', + ["--url", self._url]) + print(resolved_results) + + self.assertFalse(resolved_results) + self.assertEqual(returncode, 2) + self.assertIn( + "Couldn't get local reports for the following baseline report " + "hashes: ", + err) + + def test_remote_to_local_with_baseline_file(self): + """ + Get reports based on a baseline file given to the newname option. + """ + baseline_file_path = create_baseline_file(self._local_reports) + + # Get new reports. + res, _, _ = get_diff_results( + [self._run_names[0]], [self._local_reports], + '--new', 'json', + ["--url", self._url, + "--review-status", "unreviewed", "confirmed", "false_positive"]) + new_hashes = sorted(set([n['bugHash'] for n in res])) + + new_results, err, returncode = get_diff_results( + [self._run_names[0]], [baseline_file_path], '--new', 'json', + ["--url", self._url]) + print(new_results) + + self.assertFalse(new_results) + self.assertEqual(returncode, 2) + self.assertIn( + "Couldn't get local reports for the following baseline report " + "hashes: " + ', '.join(new_hashes), + err) + + # Get unresolved reports. 
+ res, _, _ = get_diff_results( + [self._run_names[0]], [self._local_reports], + '--unresolved', 'json', + ["--url", self._url, + "--review-status", "unreviewed", "confirmed", "false_positive"]) + unresolved_hashes = sorted(set([n['bugHash'] for n in res])) + + unresolved_results, err, returncode = get_diff_results( + [self._run_names[0]], [baseline_file_path], + '--unresolved', 'json', + ["--url", self._url]) + print(unresolved_results) + + self.assertFalse(unresolved_results) + self.assertEqual(returncode, 2) + self.assertIn( + "Couldn't get local reports for the following baseline report " + "hashes: " + ', '.join(unresolved_hashes), + err) + + # Get resolved reports. + res, _, _ = get_diff_results( + [self._run_names[0]], [self._local_reports], + '--resolved', 'json', + ["--url", self._url, + "--review-status", "unreviewed", "confirmed", "false_positive"]) + resolved_hashes = set([n['bugHash'] for n in res]) + + resolved_results, _, returncode = get_diff_results( + [self._run_names[0]], [baseline_file_path], '--resolved', 'json', + ["--url", self._url]) + print(resolved_results) + + self.assertTrue(resolved_results) + self.assertSetEqual( + {r['bugHash'] for r in resolved_results}, resolved_hashes) + self.assertEqual(returncode, 2) diff --git a/web/tests/libtest/codechecker.py b/web/tests/libtest/codechecker.py index 10e0dd6180..d9e2144eb9 100644 --- a/web/tests/libtest/codechecker.py +++ b/web/tests/libtest/codechecker.py @@ -107,6 +107,20 @@ def get_diff_results(basenames, newnames, diff_type, format_type=None, return out, err, proc.returncode +def create_baseline_file(report_dir: str, cc_env=None) -> str: + """ Create baseline file from the given report directory. """ + parse_cmd = [ + env.codechecker_cmd(), 'parse', report_dir, + '-e', 'baseline', '-o', report_dir] + + proc = subprocess.Popen( + parse_cmd, encoding="utf-8", errors="ignore", env=cc_env, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc.communicate() + + return os.path.join(report_dir, 'reports.baseline') + + def login(codechecker_cfg, test_project_path, username, password, protocol='http'): """
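
Below is a minimal, hedged sketch of how the new `codechecker_common/output/baseline.py` helpers introduced in this patch fit together, assuming the CodeChecker sources are importable in the current environment. The temporary directory is illustrative only, and the hash values are borrowed from the test data above rather than taken from a real analysis.

```python
# Sketch only: exercises baseline.write() and baseline.get_report_hashes()
# from the codechecker_common.output.baseline module added by this patch.
import os
import tempfile

from codechecker_common.output import baseline

with tempfile.TemporaryDirectory() as out_dir:
    # write() creates (or extends) <out_dir>/reports.baseline, storing the
    # given report hashes uniqued and sorted, one hash per line.
    baseline.write(out_dir, ['3d15184f38c5fa57e479b744fe3f5035',
                             'f8fbc46cc5afbb056d92bd3d3d702781'])

    # get_report_hashes() reads the stored hashes back from one or more
    # baseline files, e.g. to compare them against a fresh analysis.
    baseline_file = os.path.join(out_dir, 'reports.baseline')
    print(sorted(baseline.get_report_hashes([baseline_file])))
```

In day-to-day use the same round trip is driven from the command line: `CodeChecker parse ./reports -e baseline -o .` produces `reports.baseline`, and `CodeChecker cmd diff -b ./reports.baseline -n ./reports --new` consumes it, as documented in the `docs/usage.md` hunk above.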