Skip to content

Commit

Permalink
[cli] Local diff workflow support
Browse files Browse the repository at this point in the history
- Extend the `CodeChecker parse` command with an extra baseline output type
which can be used to generate a baseline file which will contain
report hashes for legacy reports.
- Extend the `CodeChecker cmd diff` to support baseline files.
- Add test cases.
- Extend the documentation with the recommended usage of this workflow.
  • Loading branch information
csordasmarton committed Aug 17, 2021
1 parent 35008b9 commit d162500
Show file tree
Hide file tree
Showing 11 changed files with 625 additions and 145 deletions.
31 changes: 23 additions & 8 deletions analyzer/codechecker_analyzer/cmd/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from codechecker_analyzer import analyzer_context, suppress_handler

from codechecker_common import arg, logger, plist_parser, util, cmd_config
from codechecker_common.output import json as out_json, twodim, \
from codechecker_common.output import baseline, json as out_json, twodim, \
codeclimate, gerrit
from codechecker_common.skiplist_handler import SkipListHandler
from codechecker_common.source_code_comment_handler import \
Expand All @@ -37,7 +37,7 @@

LOG = logger.get_logger('system')

EXPORT_TYPES = ['html', 'json', 'codeclimate', 'gerrit']
EXPORT_TYPES = ['html', 'json', 'codeclimate', 'gerrit', 'baseline']

_data_files_dir_path = analyzer_context.get_context().data_files_dir_path
_severity_map_file = os.path.join(_data_files_dir_path, 'config',
Expand Down Expand Up @@ -457,7 +457,11 @@ def add_arguments_to_parser(parser):
"For more information see:\n"
"https://github.com/codeclimate/platform/"
"blob/master/spec/analyzers/SPEC.md"
"#data-types")
"#data-types\n"
"'baseline' output can be used to integrate "
"CodeChecker into your local workflow "
"without using a CodeChecker server. For "
"more information see our usage guide.")

output_opts.add_argument('-o', '--output',
dest="output_path",
Expand Down Expand Up @@ -639,6 +643,9 @@ def _parse_convert_reports(
report.trim_path_prefixes(trim_path_prefixes)

number_of_reports = len(all_reports)
if out_format == "baseline":
return (baseline.convert(all_reports), number_of_reports)

if out_format == "codeclimate":
return (codeclimate.convert(all_reports, severity_map),
number_of_reports)
Expand Down Expand Up @@ -693,11 +700,6 @@ def _generate_json_output(
output_text = json.dumps(reports)

if output_path:
output_path = os.path.abspath(output_path)

if not os.path.exists(output_path):
os.mkdir(output_path)

output_file_path = os.path.join(output_path, 'reports.json')
with open(output_file_path, mode='w', encoding='utf-8',
errors="ignore") as output_f:
Expand Down Expand Up @@ -793,7 +795,20 @@ def main(args):
if 'output_path' in args:
output_path = os.path.abspath(args.output_path)

if not os.path.exists(output_path):
os.makedirs(output_path)

if export:
if export == 'baseline':
report_hashes, number_of_reports = _parse_convert_reports(
args.input, export, context.severity_map, trim_path_prefixes,
skip_handler)

if output_path:
baseline.write(output_path, report_hashes)

sys.exit(2 if number_of_reports else 0)

# The HTML part will be handled separately below.
if export != 'html':
sys.exit(_generate_json_output(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
from libtest import project
from libtest.codechecker import call_command

from codechecker_common.output import baseline


class AnalyzeParseTestCaseMeta(type):
def __new__(mcs, name, bases, test_dict):
Expand Down Expand Up @@ -544,3 +546,47 @@ def test_html_export_exit_code(self):
out, _, result = call_command(extract_cmd, cwd=self.test_dir,
env=self.env)
self.assertEqual(result, 0, "Parsing should not found any issue.")

def test_baseline_output(self):
    """ Check that 'parse -e baseline' creates, then extends a baseline. """
    out_dir = self.test_workspaces['OUTPUT']
    baseline_file = os.path.join(out_dir, "reports.baseline")

    # Every step parses one more test project into the same output
    # directory; the expected set is what the baseline file has to contain
    # once that step finished, i.e. the second step checks the merge.
    steps = [
        ("notes", {'3d15184f38c5fa57e479b744fe3f5035'}),
        ("macros", {'3d15184f38c5fa57e479b744fe3f5035',
                    'f8fbc46cc5afbb056d92bd3d3d702781'}),
    ]

    for project_name, expected_hashes in steps:
        project_dir = os.path.join(
            self.test_workspaces['NORMAL'], "test_files", project_name)

        parse_cmd = ['CodeChecker', 'parse',
                     "-e", "baseline",
                     "-o", out_dir,
                     project_dir,
                     '--trim-path-prefix', project_dir]

        _, _, exit_code = call_command(
            parse_cmd, cwd=self.test_dir, env=self.env)
        self.assertEqual(exit_code, 2, "Parsing not found any issue.")

        self.assertEqual(
            baseline.get_report_hashes([baseline_file]), expected_hashes)
76 changes: 76 additions & 0 deletions codechecker_common/output/baseline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# -------------------------------------------------------------------------
#
# Part of the CodeChecker project, under the Apache License v2.0 with
# LLVM Exceptions. See LICENSE for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# -------------------------------------------------------------------------
""" CodeChecker baseline output helpers. """

from io import TextIOWrapper
import os
from typing import Iterable, List, Set

from codechecker_common import logger
from codechecker_common.report import Report


# Module level logger, obtained under the 'system' logger name.
LOG = logger.get_logger('system')


def __get_report_hashes(f: TextIOWrapper) -> List[str]:
    """ Get report hashes from the given file.

    The file is expected to hold one report hash per line. Surrounding
    whitespace (including the line terminator) is removed from every line
    and lines which are empty after that are skipped, so the returned values
    can be compared directly with report hashes.
    """
    # Iterating the file yields the lines *with* their trailing newline, so
    # each one has to be stripped before it is filtered and returned.
    stripped_lines = (line.strip() for line in f)
    return [report_hash for report_hash in stripped_lines if report_hash]


def get_report_hashes(
    baseline_file_paths: Iterable[str]
) -> Set[str]:
    """ Collect the distinct report hashes stored in the baseline files. """
    unique_hashes: Set[str] = set()

    for baseline_path in baseline_file_paths:
        with open(baseline_path, mode='r', encoding='utf-8',
                  errors="ignore") as baseline_file:
            for report_hash in __get_report_hashes(baseline_file):
                unique_hashes.add(report_hash)

    return unique_hashes


def convert(reports: Iterable[Report]) -> List[str]:
    """ Turn the given reports into the CodeChecker baseline format.

    The result is the list of the report hashes of the reports, without
    duplicates and in sorted order.
    """
    unique_hashes = {report.report_hash for report in reports}
    return sorted(unique_hashes)


def write(output_dir_path: str, report_hashes: Iterable[str]):
    """ Create a new baseline file or extend an existing one with the given
    report hashes in the given output directory. It will remove the duplicates
    and also sort the report hashes before writing it to a file.

    The file is called 'reports.baseline' and it is placed directly into
    'output_dir_path'. When every given hash is already present in the
    file, the file is left untouched.
    """
    # 'report_hashes' can be any iterable, even a one-shot iterator, and it
    # is needed twice below (for the difference and for the merged content),
    # so it is consumed exactly once here.
    current_report_hashes = set(report_hashes)

    file_path = os.path.join(output_dir_path, 'reports.baseline')

    # Append mode is used so an already existing file is not emptied on
    # open; the stream is rewound explicitly before the old content is read.
    with open(file_path, mode='a+', encoding='utf-8', errors="ignore") as f:
        f.seek(0)
        old_report_hashes = __get_report_hashes(f)
        new_report_hashes = current_report_hashes - set(old_report_hashes)

        if not new_report_hashes:
            LOG.info("Baseline file (%s) is up-to-date.", file_path)
            return

        if old_report_hashes:
            LOG.info("Merging existing baseline file: %s", file_path)
        else:
            LOG.info("Creating new baseline file: %s", file_path)

        LOG.info("Total number of old report hashes: %d",
                 len(old_report_hashes))
        LOG.info("Total number of new report hashes: %d",
                 len(new_report_hashes))

        LOG.debug("New report hashes: %s", sorted(new_report_hashes))

        # Drop the previous content, then write the union of the old and the
        # given hashes back, one hash per line.
        f.seek(0)
        f.truncate()
        f.write("\n".join(sorted(
            current_report_hashes.union(old_report_hashes))))
17 changes: 11 additions & 6 deletions docs/analyzer/user_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -1577,10 +1577,11 @@ Statistics analysis feature arguments:
</summary>

```
Usage: CodeChecker parse [-h] [--config CONFIG_FILE] [-t {plist}]
[-e {html,json,codeclimate,gerrit}] [-o OUTPUT_PATH]
[--suppress SUPPRESS] [--export-source-suppress]
[--print-steps] [-i SKIPFILE]
usage: CodeChecker parse [-h] [--config CONFIG_FILE] [-t {plist}]
[-e {html,json,codeclimate,gerrit,baseline}]
[-o OUTPUT_PATH] [--suppress SUPPRESS]
[--export-source-suppress] [--print-steps]
[-i SKIPFILE]
[--trim-path-prefix [TRIM_PATH_PREFIX [TRIM_PATH_PREFIX ...]]]
[--review-status [REVIEW_STATUS [REVIEW_STATUS ...]]]
[--verbose {info,debug_analyzer,debug}]
Expand Down Expand Up @@ -1643,12 +1644,16 @@ optional arguments:
Set verbosity level.
export arguments:
-e {html,json,codeclimate,gerrit}, --export {html,json,codeclimate,gerrit}
-e {html,json,codeclimate,gerrit,baseline}, --export {html,json,codeclimate,gerrit,baseline}
Specify extra output format type.
'codeclimate' format can be used for Code Climate and
for GitLab integration. For more information see:
https://github.com/codeclimate/platform/blob/master/sp
ec/analyzers/SPEC.md#data-types (default: None)
ec/analyzers/SPEC.md#data-types
'baseline' output can be used to integrate CodeChecker
into your local workflow without using a CodeChecker
server. For more information see our usage guide.
(default: None)
-o OUTPUT_PATH, --output OUTPUT_PATH
Store the output in the given folder.
Expand Down
19 changes: 19 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ It invokes Clang Static Analyzer and Clang-Tidy tools to analyze your code.
- [Alternative 2: Store each analysis in a new run](#storing-new-runs)
- [Gerrit Integration](#gerrit-integration)
- [Setting up user authentication](authentication)
- [Step 9: Integrate CodeChecker into your local workflow](#step-9)
- [Updating CodeChecker to new version](#upgrade)
- [Unique Report Identifier (RI)](#unique-report-identifier)
- [Listing and Counting Reports](#listing-reports)
Expand Down Expand Up @@ -754,6 +755,24 @@ guide.
You can set up authentication for your server and (web,command line) clients
as described in the [Authentication Guide](web/authentication.md).
## Step 9: Integrate CodeChecker into your local workflow <a name="step-9"></a>
If you want to use CodeChecker in your project but you don't want to run a
CodeChecker server or fix every report found by CodeChecker the first
time (legacy findings), you can follow these steps:
1. Analyze your project to a report directory (e.g.: `./reports`). For more
information see [Step 2](#step-2).
2. Create a baseline file from the reports which contains the legacy findings:
`CodeChecker parse ./reports -e baseline -o .`. It is recommended to store
this baseline file (`reports.baseline`) in your repository.
3. On source code changes after your project is re-analyzed use the
CodeChecker diff command to get the new reports:
`CodeChecker cmd diff -b ./reports.baseline -n ./reports --new`
4. On configuration changes (new checkers / options are enabled / disabled,
new CodeChecker / clang version is used, etc.) re-generate the baseline file
(step 1-2).
## Updating CodeChecker to new version <a name="upgrade"></a>
If a new CodeChecker release is available it might be possible that there are
some database changes compared to the previous release. If you run into
Expand Down
37 changes: 22 additions & 15 deletions docs/web/user_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -1092,26 +1092,29 @@ optional arguments:
The 'base' (left) side of the difference: these
analysis runs are used as the initial state in the
comparison. The parameter can be multiple run names
(on the remote server) or multiple local report
directories (result of the analyze command). In case
of run name the the basename can contain * quantifiers
which matches any number of characters (zero or more).
So if you have run-a-1, run-a-2 and run-b-1 then
"run-a*" selects the first two. In case of run names
tag labels can also be used separated by a colon (:)
character: "run_name:tag_name".
-n NEW_RUNS [NEW_RUNS ...], --newname NEW_RUNS [NEW_RUNS ...]
The 'new' (right) side of the difference: these
analysis runs are compared to the -b/--basename runs.
The parameter can be multiple run names (on the remote
server) or multiple local report directories (result
of the analyze command). In case of run name the
newname can contain * quantifiers which matches any
(on the remote server), multiple local report
directories (result of the analyze command) or
baseline files (generated by the 'CodeChecker parse -e
baseline' command). In case of run name the the
basename can contain * quantifiers which matches any
number of characters (zero or more). So if you have
run-a-1, run-a-2 and run-b-1 then "run-a*" selects the
first two. In case of run names tag labels can also be
used separated by a colon (:) character:
"run_name:tag_name".
-n NEW_RUNS [NEW_RUNS ...], --newname NEW_RUNS [NEW_RUNS ...]
The 'new' (right) side of the difference: these
analysis runs are compared to the -b/--basename runs.
The parameter can be multiple run names (on the remote
server), multiple local report directories (result of
the analyze command) or baseline files (generated by
the 'CodeChecker parse -e baseline' command). In case
of run name the newname can contain * quantifiers
which matches any number of characters (zero or more).
So if you have run-a-1, run-a-2 and run-b-1 then
"run-a*" selects the first two. In case of run names
tag labels can also be used separated by a colon (:)
character: "run_name:tag_name".
-o {plaintext,rows,table,csv,json,html,gerrit,codeclimate} [{plaintext,rows,table,csv,json,html,gerrit,codeclimate} ...], --output {plaintext,rows,table,csv,json,html,gerrit,codeclimate} [{plaintext,rows,table,csv,json,html,gerrit,codeclimate} ...]
The output format(s) to use in showing the data.
- html: multiple html files will be generated in the
Expand Down Expand Up @@ -1317,6 +1320,10 @@ exist in the remote run 'run1' but appear in the local report directory:
Compare two runs and show results that exist in both runs and filter results
by multiple severity values:
CodeChecker cmd diff -b run1 -n run2 --unresolved --severity high medium
Compare a baseline file (generated by the 'CodeChecker parse -e baseline'
command) and a local report directory and show new results:
CodeChecker cmd diff -b /reports.baseline -n /my_report_dir --new
```
</details>

Expand Down
35 changes: 21 additions & 14 deletions web/client/codechecker_client/cmd/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,16 +490,17 @@ def __register_diff(parser):
help="The 'base' (left) side of the difference: these "
"analysis runs are used as the initial state in "
"the comparison. The parameter can be multiple "
"run names (on the remote server) or multiple "
"run names (on the remote server), multiple "
"local report directories (result of the analyze "
"command). In case of run name the the basename "
"can contain * quantifiers which matches any "
"number of characters (zero or more). So if you "
"have run-a-1, run-a-2 and run-b-1 then "
"\"run-a*\" selects the first two. In case of "
"run names tag labels can also be used separated "
"by a colon (:) character: "
"\"run_name:tag_name\".")
"command) or baseline files (generated by the "
"'CodeChecker parse -e baseline' command). In "
"case of run name the the basename can contain * "
"quantifiers which matches any number of "
"characters (zero or more). So if you have "
"run-a-1, run-a-2 and run-b-1 then \"run-a*\" "
"selects the first two. In case of run names tag "
"labels can also be used separated by a colon "
"(:) character: \"run_name:tag_name\".")

parser.add_argument('-n', '--newname',
type=str,
Expand All @@ -510,11 +511,13 @@ def __register_diff(parser):
help="The 'new' (right) side of the difference: these "
"analysis runs are compared to the -b/--basename "
"runs. The parameter can be multiple run names "
"(on the remote server) or multiple local "
"(on the remote server), multiple local "
"report directories (result of the analyze "
"command). In case of run name the newname can "
"contain * quantifiers which matches any number "
"of characters (zero or more). So if you have "
"command) or baseline files (generated by the "
"'CodeChecker parse -e baseline' command). In "
"case of run name the newname can contain * "
"quantifiers which matches any number of "
"characters (zero or more). So if you have "
"run-a-1, run-a-2 and run-b-1 then "
"\"run-a*\" selects the first two. In case of "
"run names tag labels can also be used separated "
Expand Down Expand Up @@ -1342,7 +1345,11 @@ def add_arguments_to_parser(parser):
Compare two runs and show results that exist in both runs and filter results
by multiple severity values:
CodeChecker cmd diff -b run1 -n run2 --unresolved --severity high medium'''
CodeChecker cmd diff -b run1 -n run2 --unresolved --severity high medium
Compare a baseline file (generated by the 'CodeChecker parse -e baseline'
command) and a local report directory and show new results:
CodeChecker cmd diff -b /reports.baseline -n /my_report_dir --new'''
)
__register_diff(diff)

Expand Down
Loading

0 comments on commit d162500

Please sign in to comment.