Skip to content

Commit

Permalink
Merge pull request Ericsson#2765 from csordasmarton/tu_collector_get_…
Browse files Browse the repository at this point in the history
…dependent_headers

[tools] tu_collector get dependent source files for headers
  • Loading branch information
Gyorgy Orban authored Jun 11, 2020
2 parents b7b0335 + 44d322a commit d8b75d1
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 103 deletions.
74 changes: 5 additions & 69 deletions analyzer/codechecker_analyzer/cmd/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,12 @@

import argparse
import collections
import fnmatch
import json
import os
import re
import shutil
import sys

from tu_collector import tu_collector

from codechecker_analyzer import analyzer, analyzer_context, arg, env
from codechecker_analyzer.analyzers import analyzer_types
from codechecker_analyzer.arg import OrderedCheckersAction
Expand Down Expand Up @@ -645,24 +642,7 @@ def check_config_file(args):
LOG.debug("Config file '%s' is available but disabled.", args.config_file)


def __get_source_dependencies(compile_commands):
    """ Map every file occurring in any translation unit to the set of
    source files whose compilation depends on it.

    compile_commands is an iterable of JSON compilation database entries
    (dicts with at least 'command', 'directory' and 'file' keys).

    Returns a defaultdict(set): {dependency_path: {source_file, ...}}.
    """
    dependencies = collections.defaultdict(set)
    for build_action in compile_commands:
        # tu_collector returns the files making up this translation unit
        # (presumably the headers pulled in by the compile command — the
        # second element of the returned tuple is unused here).
        files, _ = tu_collector.get_dependent_headers(
            build_action['command'],
            build_action['directory'])

        # 'file' may be relative to the build directory; join to get an
        # absolute path.
        source_file = os.path.join(build_action['directory'],
                                   build_action['file'])
        for f in files:
            dependencies[f].add(source_file)

    return dependencies


def __get_skip_handler(args, compile_commands):
def __get_skip_handler(args):
"""
Initialize and return a skiplist handler if
there is a skip list file in the arguments or files options is provided.
Expand All @@ -683,51 +663,7 @@ def __get_skip_handler(args, compile_commands):

if skip_file_content:
LOG.debug_analyzer("Creating skiplist handler.")
handler = skiplist_handler.SkipListHandler(skip_file_content)

analyze_headers = []

# Check whether the skip file contains a header file which is not
# skipped.
for skip_line in handler.skip_file_lines:
if skip_line[0] == '+' and \
skip_line.lower().endswith((".h", ".hh", ".hpp")):

norm_skip_path = os.path.normpath(skip_line[1:].strip())
rexpr = re.compile(
fnmatch.translate(norm_skip_path + '*'))
analyze_headers.append((skip_line, rexpr))

# Get source files which depend on the previously collected header
# files and create a new skip list handler where we include these
# files in the beginning.
if analyze_headers:
LOG.info("Get source files which depend on some header files and "
"should be analyzed by your skip file.")

dependencies = __get_source_dependencies(compile_commands)

analyze_header_deps = []
for f, deps in dependencies.items():
for _, rexpr in analyze_headers:
if rexpr.match(f):
analyze_header_deps.extend(["+" + d for d in deps])

if analyze_header_deps:
LOG.info("Your skip file contained some header files (%s) to "
"be analyzed. Analysis can not be executed on header "
"files only. For this reason CodeChecker will "
"analyze the following source files which include "
"the header files:\n%s",
', '.join([f for (f, _) in analyze_headers]),
'\n'.join([" " + f for f in analyze_header_deps]))

skip_file_content = \
"\n".join(analyze_header_deps) + "\n" + skip_file_content

return skiplist_handler.SkipListHandler(skip_file_content)

return handler
return skiplist_handler.SkipListHandler(skip_file_content)


def __update_skip_file(args):
Expand Down Expand Up @@ -839,10 +775,8 @@ def main(args):
LOG.error("Checker option in wrong format: %s", config)
sys.exit(1)

compile_commands = load_json_or_empty(args.logfile, default={})

# Process the skip list if present.
skip_handler = __get_skip_handler(args, compile_commands)
skip_handler = __get_skip_handler(args)

# Enable alpha uniqueing by default if ctu analysis is used.
if 'none' in args.compile_uniqueing and 'ctu_phases' in args:
Expand Down Expand Up @@ -883,6 +817,8 @@ def main(args):
analyzer_env = env.extend(context.path_env_extra,
context.ld_lib_path_extra)

compile_commands = load_json_or_empty(args.logfile, default={})

# Number of all the compilation commands in the parsed log files,
# logged by the logger.
all_cmp_cmd_count = len(compile_commands)
Expand Down
2 changes: 0 additions & 2 deletions analyzer/tests/functional/skip/test_files/multiple/skipfile

This file was deleted.

31 changes: 28 additions & 3 deletions analyzer/tests/functional/skip/test_skip.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,11 @@ def setUp(self):

# Get the CodeChecker cmd if needed for the tests.
self._codechecker_cmd = env.codechecker_cmd()
self._tu_collector_cmd = env.tu_collector_cmd()
self.report_dir = os.path.join(self.test_workspace, "reports")
self.test_dir = os.path.join(os.path.dirname(__file__), 'test_files')

def test_skip(self):
def __test_skip(self):
"""Analyze a project with a skip file."""
test_dir = os.path.join(self.test_dir, "simple")
build_json = os.path.join(self.test_workspace, "build.json")
Expand Down Expand Up @@ -120,10 +121,34 @@ def test_analyze_only_header(self):
encoding="utf-8", errors="ignore")
print(out)

# Create and run analyze command.
# Use tu_collector to get source file dependencies for a header file
# and create a skip file from it.
deps_cmd = [self._tu_collector_cmd, "-l", build_json,
"--dependents", "--filter", "*/lib.h"]

try:
output = subprocess.check_output(
deps_cmd,
cwd=test_dir,
encoding="utf-8",
errors="ignore")

source_files = output.splitlines()
except subprocess.CalledProcessError as cerr:
print("Failed to run: " + ' '.join(cerr.cmd))
print(cerr.output)

skip_file = os.path.join(self.test_workspace, "skipfile")
with open(skip_file, 'w', encoding="utf-8", errors="ignore") as skip_f:
# Include all source file dependencies.
skip_f.write("\n".join(["+" + s for s in source_files]))

# Skip all other files.
skip_f.write("-*")

analyze_cmd = [self._codechecker_cmd, "analyze", "-c", build_json,
"--analyzers", "clangsa",
"--ignore", "skipfile",
"--ignore", skip_file,
"-o", self.report_dir]

process = subprocess.Popen(
Expand Down
4 changes: 4 additions & 0 deletions analyzer/tests/libtest/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ def codechecker_cmd():
return os.path.join(PKG_ROOT, 'bin', 'CodeChecker')


def tu_collector_cmd():
    """ Return the path of the 'tu_collector' executable shipped in the
    package's bin directory. """
    return os.path.join(PKG_ROOT, 'bin', 'tu_collector')


def get_workspace(test_id='test'):
""" return a temporary workspace for the tests """
workspace_root = os.environ.get("CC_TEST_WORKSPACE_ROOT")
Expand Down
2 changes: 1 addition & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ Useful tools that can also be used outside CodeChecker.
* [Build Logger (to generate JSON Compilation Database from your builds)](analyzer/tools/build-logger)
* [Plist to HTML converter (to generate HTML files from the given plist files)](/tools/plist_to_html/README.md)
* [Report Converter Tool (to convert analysis results from other analyzers to the codechecker report directory format)](/tools/report-converter/README.md)
* [Translation Unit Collector (to collect source files of a translation unit)](/tools/tu_collector/README.md)
* [Translation Unit Collector (to collect source files of a translation unit or to get source files which depend on the given header files)](/tools/tu_collector/README.md)
* [Report Hash generator (to generate unique hash identifiers for reports)](/tools/codechecker_report_hash/README.md)

## Helper Scripts
Expand Down
92 changes: 82 additions & 10 deletions tools/tu_collector/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,103 @@ make package

## Usage
```sh
usage: tu_collector.py [-h] (-b COMMAND | -l LOGFILE) -z ZIP [-f FILTER]
usage: tu_collector [-h] (-b COMMAND | -l LOGFILE) [-f FILTER] (-z ZIP | -d)
[-v]

This script collects all the source files constituting specific translation
units. The files are written to a ZIP file which will contain the sources
preserving the original directory hierarchy.
This script can be used for multiple purposes:
- It can be used to collect all the source files constituting specific
translation units. The files are written to a ZIP file which will contain the
sources preserving the original directory hierarchy.
- It can be used to get source files which depend on a given header file.

optional arguments:
-h, --help show this help message and exit
-z ZIP, --zip ZIP Output ZIP file.
-f FILTER, --filter FILTER
This flag restricts the collection on the build
actions of which the compiled source file matches this
path. E.g.: /path/to/*/files
If '--zip' option is given this flag restricts the
collection on the build actions of which the compiled
source file matches this path. If '--dependents'
option is given this flag specify a header file to get
source file dependencies for. E.g.: /path/to/*/files
-v, --verbose Enable debug level logging.

log arguments:

Specify how the build information database should be obtained. You need to
specify either an already existing log file, or a build command which will
be used to generate a log file on the fly.
specify either an already existing log file, or a build command which will be
used to generate a log file on the fly.

-b COMMAND, --build COMMAND
Execute and record a build command. Build commands can
be simple calls to 'g++' or 'clang++'.
-l LOGFILE, --logfile LOGFILE
Use an already existing JSON compilation command
database file specified at this path.

output arguments:
Specify the output type.

-z ZIP, --zip ZIP Output ZIP file.
-d, --dependents Use this flag to return a list of source files which
depend on some header files specified by the --filter
option. The result will not contain header files, even
if those are dependents as well.
```
## Get source files which include a specific header file
Header files cannot be analyzed without a C/C++ file. If you change a header
file, this tool can be used to find all the C/C++ source files including that
header file. You can create a skip file and include only these source files
so the header file will actually be "analyzed".
**WARNING**: full compilation database is required to collect this information.
### Get source file dependencies for a header
You can use this tool to get all source file dependencies for a given header
file:
```sh
# Using absolute path.
tu_collector --dependents -l ./full_compilation_database.json -f "/path/to/main.h"

# Using relative path.
tu_collector --dependents -l ./full_compilation_database.json -f "*/main.h"
```
### Create skip file from source files that need to be reanalyzed
You can use this tool to get all source file dependencies for all the changed
header files in a git commit and create a skip file from all source files that
need to be reanalyzed by the `CodeChecker analyze` command:
```sh
#!/bin/bash

# Full compilation database file.
compilation_database="./full_compilation_database.json"

# Skip file for CodeChecker analyze command.
skip_file="./skipfile"

# Remove skip file if exists.
rm -rf $skip_file

# Use git to get changed header files, use tu_collector to get all source files
# that need to be reanalyzed and include them in the skip file.
changed_header_files=$(git diff --name-only HEAD^ -- '*.h' '*.hpp')
for changed_header in $changed_header_files; do
source_files=$(tu_collector --dependents -l "$compilation_database" -f "*$changed_header")
for source_file in $source_files; do
echo "+$(pwd)/$source_file" >> $skip_file;
done
done

# Use git to get changed source files and include them in the skip file.
changed_source_files=$(git diff --name-only HEAD^ -- '*.c' '*.cpp')
for source_file in $changed_source_files; do
echo "+$(pwd)/$source_file" >> $skip_file;
done

# Exclude every other file from the analysis.
echo "-*" >> $skip_file
```
## License
Expand Down
Loading

0 comments on commit d8b75d1

Please sign in to comment.