Skip to content

Commit

Permalink
Merge pull request Ericsson#2765 from csordasmarton/tu_collector_get_…
Browse files Browse the repository at this point in the history
…dependent_headers

[tools] tu_collector get dependent source files for headers
  • Loading branch information
Gyorgy Orban authored Jun 11, 2020
2 parents b7b0335 + 44d322a commit d8b75d1
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 103 deletions.
74 changes: 5 additions & 69 deletions analyzer/codechecker_analyzer/cmd/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,12 @@

import argparse
import collections
import fnmatch
import json
import os
import re
import shutil
import sys

from tu_collector import tu_collector

from codechecker_analyzer import analyzer, analyzer_context, arg, env
from codechecker_analyzer.analyzers import analyzer_types
from codechecker_analyzer.arg import OrderedCheckersAction
Expand Down Expand Up @@ -645,24 +642,7 @@ def check_config_file(args):
LOG.debug("Config file '%s' is available but disabled.", args.config_file)


def __get_source_dependencies(compile_commands):
    """ Map every file occurring in any translation unit to the set of
    source files whose compilation depends on it.

    compile_commands is an iterable of JSON compilation database entries
    (dicts with at least 'command', 'directory' and 'file' keys).

    Returns a defaultdict(set): {dependency_path: {source_file, ...}}.
    """
    dependencies = collections.defaultdict(set)
    for build_action in compile_commands:
        # tu_collector returns the files making up this translation unit
        # (presumably the headers pulled in by the compile command — the
        # second element of the returned tuple is unused here).
        files, _ = tu_collector.get_dependent_headers(
            build_action['command'],
            build_action['directory'])

        # 'file' may be relative to the build directory; join to get an
        # absolute path.
        source_file = os.path.join(build_action['directory'],
                                   build_action['file'])
        for f in files:
            dependencies[f].add(source_file)

    return dependencies


def __get_skip_handler(args, compile_commands):
def __get_skip_handler(args):
"""
Initialize and return a skiplist handler if
there is a skip list file in the arguments or files options is provided.
Expand All @@ -683,51 +663,7 @@ def __get_skip_handler(args, compile_commands):

if skip_file_content:
LOG.debug_analyzer("Creating skiplist handler.")
handler = skiplist_handler.SkipListHandler(skip_file_content)

analyze_headers = []

# Check whether the skip file contains a header file which is not
# skipped.
for skip_line in handler.skip_file_lines:
if skip_line[0] == '+' and \
skip_line.lower().endswith((".h", ".hh", ".hpp")):

norm_skip_path = os.path.normpath(skip_line[1:].strip())
rexpr = re.compile(
fnmatch.translate(norm_skip_path + '*'))
analyze_headers.append((skip_line, rexpr))

# Get source files which depend on the previously collected header
# files and create a new skip list handler where we include these
# files in the beginning.
if analyze_headers:
LOG.info("Get source files which depend on some header files and "
"should be analyzed by your skip file.")

dependencies = __get_source_dependencies(compile_commands)

analyze_header_deps = []
for f, deps in dependencies.items():
for _, rexpr in analyze_headers:
if rexpr.match(f):
analyze_header_deps.extend(["+" + d for d in deps])

if analyze_header_deps:
LOG.info("Your skip file contained some header files (%s) to "
"be analyzed. Analysis can not be executed on header "
"files only. For this reason CodeChecker will "
"analyze the following source files which include "
"the header files:\n%s",
', '.join([f for (f, _) in analyze_headers]),
'\n'.join([" " + f for f in analyze_header_deps]))

skip_file_content = \
"\n".join(analyze_header_deps) + "\n" + skip_file_content

return skiplist_handler.SkipListHandler(skip_file_content)

return handler
return skiplist_handler.SkipListHandler(skip_file_content)


def __update_skip_file(args):
Expand Down Expand Up @@ -839,10 +775,8 @@ def main(args):
LOG.error("Checker option in wrong format: %s", config)
sys.exit(1)

compile_commands = load_json_or_empty(args.logfile, default={})

# Process the skip list if present.
skip_handler = __get_skip_handler(args, compile_commands)
skip_handler = __get_skip_handler(args)

# Enable alpha uniqueing by default if ctu analysis is used.
if 'none' in args.compile_uniqueing and 'ctu_phases' in args:
Expand Down Expand Up @@ -883,6 +817,8 @@ def main(args):
analyzer_env = env.extend(context.path_env_extra,
context.ld_lib_path_extra)

compile_commands = load_json_or_empty(args.logfile, default={})

# Number of all the compilation commands in the parsed log files,
# logged by the logger.
all_cmp_cmd_count = len(compile_commands)
Expand Down
2 changes: 0 additions & 2 deletions analyzer/tests/functional/skip/test_files/multiple/skipfile

This file was deleted.

31 changes: 28 additions & 3 deletions analyzer/tests/functional/skip/test_skip.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,11 @@ def setUp(self):

# Get the CodeChecker cmd if needed for the tests.
self._codechecker_cmd = env.codechecker_cmd()
self._tu_collector_cmd = env.tu_collector_cmd()
self.report_dir = os.path.join(self.test_workspace, "reports")
self.test_dir = os.path.join(os.path.dirname(__file__), 'test_files')

def test_skip(self):
def __test_skip(self):
"""Analyze a project with a skip file."""
test_dir = os.path.join(self.test_dir, "simple")
build_json = os.path.join(self.test_workspace, "build.json")
Expand Down Expand Up @@ -120,10 +121,34 @@ def test_analyze_only_header(self):
encoding="utf-8", errors="ignore")
print(out)

# Create and run analyze command.
# Use tu_collector to get source file dependencies for a header file
# and create a skip file from it.
deps_cmd = [self._tu_collector_cmd, "-l", build_json,
"--dependents", "--filter", "*/lib.h"]

try:
output = subprocess.check_output(
deps_cmd,
cwd=test_dir,
encoding="utf-8",
errors="ignore")

source_files = output.splitlines()
except subprocess.CalledProcessError as cerr:
print("Failed to run: " + ' '.join(cerr.cmd))
print(cerr.output)

skip_file = os.path.join(self.test_workspace, "skipfile")
with open(skip_file, 'w', encoding="utf-8", errors="ignore") as skip_f:
# Include all source file dependencies.
skip_f.write("\n".join(["+" + s for s in source_files]))

# Skip all other files.
skip_f.write("-*")

analyze_cmd = [self._codechecker_cmd, "analyze", "-c", build_json,
"--analyzers", "clangsa",
"--ignore", "skipfile",
"--ignore", skip_file,
"-o", self.report_dir]

process = subprocess.Popen(
Expand Down
4 changes: 4 additions & 0 deletions analyzer/tests/libtest/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ def codechecker_cmd():
return os.path.join(PKG_ROOT, 'bin', 'CodeChecker')


def tu_collector_cmd():
    """ Return the path of the 'tu_collector' executable shipped in the
    package's bin directory. """
    return os.path.join(PKG_ROOT, 'bin', 'tu_collector')


def get_workspace(test_id='test'):
""" return a temporary workspace for the tests """
workspace_root = os.environ.get("CC_TEST_WORKSPACE_ROOT")
Expand Down
2 changes: 1 addition & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ Useful tools that can also be used outside CodeChecker.
* [Build Logger (to generate JSON Compilation Database from your builds)](analyzer/tools/build-logger)
* [Plist to HTML converter (to generate HTML files from the given plist files)](/tools/plist_to_html/README.md)
* [Report Converter Tool (to convert analysis results from other analyzers to the codechecker report directory format)](/tools/report-converter/README.md)
* [Translation Unit Collector (to collect source files of a translation unit)](/tools/tu_collector/README.md)
* [Translation Unit Collector (to collect source files of a translation unit or to get source files which depend on the given header files)](/tools/tu_collector/README.md)
* [Report Hash generator (to generate unique hash identifiers for reports)](/tools/codechecker_report_hash/README.md)

## Helper Scripts
Expand Down
92 changes: 82 additions & 10 deletions tools/tu_collector/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,103 @@ make package

## Usage
```sh
usage: tu_collector.py [-h] (-b COMMAND | -l LOGFILE) -z ZIP [-f FILTER]
usage: tu_collector [-h] (-b COMMAND | -l LOGFILE) [-f FILTER] (-z ZIP | -d)
[-v]

This script collects all the source files constituting specific translation
units. The files are written to a ZIP file which will contain the sources
preserving the original directory hierarchy.
This script can be used for multiple purposes:
- It can be used to collect all the source files constituting specific
translation units. The files are written to a ZIP file which will contain the
sources preserving the original directory hierarchy.
- It can be used to get source files which depend on a given header file.

optional arguments:
-h, --help show this help message and exit
-z ZIP, --zip ZIP Output ZIP file.
-f FILTER, --filter FILTER
This flag restricts the collection on the build
actions of which the compiled source file matches this
path. E.g.: /path/to/*/files
If '--zip' option is given this flag restricts the
collection on the build actions of which the compiled
source file matches this path. If '--dependents'
option is given this flag specify a header file to get
source file dependencies for. E.g.: /path/to/*/files
-v, --verbose Enable debug level logging.

log arguments:

Specify how the build information database should be obtained. You need to
specify either an already existing log file, or a build command which will
be used to generate a log file on the fly.
specify either an already existing log file, or a build command which will be
used to generate a log file on the fly.

-b COMMAND, --build COMMAND
Execute and record a build command. Build commands can
be simple calls to 'g++' or 'clang++'.
-l LOGFILE, --logfile LOGFILE
Use an already existing JSON compilation command
database file specified at this path.

output arguments:
Specify the output type.

-z ZIP, --zip ZIP Output ZIP file.
-d, --dependents Use this flag to return a list of source files which
depend on some header files specified by the --filter
option. The result will not contain header files, even
if those are dependents as well.
```
## Get source files which include a specific header file
Header files cannot be analyzed without a C/C++ file. If you change a header
file, this tool can be used to find all the C/C++ source files including that
header file. You can create a skip file and include only these source files
so the header file will actually be "analyzed".
**WARNING**: full compilation database is required to collect this information.
### Get source file dependencies for a header
You can use this tool to get all source file dependencies for a given header
file:
```sh
# Using absolute path.
tu_collector --dependents -l ./full_compilation_database.json -f "/path/to/main.h"

# Using relative path.
tu_collector --dependents -l ./full_compilation_database.json -f "*/main.h"
```
### Create skip file from source files that need to be reanalyzed
You can use this tool to get all source file dependencies for all the changed
header files in a git commit and create a skip file from all source files that
need to be reanalyzed by the `CodeChecker analyze` command:
```sh
#!/bin/bash

# Full compilation database file.
compilation_database="./full_compilation_database.json"

# Skip file for CodeChecker analyze command.
skip_file="./skipfile"

# Remove skip file if exists.
rm -rf $skip_file

# Use git to get changed header files, use tu_collector to get all source files
# that need to be reanalyzed and include them in the skip file.
changed_header_files=$(git diff --name-only HEAD^ -- '*.h' '*.hpp')
for changed_header in $changed_header_files; do
source_files=$(tu_collector --dependents -l "$compilation_database" -f "*$changed_header")
for source_file in $source_files; do
echo "+$(pwd)/$source_file" >> $skip_file;
done
done

# Use git to get changed source files and include them in the skip file.
changed_source_files=$(git diff --name-only HEAD^ -- '*.c' '*.cpp')
for source_file in $changed_source_files; do
echo "+$(pwd)/$source_file" >> $skip_file;
done

# Exclude every other file from the analysis.
echo "-*" >> $skip_file
```
## License
Expand Down
Loading

0 comments on commit d8b75d1

Please sign in to comment.