Merge branch 'release/1.0.0' of https://github.com/pablosantiagolopez…

…/detect-secrets into release/1.0.0
jepsenwan · Apr 13, 2021 · 8c10e4e · 8c10e4e
2 parents 57bffac + fcbee98
commit 8c10e4e
Show file tree

Hide file tree

Showing 7 changed files with 418 additions and 199 deletions.
diff --git a/README.md b/README.md
@@ -407,29 +407,38 @@ const secret = "hunter2";
 
 ```bash
 $ detect-secrets audit --help
-usage: detect-secrets audit [-h] [--diff] [--stats] [--json]
-                            filename [filename ...]
+usage: detect-secrets audit [-h] [--diff] [--stats]
+                      [--report] [--only-real | --only-false]
+                      [--json]
+                      filename [filename ...]
 
 Auditing a baseline allows analysts to label results, and optimize plugins for
 the highest signal-to-noise ratio for their environment.
 
 positional arguments:
-  filename    Audit a given baseline file to distinguish the difference
-              between false and true positives.
+  filename      Audit a given baseline file to distinguish the difference
+                between false and true positives.
 
 optional arguments:
-  -h, --help  show this help message and exit
-  --diff      Allows the comparison of two baseline files, in order to
-              effectively distinguish the difference between various plugin
-              configurations.
-  --stats     Displays the results of an interactive auditing session which
-              have been saved to a baseline file.
+  -h, --help    show this help message and exit
+  --diff        Allows the comparison of two baseline files, in order to
+                effectively distinguish the difference between various plugin
+                configurations.
+  --stats       Displays the results of an interactive auditing session which
+                have been saved to a baseline file.
+  --report      Displays a report with the secrets detected
+
+reporting:
+  Display a report with all the findings and the made decisions. To be used with the report mode (--report).
+
+  --only-real   Only includes real secrets in the report
+  --only-false  Only includes false positives in the report
 
 analytics:
   Quantify the success of your plugins based on the labelled results in your
   baseline. To be used with the statistics mode (--stats).
 
-  --json      Outputs results in a machine-readable format.
+  --json        Outputs results in a machine-readable format.
 ```
 
 ## Configuration

diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py
@@ -50,49 +50,70 @@ def get_raw_secret_from_file(
     the secret value (by design). However, we have line numbers, filenames, and how we detected
     it was a secret in the first place, so we can reverse-engineer it.
 
+    :raises: SecretNotFoundOnSpecifiedLineError
+    :raises: NoLineNumberError
+    """
+    if not secret.line_number:
+        raise NoLineNumberError
+
+    for item in get_raw_secrets_from_file(secret, line_getter_factory):
+        return item.secret_value
+
+    raise SecretNotFoundOnSpecifiedLineError(secret.line_number)
+
+
+def get_raw_secrets_from_file(
+    secret: PotentialSecret,
+    line_getter_factory: Callable[[str], 'LineGetter'] = open_file,
+) -> [PotentialSecret]:
+    """
+    We're analyzing the contents straight from the baseline, and therefore, we don't know
+    the secret value (by design). However, we have secret hashes, filenames, and how we detected
+    it was a secret in the first place, so we can reverse-engineer it. This function finds
+    every occurrence of one secret in one file using one plugin.
+
     :raises: SecretNotFoundOnSpecifiedLineError
     :raises: NoLineNumberError
     """
     plugin = cast(BasePlugin, plugins.initialize.from_secret_type(secret.type))
     line_getter = line_getter_factory(secret.filename)
     is_first_time_opening_file = not line_getter.has_cached_lines
+    all_secrets = []
     while True:
-        if not secret.line_number:
-            raise NoLineNumberError
-
-        try:
-            target_line = line_getter.lines[secret.line_number - 1]
-        except IndexError:
-            raise SecretNotFoundOnSpecifiedLineError(secret.line_number)
-
-        identified_secrets = call_function_with_arguments(
-            plugin.analyze_line,
-            filename=secret.filename,
-            line=target_line,
-            line_number=secret.line_number,
-
-            # We enable eager search, because we *know* there's a secret here -- the baseline
-            # flagged it after all.
-            enable_eager_search=True,
-        )
-
-        for identified_secret in (identified_secrets or []):
-            if identified_secret == secret:
-                return cast(str, identified_secret.secret_value)
-
-        # No secret found -- maybe it's due to invalid file transformation.
-        # However, this only applies to the first execution of the file, since we want a
-        # consistent transformed file.
-        #
-        # NOTE: This is defensive coding. If we assume that this is only run on valid baselines,
-        # then the baseline wouldn't record secrets that were both found with and without an eager
-        # transformer, in the same file.
-        if is_first_time_opening_file and not line_getter.use_eager_transformers:
+        if secret.line_number:
+            try:
+                lines_to_scan = [line_getter.lines[secret.line_number - 1]]
+                line_numbers = [secret.line_number - 1]
+            except IndexError:
+                raise SecretNotFoundOnSpecifiedLineError(secret.line_number)
+        else:
+            lines_to_scan = line_getter.lines
+            line_numbers = range(len(lines_to_scan))
+
+        for line_number, line in zip(line_numbers, lines_to_scan):
+            identified_secrets = call_function_with_arguments(
+                plugin.analyze_line,
+                filename=secret.filename,
+                line=line,
+                line_number=line_number + 1,
+
+                # Eager search is enabled only when a line number is known: then we *know*
+                # there's a secret on that line -- the baseline flagged it after all.
+                enable_eager_search=bool(secret.line_number),
+            )
+
+            for identified_secret in (identified_secrets or []):
+                if identified_secret == secret:
+                    all_secrets.append(identified_secret)
+
+        if (
+            len(all_secrets) == 0 and
+            is_first_time_opening_file and
+            not line_getter.use_eager_transformers
+        ):
             line_getter.use_eager_transformers = True
         else:
-            break
-
-    raise SecretNotFoundOnSpecifiedLineError(secret.line_number)
+            return all_secrets
 
 
 class LineGetter:

diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py
@@ -1,21 +1,18 @@
-import codecs
-import hashlib
 from enum import Enum
+from typing import Callable
 
 from ..constants import VerifiedResult
-from ..core.plugins.util import get_mapping_from_secret_type_to_class
-from ..core.plugins.util import Plugin
-from ..core.potential_secret import PotentialSecret
-from ..core.scan import _get_lines_from_file
-from ..core.scan import _scan_line
 from .common import get_baseline_from_file
+from .common import get_raw_secrets_from_file
+from .common import LineGetter
+from .common import open_file
 
 
 class SecretClassToPrint(Enum):
     REAL_SECRET = 1
     FALSE_POSITIVE = 2
 
-    def from_class(secret_class: VerifiedResult) -> Enum:
+    def from_class(secret_class: VerifiedResult) -> 'SecretClassToPrint':
         if secret_class in [VerifiedResult.UNVERIFIED, VerifiedResult.VERIFIED_TRUE]:
             return SecretClassToPrint.REAL_SECRET
         else:
@@ -25,92 +22,50 @@ def from_class(secret_class: VerifiedResult) -> Enum:
 def generate_report(
     baseline_file: str,
     class_to_print: SecretClassToPrint = None,
+    line_getter_factory: Callable[[str], 'LineGetter'] = open_file,
 ) -> None:
-    plugins = get_mapping_from_secret_type_to_class()
     secrets = {}
     for filename, secret in get_baseline_from_file(baseline_file):
-        verified_result = get_verified_result_from_boolean(secret.is_secret)
-        if class_to_print is not None and SecretClassToPrint.from_class(verified_result) != class_to_print:  # noqa: E501
+        verified_result = VerifiedResult.from_secret(secret)
+        if (
+            class_to_print is not None and
+            SecretClassToPrint.from_class(verified_result) != class_to_print
+        ):
             continue
-        try:
-            detections = get_potential_secrets(filename, plugins[secret.type](), secret.secret_hash)
-        except Exception:
-            continue
-        identifier = hashlib.sha512((secret.secret_hash + filename).encode('utf-8')).hexdigest()
+        # Clear the stored line number to force a scan of the complete file, so that every occurrence of the secret is found. # noqa: E501
+        secret.line_number = 0
+        detections = get_raw_secrets_from_file(secret)
+        line_getter = line_getter_factory(filename)
         for detection in detections:
-            if identifier in secrets:
-                secrets[identifier]['lines'][detection.line_number] = get_line_content(filename, detection.line_number)  # noqa: E501
-                if secret.type not in secrets[identifier]['types']:
-                    secrets[identifier]['types'].append(secret.type)
-                secrets[identifier]['category'] = get_prioritary_verified_result(
+            if (secret.secret_hash, filename) in secrets:
+                secrets[(secret.secret_hash, filename)]['lines'][detection.line_number] = line_getter.lines[detection.line_number - 1]  # noqa: E501
+                if secret.type not in secrets[(secret.secret_hash, filename)]['types']:
+                    secrets[(secret.secret_hash, filename)]['types'].append(secret.type)
+                secrets[(secret.secret_hash, filename)]['category'] = get_prioritized_verified_result(  # noqa: E501
                     verified_result,
-                    VerifiedResult[secrets[identifier]['category']],
+                    VerifiedResult[secrets[(secret.secret_hash, filename)]['category']],
                 ).name
             else:
-                secrets[identifier] = {
+                secrets[(secret.secret_hash, filename)] = {
                     'secrets': detection.secret_value,
                     'filename': filename,
                     'lines': {
-                        detection.line_number: get_line_content(filename, detection.line_number),
+                        detection.line_number: line_getter.lines[detection.line_number - 1],
                     },
                     'types': [
                         secret.type,
                     ],
                     'category': verified_result.name,
                 }
 
-    output = []
-    for identifier in secrets:
-        output.append(secrets[identifier])
-
-    return output
+    return list(secrets.values())
 
 
-def get_prioritary_verified_result(
+def get_prioritized_verified_result(
     result1: VerifiedResult,
     result2: VerifiedResult,
 ) -> VerifiedResult:
     if result1.value > result2.value:
         return result1
     else:
         return result2
-
-
-def get_verified_result_from_boolean(
-    is_secret: bool,
-) -> VerifiedResult:
-    if is_secret is None:
-        return VerifiedResult.UNVERIFIED
-    elif is_secret:
-        return VerifiedResult.VERIFIED_TRUE
-    else:
-        return VerifiedResult.VERIFIED_FALSE
-
-
-def get_potential_secrets(
-    filename: str,
-    plugin: Plugin,
-    secret_to_find: str,
-) -> [PotentialSecret]:
-    """
-    :returns: List of PotentialSecrets detected by a specific plugin in a file.
-    """
-    for lines in _get_lines_from_file(filename):
-        for line_number, line in list(enumerate(lines, 1)):
-            secrets = _scan_line(plugin, filename, line, line_number)
-            for secret in secrets:
-                if secret.secret_hash == secret_to_find:
-                    yield secret
-
-
-def get_line_content(
-    filename: str,
-    line_number: int,
-) -> str:
-    """
-    :returns: Line content from filename by line number.
-    """
-    content = codecs.open(filename, encoding='utf-8').read()
-    if not content:
-        return None
-    return content.splitlines()[line_number - 1]
diff --git a/detect_secrets/constants.py b/detect_secrets/constants.py
@@ -1,7 +1,18 @@
 from enum import Enum
 
+from .core.potential_secret import PotentialSecret
+
 
 class VerifiedResult(Enum):
     VERIFIED_FALSE = 1
     UNVERIFIED = 2
     VERIFIED_TRUE = 3
+
+    @staticmethod
+    def from_secret(secret: PotentialSecret) -> 'VerifiedResult':
+        if secret.is_secret is None:
+            return VerifiedResult.UNVERIFIED
+        elif secret.is_secret:
+            return VerifiedResult.VERIFIED_TRUE
+        else:
+            return VerifiedResult.VERIFIED_FALSE
diff --git a/detect_secrets/core/usage/audit.py b/detect_secrets/core/usage/audit.py
@@ -21,7 +21,7 @@ def add_audit_action(parent: argparse._SubParsersAction) -> argparse.ArgumentPar
     )
 
     _add_mode_parser(parser)
-    _add_report_parser(parser)
+    _add_report_module(parser)
     _add_statistics_module(parser)
     return parser
 
@@ -46,8 +46,6 @@ def _add_mode_parser(parser: argparse.ArgumentParser) -> None:
         ),
     )
 
-
-def _add_report_parser(parser: argparse.ArgumentParser) -> None:
     parser.add_argument(
         '--report',
         action='store_true',
@@ -56,6 +54,16 @@ def _add_report_parser(parser: argparse.ArgumentParser) -> None:
         ),
     )
 
+
+def _add_report_module(parent: argparse.ArgumentParser) -> None:
+    parser = parent.add_argument_group(
+        title='reporting',
+        description=(
+            'Display a report with all the findings and the made decisions. '
+            'To be used with the report mode (--report).'
+        ),
+    )
+
     report_parser = parser.add_mutually_exclusive_group()
     report_parser.add_argument(
         '--only-real',