From 4551c1dffdd50cc92e64adfd55745127b0992229 Mon Sep 17 00:00:00 2001 From: nimrodkor Date: Tue, 13 Sep 2022 09:58:49 +0300 Subject: [PATCH 1/4] Don't filter out AWS access key ID with the ID filter --- detect_secrets/filters/heuristic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/detect_secrets/filters/heuristic.py b/detect_secrets/filters/heuristic.py index c95995d65..6ae170b61 100644 --- a/detect_secrets/filters/heuristic.py +++ b/detect_secrets/filters/heuristic.py @@ -4,6 +4,8 @@ from functools import lru_cache from typing import Pattern +from detect_secrets.plugins.base import BasePlugin + def is_sequential_string(secret: str) -> bool: sequences = ( @@ -57,13 +59,13 @@ def _get_uuid_regex() -> Pattern: ) -def is_likely_id_string(secret: str, line: str) -> bool: +def is_likely_id_string(secret: str, line: str, plugin: BasePlugin) -> bool: try: index = line.index(secret) except ValueError: return False - return bool(_get_id_detector_regex().search(line, pos=0, endpos=index)) + return plugin.secret_type != 'AWS Access Key' and bool(_get_id_detector_regex().search(line, pos=0, endpos=index)) @lru_cache(maxsize=1) From 787ad04212c5bd73913b63917382abb59e5ec18d Mon Sep 17 00:00:00 2001 From: nimrodkor Date: Tue, 13 Sep 2022 10:25:09 +0300 Subject: [PATCH 2/4] Add UTs and extend to all regex based detectors --- detect_secrets/filters/heuristic.py | 9 +++++---- tests/filters/heuristic_filter_test.py | 10 ++++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/detect_secrets/filters/heuristic.py b/detect_secrets/filters/heuristic.py index 6ae170b61..e4272f8ae 100644 --- a/detect_secrets/filters/heuristic.py +++ b/detect_secrets/filters/heuristic.py @@ -2,9 +2,9 @@ import re import string from functools import lru_cache -from typing import Pattern +from typing import Pattern, Optional -from detect_secrets.plugins.base import BasePlugin +from detect_secrets.plugins.base import BasePlugin, RegexBasedDetector def is_sequential_string(secret: str) -> bool: @@ -59,13 +59,14 @@ def _get_uuid_regex() -> Pattern: ) -def is_likely_id_string(secret: str, line: str, plugin: BasePlugin) -> bool: +def is_likely_id_string(secret: str, line: str, plugin: Optional[BasePlugin] = None) -> bool: try: index = line.index(secret) except ValueError: return False - return plugin.secret_type != 'AWS Access Key' and bool(_get_id_detector_regex().search(line, pos=0, endpos=index)) + return (not plugin or not isinstance(plugin, RegexBasedDetector)) \ + and bool(_get_id_detector_regex().search(line, pos=0, endpos=index)) @lru_cache(maxsize=1) diff --git a/tests/filters/heuristic_filter_test.py b/tests/filters/heuristic_filter_test.py index 1962731ae..9cccde487 100644 --- a/tests/filters/heuristic_filter_test.py +++ b/tests/filters/heuristic_filter_test.py @@ -4,6 +4,7 @@ from detect_secrets import filters from detect_secrets.core.scan import scan_line +from detect_secrets.plugins.aws import AWSKeyDetector from detect_secrets.settings import transient_settings @@ -90,10 +91,15 @@ def test_success(self, secret, line): # fail although the word david ends in id ('RANDOM_STRING', 'postgres://david:RANDOM_STRING'), + + # fail since this is an aws access key id, a real secret + ('AKIA4NACSIJMDDNSEDTE', 'aws_access_key_id=AKIA4NACSIJMDDNSEDTE') ], ) - def test_failure(self, secret, line): - assert not filters.heuristic.is_likely_id_string(secret, line) + def test_failure(self, secret, line, plugin=None): + if secret.startswith('AKIA'): + plugin = AWSKeyDetector() + assert not filters.heuristic.is_likely_id_string(secret, line, plugin) @pytest.mark.parametrize( From c96240a6a921e4de40cd8e90cd4e51b4cfef0387 Mon Sep 17 00:00:00 2001 From: nimrodkor Date: Thu, 22 Sep 2022 10:07:25 +0300 Subject: [PATCH 3/4] Fix by pre-commit --- detect_secrets/filters/heuristic.py | 6 ++++-- tests/filters/heuristic_filter_test.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/detect_secrets/filters/heuristic.py b/detect_secrets/filters/heuristic.py index e4272f8ae..0dbdb4949 100644 --- a/detect_secrets/filters/heuristic.py +++ b/detect_secrets/filters/heuristic.py @@ -2,9 +2,11 @@ import re import string from functools import lru_cache -from typing import Pattern, Optional +from typing import Optional +from typing import Pattern -from detect_secrets.plugins.base import BasePlugin, RegexBasedDetector +from detect_secrets.plugins.base import BasePlugin +from detect_secrets.plugins.base import RegexBasedDetector def is_sequential_string(secret: str) -> bool: diff --git a/tests/filters/heuristic_filter_test.py b/tests/filters/heuristic_filter_test.py index 9cccde487..2eaad4373 100644 --- a/tests/filters/heuristic_filter_test.py +++ b/tests/filters/heuristic_filter_test.py @@ -93,7 +93,7 @@ def test_success(self, secret, line): ('RANDOM_STRING', 'postgres://david:RANDOM_STRING'), # fail since this is an aws access key id, a real secret - ('AKIA4NACSIJMDDNSEDTE', 'aws_access_key_id=AKIA4NACSIJMDDNSEDTE') + ('AKIA4NACSIJMDDNSEDTE', 'aws_access_key_id=AKIA4NACSIJMDDNSEDTE'), ], ) def test_failure(self, secret, line, plugin=None): From a91bcdb8729bea7188499c3c6ff91efb0eb2592e Mon Sep 17 00:00:00 2001 From: nimrodkor Date: Thu, 22 Sep 2022 20:11:02 +0300 Subject: [PATCH 4/4] Improve test pattern --- tests/filters/heuristic_filter_test.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/filters/heuristic_filter_test.py b/tests/filters/heuristic_filter_test.py index 2eaad4373..a2f5dbb2b 100644 --- a/tests/filters/heuristic_filter_test.py +++ b/tests/filters/heuristic_filter_test.py @@ -78,27 +78,25 @@ def test_success(self, secret, line): assert filters.heuristic.is_likely_id_string(secret, line) @pytest.mark.parametrize( - 'secret, line', + 'secret, line, plugin', [ # the word hidden has the word id in it, but lets # not mark that as an id string - ('RANDOM_STRING', 'hidden_secret: RANDOM_STRING'), - ('RANDOM_STRING', 'hidden_secret=RANDOM_STRING'), - ('RANDOM_STRING', 'hidden_secret = RANDOM_STRING'), + ('RANDOM_STRING', 'hidden_secret: RANDOM_STRING', None), + ('RANDOM_STRING', 'hidden_secret=RANDOM_STRING', None), + ('RANDOM_STRING', 'hidden_secret = RANDOM_STRING', None), # fail silently if the secret isn't even on the line - ('SOME_RANDOM_STRING', 'id: SOME_OTHER_RANDOM_STRING'), + ('SOME_RANDOM_STRING', 'id: SOME_OTHER_RANDOM_STRING', None), # fail although the word david ends in id - ('RANDOM_STRING', 'postgres://david:RANDOM_STRING'), + ('RANDOM_STRING', 'postgres://david:RANDOM_STRING', None), # fail since this is an aws access key id, a real secret - ('AKIA4NACSIJMDDNSEDTE', 'aws_access_key_id=AKIA4NACSIJMDDNSEDTE'), + ('AKIA4NACSIJMDDNSEDTE', 'aws_access_key_id=AKIA4NACSIJMDDNSEDTE', AWSKeyDetector()), ], ) - def test_failure(self, secret, line, plugin=None): - if secret.startswith('AKIA'): - plugin = AWSKeyDetector() + def test_failure(self, secret, line, plugin): assert not filters.heuristic.is_likely_id_string(secret, line, plugin)