Skip to content
This repository has been archived by the owner on Jul 12, 2022. It is now read-only.

Commit

Permalink
shared false positive filters
Browse files Browse the repository at this point in the history
  • Loading branch information
domanchi committed Jun 17, 2019
1 parent fbf245f commit ef037bd
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 25 deletions.
6 changes: 5 additions & 1 deletion detect_secrets/plugins/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
from abc import abstractmethod
from abc import abstractproperty

from .common.constants import ALLOWLIST_REGEXES
from .common.filters import is_false_positive
from detect_secrets.core.potential_secret import PotentialSecret
from detect_secrets.plugins.common.constants import ALLOWLIST_REGEXES


class BasePlugin(object):
Expand Down Expand Up @@ -169,4 +170,7 @@ def analyze_string_content(self, string, line_num, filename):
def secret_generator(self, string, *args, **kwargs):
    """Yield every denylist match found in `string` that is not filtered out.

    Each regex in `self.denylist` is run over `string` with `findall`;
    matches for which `is_false_positive` returns a truthy value are
    dropped, and the remaining matches are yielded in the order found.

    Extra positional and keyword arguments are accepted but not used here.
    """
    for regex in self.denylist:
        for match in regex.findall(string):
            # Shared heuristic filters discard obvious non-secrets.
            if is_false_positive(match):
                continue

            yield match
45 changes: 45 additions & 0 deletions detect_secrets/plugins/common/filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
Heuristic, false positive filters that are shared across all plugin types.
This abstraction allows for development of later ML work, or further
heuristic determinations (e.g. word filter, entropy comparator).
"""
import string


def is_false_positive(secret):
    """
    Returns True if any of the shared filter functions flags `secret`.

    Every filter is called with the candidate secret; the first truthy
    result short-circuits the check. Returns False when no filter matches.
    """
    for func in [
        is_sequential_string,
    ]:
        if func(secret):
            return True

    return False


def is_sequential_string(secret):
    """
    Returns true if string is sequential.

    The secret is upper-cased and then tested as a substring of a few
    fixed character runs, so the comparison is case-insensitive. The runs
    are repeated back to back so that a sequence wrapping around the end
    of an alphabet (e.g. "XYZABC" or "7890123") is still a substring.
    """
    sequences = (
        # Letters, digits and '+/' laid out so runs may cross from one
        # alphabet into the next.
        (
            string.ascii_uppercase +
            string.ascii_uppercase +
            string.digits +
            string.ascii_uppercase +
            string.ascii_uppercase +
            '+/'
        ),

        # Capturing any number sequences
        '0123456789' * 2,

        string.hexdigits.upper() + string.hexdigits.upper(),
        string.ascii_uppercase + '=/',
    )

    uppercase = secret.upper()
    return any(
        uppercase in sequential_string
        for sequential_string in sequences
    )
31 changes: 7 additions & 24 deletions detect_secrets/plugins/high_entropy_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,12 @@
import yaml

from .base import BasePlugin
from .common.filters import is_false_positive
from .common.ini_file_parser import IniFileParser
from .common.yaml_file_parser import YamlFileParser
from detect_secrets.core.potential_secret import PotentialSecret
from detect_secrets.plugins.common.ini_file_parser import IniFileParser
from detect_secrets.plugins.common.yaml_file_parser import YamlFileParser


# Upper-case character runs; a candidate that is a substring of any entry
# is treated as a sequential string.
IGNORED_SEQUENTIAL_STRINGS = (
    # Letters twice, digits, letters twice, then '+/'.
    ''.join((
        string.ascii_uppercase * 2,
        string.digits,
        string.ascii_uppercase * 2,
        '+/',
    )),
    # Upper-cased hex digits, repeated once.
    string.hexdigits.upper() * 2,
    string.ascii_uppercase + '=/',
)


YAML_EXTENSIONS = (
'.yaml',
'.yml',
Expand Down Expand Up @@ -97,22 +86,16 @@ def calculate_shannon_entropy(self, data):

return entropy

def _is_sequential_string(self, string):
    """Return True if `string` is a substring of an ignored sequence.

    The comparison is case-insensitive: `string` is upper-cased before it
    is checked against each entry of IGNORED_SEQUENTIAL_STRINGS.
    """
    # NOTE: the parameter shadows the `string` module inside this method.
    uppercased_string = string.upper()
    return any(
        uppercased_string in sequential_string
        for sequential_string in IGNORED_SEQUENTIAL_STRINGS
    )

def analyze_string_content(self, string, line_num, filename):
"""Searches string for custom pattern, and captures all high entropy strings that
match self.regex, with a limit defined as self.entropy_limit.
"""
output = {}

for result in self.secret_generator(string):
if self._is_sequential_string(result):
if is_false_positive(result):
continue

secret = PotentialSecret(self.secret_type, filename, result, line_num)
output[secret] = secret

Expand Down
31 changes: 31 additions & 0 deletions tests/plugins/common/filters_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from __future__ import absolute_import

import pytest

from detect_secrets.plugins.common import filters


class TestIsSequentialString:
    """Parametrized checks for `filters.is_sequential_string`."""
    # TODO: More tests should be had.

    @pytest.mark.parametrize(
        'secret',
        (
            'ABCDEF',
            # Number sequences
            '0123456789',
            '1234567890',
        ),
    )
    def test_success(self, secret):
        """Each parametrized value is expected to be flagged as sequential."""
        assert filters.is_sequential_string(secret)

    @pytest.mark.parametrize(
        'secret',
        (
            'BEEF1234',
        ),
    )
    def test_failure(self, secret):
        """Each parametrized value is expected not to be flagged."""
        assert not filters.is_sequential_string(secret)

0 comments on commit ef037bd

Please sign in to comment.