Skip to content

Commit

Permalink
Merge branch 'version1' into feature/keyword-c
Browse files Browse the repository at this point in the history
  • Loading branch information
pablosnt committed Apr 5, 2021
2 parents eee4649 + 5ffc17d commit ee60f96
Show file tree
Hide file tree
Showing 22 changed files with 151 additions and 37 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ Create a baseline of potential secrets currently found in your git repository.
$ detect-secrets scan > .secrets.baseline
```

or, to run it from a different directory:

```bash
$ detect-secrets -C /path/to/directory scan > /path/to/directory/.secrets.baseline
```

**Scanning non-git tracked files:**

```bash
Expand Down Expand Up @@ -71,7 +77,7 @@ For baselines older than version 0.9, just recreate it.
**Scanning Staged Files Only:**

```bash
$ detect-secret-hook --baseline .secrets.baseline $(git diff --staged --name-only)
$ detect-secrets-hook --baseline .secrets.baseline $(git diff --staged --name-only)
```

**Scanning All Tracked Files:**
Expand Down
10 changes: 7 additions & 3 deletions detect_secrets/core/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,15 @@
from .secrets_collection import SecretsCollection


def create(*paths: str, should_scan_all_files: bool = False) -> SecretsCollection:
def create(*paths: str, should_scan_all_files: bool = False, root: str = '') -> SecretsCollection:
"""Scans all the files recursively in path to initialize a baseline."""
secrets = SecretsCollection()
secrets = SecretsCollection(root=root)

for filename in get_files_to_scan(*paths, should_scan_all_files=should_scan_all_files):
for filename in get_files_to_scan(
*paths,
should_scan_all_files=should_scan_all_files,
root=root,
):
secrets.scan_file(filename)

return secrets
Expand Down
2 changes: 1 addition & 1 deletion detect_secrets/core/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,4 @@ def set_debug_level(self, debug_level: int) -> None:
)


log = get_logger()
log = get_logger('detect-secrets')
2 changes: 1 addition & 1 deletion detect_secrets/core/plugins/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def from_file(filename: str) -> Iterable[Type[Plugin]]:
for plugin_class in get_plugins_from_file(filename):
secret_type = plugin_class.secret_type # type: ignore
if secret_type in get_mapping_from_secret_type_to_class():
log.debug(f'Duplicate plugin detected: {plugin_class.__name__}. Skipping...')
log.info(f'Duplicate plugin detected: {plugin_class.__name__}. Skipping...')

get_mapping_from_secret_type_to_class()[secret_type] = plugin_class
output.append(plugin_class)
Expand Down
22 changes: 16 additions & 6 deletions detect_secrets/core/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,16 @@
from ..util import git
from ..util.code_snippet import get_code_snippet
from ..util.inject import call_function_with_arguments
from ..util.path import get_relative_path_if_in_cwd
from ..util.path import get_relative_path
from .log import log
from .plugins import Plugin
from .potential_secret import PotentialSecret


def get_files_to_scan(
*paths: str,
should_scan_all_files: bool = False
should_scan_all_files: bool = False,
root: str = '',
) -> Generator[str, None, None]:
"""
If we specify specific files, we should be able to scan them. This abides by the
Expand All @@ -49,7 +50,12 @@ def get_files_to_scan(
the scan for all files.
See test cases for more details.
:param root: if not specified, will assume current repository as root.
"""
if root:
root = os.path.realpath(root)

# First, we determine the appropriate filtering mode to be used.
# If this is True, then it will consider everything to be valid.
# Otherwise, it will only list the files that are valid.
Expand All @@ -62,7 +68,7 @@ def get_files_to_scan(

if not should_scan_all_files:
try:
valid_paths = git.get_tracked_files(git.get_root_directory())
valid_paths = git.get_tracked_files(git.get_root_directory(root))
except subprocess.CalledProcessError:
log.warning('Did not detect git repository. Try scanning all files instead.')
valid_paths = False
Expand All @@ -77,14 +83,17 @@ def get_files_to_scan(

for path in paths:
iterator = (
cast(List[Tuple], [(os.getcwd(), None, [path])])
cast(List[Tuple], [(root or os.getcwd(), None, [path])])
if os.path.isfile(path)
else os.walk(path)
)

for path_root, _, filenames in iterator:
for filename in filenames:
relative_path = get_relative_path_if_in_cwd(os.path.join(path_root, filename))
relative_path = get_relative_path(
root=root or os.getcwd(),
path=os.path.join(path_root, filename),
)
if not relative_path:
# e.g. symbolic links may be pointing outside the root directory
continue
Expand Down Expand Up @@ -295,6 +304,7 @@ def _process_line_based_plugins(
# NOTE: We iterate through lines *then* plugins, because we want to quit early if any of the
# filters return True.
for line_number, line in lines:
log.debug(f'Processing {filename}:{line_number}')
line = line.rstrip()
code_snippet = get_code_snippet(
lines=line_content,
Expand Down Expand Up @@ -370,7 +380,7 @@ def _is_filtered_out(required_filter_parameters: Iterable[str], **kwargs: Any) -
else:
debug_msg = f'Skipping secret due to `{filter_fn.path}`.'

log.debug(debug_msg)
log.info(debug_msg)
return True
except TypeError:
# Skipping non-compatible filters
Expand Down
13 changes: 10 additions & 3 deletions detect_secrets/core/secrets_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,15 @@ def __iter__(self) -> Generator:


class SecretsCollection:
def __init__(self) -> None:
def __init__(self, root: str = '') -> None:
"""
:param root: if specified, will scan as if the root was the value provided,
rather than the current working directory. We still store results as if
relative to root, since we're running as if it was in a different directory,
rather than scanning a different directory.
"""
self.data: Dict[str, Set[PotentialSecret]] = defaultdict(set)
self.root = root

@classmethod
def load_from_baseline(cls, baseline: Dict[str, Any]) -> 'SecretsCollection':
Expand All @@ -39,8 +46,8 @@ def files(self) -> Set[str]:
return set(self.data.keys())

def scan_file(self, filename: str) -> None:
for secret in scan.scan_file(filename):
self[secret.filename].add(secret)
for secret in scan.scan_file(os.path.join(self.root, filename)):
self[filename].add(secret)

def scan_diff(self, diff: str) -> None:
"""
Expand Down
24 changes: 24 additions & 0 deletions detect_secrets/core/usage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from . import filters
from . import plugins
from . import scan
from ...settings import get_settings
from .common import initialize_plugin_settings
from detect_secrets.__version__ import VERSION

Expand All @@ -35,6 +36,17 @@ def add_default_options(self) -> 'ParserBuilder':
version=VERSION,
help='Display version information.',
)
self._parser.add_argument(
'-C',
metavar='<path>',
dest='custom_root',
nargs=1,
default=[''],
help=(
'Run as if detect-secrets was started in <path>, rather than in the current '
'working directory.'
),
)

return self

Expand Down Expand Up @@ -137,6 +149,18 @@ def parse_args(self, argv: Optional[List[str]] = None) -> argparse.Namespace:
print(f'error: {str(e)}', file=sys.stderr)
sys.exit(1)

args.custom_root = args.custom_root[0]
if args.custom_root:
# This filter assumes current working directory, which will fail if we're running
# from a different directory.
# TODO: Maybe adjust this so that it is directory agnostic?
get_settings().disable_filters('detect_secrets.filters.common.is_invalid_file')

# Abide by the Principle of Least Surprise, and have the default value be the
# custom root directory itself.
if args.path == ['.']:
args.path = [args.custom_root]

return args


Expand Down
2 changes: 1 addition & 1 deletion detect_secrets/core/usage/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def _add_initialize_baseline_options(parser: argparse.ArgumentParser) -> None:
parser.add_argument(
'path',
nargs='*',
default='.',
default=['.'],
help=(
'Scans the entire codebase and outputs a snapshot of '
'currently identified secrets.'
Expand Down
2 changes: 1 addition & 1 deletion detect_secrets/filters/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def is_invalid_file(filename: str) -> bool:


def is_baseline_file(filename: str) -> bool:
return filename == _get_baseline_filename()
return os.path.basename(filename) == _get_baseline_filename()


@lru_cache(maxsize=1)
Expand Down
8 changes: 8 additions & 0 deletions detect_secrets/filters/heuristic.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,14 @@ def is_lock_file(filename: str) -> bool:
}


def is_not_alphanumeric_string(secret: str) -> bool:
    """
    Returns True when `secret` contains no ASCII letter at all.

    The working assumption is that a real secret has at least ONE letter in it,
    so strings made up purely of symbols (e.g. `*****`) or purely of digits are
    treated as clear false positives. Note that only `string.ascii_letters` is
    consulted: digits and non-ASCII letters do not count as letters here.
    """
    return set(string.ascii_letters).isdisjoint(secret)


def is_swagger_file(filename: str) -> bool:
"""
Filters swagger files and paths, like swagger-ui.html or /swagger/.
Expand Down
14 changes: 11 additions & 3 deletions detect_secrets/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,23 @@ def handle_scan_action(args: argparse.Namespace) -> None:
return

if args.only_allowlisted:
secrets = SecretsCollection()
for filename in get_files_to_scan(*args.path, should_scan_all_files=args.all_files):
secrets = SecretsCollection(root=args.custom_root)
for filename in get_files_to_scan(
*args.path,
should_scan_all_files=args.all_files,
root=args.custom_root,
):
for secret in scan_for_allowlisted_secrets_in_file(filename):
secrets[secret.filename].add(secret)

print(json.dumps(baseline.format_for_output(secrets), indent=2))
return

secrets = baseline.create(*args.path, should_scan_all_files=args.all_files)
secrets = baseline.create(
*args.path,
should_scan_all_files=args.all_files,
root=args.custom_root,
)
if args.baseline is not None:
# The pre-commit hook's baseline upgrade is to trim the supplied baseline for non-existent
# secrets, and to upgrade the format to the latest version. This is because the pre-commit
Expand Down
8 changes: 4 additions & 4 deletions detect_secrets/plugins/keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@
OPTIONAL_NON_WHITESPACE = r'[^\s]{0,50}?'
QUOTE = r'[\'"`]'
# Secret regex details:
# [^\v\'"]* -> this section match with every character except line breaks and quotes. This
# allows to find secrets that starts with symbols or alphanumeric characters.
# (?=[^\v\'"]*) -> this section matches every character except line breaks and quotes. This
# allows finding secrets that start with symbols or alphanumeric characters.
#
# \w+ -> this section match only with words (letters, numbers or _ are allowed), and at
# (?=\w+) -> this section matches only word characters (letters, numbers or _ are allowed), and at
# least one character is required. This reduces the number of false
# positives.
#
Expand All @@ -83,7 +83,7 @@
# [^\v,\'"`] -> this section matches the last secret character, which can be anything except
# line breaks, commas, backticks or quotes. This reduces the number of false
# positives and prevents errors in the code snippet highlighting.
SECRET = r'[^\v\'\"]*\w+[^\v\'\"]*[^\v,\'\"`]'
SECRET = r'(?=[^\v\'\"]*)(?=\w+)[^\v\'\"]*[^\v,\'\"`]'
SQUARE_BRACKETS = r'(\[[0-9]*\])'

FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX = re.compile(
Expand Down
1 change: 1 addition & 0 deletions detect_secrets/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def clear(self) -> None:
'detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign',
'detect_secrets.filters.heuristic.is_indirect_reference',
'detect_secrets.filters.heuristic.is_lock_file',
'detect_secrets.filters.heuristic.is_not_alphanumeric_string',
'detect_secrets.filters.heuristic.is_swagger_file',
}
}
Expand Down
15 changes: 9 additions & 6 deletions detect_secrets/util/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,19 @@
from typing import Set

from ..core.log import log
from .path import get_relative_path_if_in_cwd
from .path import get_relative_path


def get_root_directory() -> str:
def get_root_directory(path: str = '') -> str:
"""
:raises: CalledProcessError
"""
return subprocess.check_output(
'git rev-parse --show-toplevel'.split(),
).decode('utf-8').strip()
command = ['git']
if path:
command.extend(['-C', path])

command.extend(['rev-parse', '--show-toplevel'])
return subprocess.check_output(command).decode('utf-8').strip()


def get_tracked_files(root: str) -> Set[str]:
Expand All @@ -33,7 +36,7 @@ def get_tracked_files(root: str) -> Set[str]:
)

for filename in files.decode('utf-8').splitlines():
path = get_relative_path_if_in_cwd(os.path.join(root, filename))
path = get_relative_path(root, os.path.join(root, filename))
if path:
output.add(path)

Expand Down
8 changes: 8 additions & 0 deletions detect_secrets/util/path.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
import os
from pathlib import Path
from typing import Optional


def get_relative_path(root: str, path: str) -> Optional[str]:
    """
    Expresses `path` relative to `root`.

    :param root: directory that the result should be relative to.
    :param path: file path to convert; symbolic links are resolved first.
    :returns: the relative path, or None if `path` does not live underneath `root`
        (e.g. a symbolic link pointing outside the root directory). When `root` is
        the current working directory, this defers to `get_relative_path_if_in_cwd`.
    """
    if Path(os.getcwd()) == Path(root):
        return get_relative_path_if_in_cwd(path)

    # Resolve both sides, so that a symlinked root compares against the same canonical
    # form as the resolved path.
    real_root = os.path.realpath(root)
    real_path = os.path.realpath(path)

    # Normalize to exactly one trailing separator. This makes a root given with a trailing
    # slash behave the same as one without, and stops `/repo` from being treated as a
    # prefix of `/repo-other/file`.
    prefix = real_root.rstrip(os.sep) + os.sep
    if not real_path.startswith(prefix):
        # Blindly slicing here would return an arbitrary (truthy) string for paths outside
        # of root, which callers would then mistake for a valid relative path.
        return None

    return real_path[len(prefix):]


def get_relative_path_if_in_cwd(path: str) -> Optional[str]:
filepath = os.path.realpath(path)[len(os.getcwd() + '/'):]
if os.path.isfile(filepath):
Expand Down
1 change: 1 addition & 0 deletions docs/filters.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ the `detect_secrets.filters` namespace.
| `heuristic.is_likely_id_string` | Ignores secret values prefixed with `id`. |
| `heuristic.is_lock_file` | Ignores common lock files. |
| `heuristic.is_non_text_file` | Ignores non-text files (e.g. archives, images). |
| `heuristic.is_not_alphanumeric_string` | Ignores secrets that do not contain at least one ASCII letter (e.g. `*****`). |
| `heuristic.is_potential_uuid` | Ignores uuid looking secret values. |
| `heuristic.is_prefixed_with_dollar_sign` | Primarily for `KeywordDetector`, filters secrets like `secret = $variableName;`. |
| `heuristic.is_sequential_string` | Ignores secrets like `abcdefg`. |
Expand Down
4 changes: 2 additions & 2 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pycodestyle==2.3.1
pyflakes==1.6.0
pyparsing==2.4.7
pytest==6.1.2
PyYAML==5.3.1
PyYAML==5.4
requests==2.25.0
responses==0.12.1
six==1.15.0
Expand All @@ -37,6 +37,6 @@ tox-pip-extensions==1.6.0
typed-ast==1.4.1
typing-extensions==3.7.4.3
unidiff==0.6.0
urllib3==1.26.2
urllib3==1.26.3
virtualenv==20.2.1
zipp==3.4.0
4 changes: 2 additions & 2 deletions test_data/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ keyA =
value3
; This is another comment

keyB = 456789123
567891234
keyB = 456789123a
567891234b

keyC =

Expand Down
Loading

0 comments on commit ee60f96

Please sign in to comment.