forked from Yelp/detect-secrets
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit a4d66fa
Showing
61 changed files
with
4,463 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
venv/bin/activate |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
[run] | ||
branch = True | ||
source = . | ||
omit = | ||
.tox/* | ||
/tmp* | ||
setup.py | ||
|
||
[report] | ||
exclude_lines = | ||
# Don't complain if non-runnable code isn't run: | ||
^if __name__ == ['"]__main__['"]:$ | ||
# Need to redefine this, as per documentation | ||
pragma: no cover |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
deactivate |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
*.py[co] | ||
*.sw[op] | ||
|
||
.coverage | ||
*.egg-info | ||
.tox | ||
venv | ||
/tmp | ||
|
||
|
||
.*ignore | ||
!.gitignore | ||
|
||
.pysensu.config.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
- repo: https://github.com/pre-commit/pre-commit-hooks | ||
sha: v0.9.1 | ||
hooks: | ||
- id: trailing-whitespace | ||
- id: end-of-file-fixer | ||
- id: autopep8-wrapper | ||
- id: check-docstring-first | ||
- id: debug-statements | ||
- id: name-tests-test | ||
exclude: tests/util | ||
- id: flake8 | ||
args: ['--ignore=E501'] | ||
exclude: ^test_data/ | ||
- repo: https://github.com/asottile/reorder_python_imports | ||
sha: v0.3.5 | ||
hooks: | ||
- id: reorder-python-imports | ||
language_version: python3.6 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
- id: detect-secrets | ||
name: Detect secrets | ||
description: Detects high entropy strings that are likely to be passwords. | ||
entry: detect-secrets-hook | ||
args: ['--base64-limit', '4.5', '--hex-limit', '3'] | ||
language: python | ||
# for backward compatibility | ||
files: .* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
name: SecretFound # name needs to be one word | ||
alert_after: 0 | ||
realert_every: -1 # -1 means exponential backoff | ||
runbook: no-runbook-available | ||
dependencies: [] | ||
team: team-security | ||
irc_channels: [] | ||
notification_email: [email protected] | ||
ticket: False | ||
project: False | ||
page: False | ||
tip: detect_secrets found a secret | ||
status: 1 # status needs to be 1 (warning) or higher to send the email | ||
ttl: null # null gets constructed into None |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
.PHONY: minimal | ||
minimal: setup | ||
|
||
.PHONY: setup | ||
setup: | ||
tox -e venv | ||
|
||
.PHONY: install-hooks | ||
install-hooks: | ||
tox -e pre-commit -- install -f --install-hooks | ||
|
||
.PHONY: test | ||
test: | ||
tox | ||
|
||
.PHONY: clean | ||
clean: | ||
find -name '*.pyc' -delete | ||
find -name '__pycache__' -delete | ||
rm -rf .tox | ||
rm -rf venv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
# detect_secrets | ||
|
||
## Description | ||
|
||
This is loosely based off [truffleHog](https://github.com/dxa4481/truffleHog/blob/master/truffleHog/truffleHog.py)'s secret scanner. However, instead of analyzing the entire git-history for secrets that have **ever** entered the repository, we wanted to perform preventative checks to make sure that no **additional** secrets will be added to the codebase. | ||
|
||
This is perfect for a backwards compatible solution, where you accept that there may **currently** be secrets hiding in your large repository, and you want to prevent new ones from entering without first dealing with the potentially gargantuan effort to move existing secrets away. | ||
|
||
We deal with this in two steps: | ||
|
||
1. Use a client-side pre-commit hook, to alert developers when they attempt to enter a secret in the code base. | ||
2. Set up a server-side cron job to periodically scan tracked repositories, to make sure that developers didn't accidentally skip the pre-commit check. | ||
|
||
## Installation | ||
|
||
There are three components that you can set up, depending on your purposes. | ||
|
||
### Pre-Commit Hook | ||
|
||
See [pre-commit](https://github.com/pre-commit/pre-commit) for instructions to install the pre-commit framework. | ||
|
||
Hooks available: | ||
|
||
- `detect-secrets`: This hook detects and prevents high entropy strings from entering the codebase. | ||
|
||
### Console Use / Server Use | ||
|
||
`pip install detect-secrets` | ||
|
||
## Configuration | ||
|
||
### Installing a baseline | ||
|
||
#### Step 1: Initialize your baseline. | ||
|
||
``` | ||
$ detect-secrets --initialize --exclude='^(\.git|venv)' > .secrets.baseline | ||
``` | ||
|
||
#### Step 2: Use your baseline in your pre-commit hook | ||
|
||
``` | ||
- repo: <this repo> | ||
hooks: | ||
- id: detect-secrets | ||
args: ['--baseline', '.secrets.baseline'] | ||
``` | ||
|
||
Remember to initialize your baseline with the same sensitivity configurations as your pre-commit hook! | ||
|
||
### Sensitivity Configuration | ||
|
||
This module works by searching for high entropy strings in the codebase, and [calculating their Shannon entropy](http://blog.dkbza.org/2007/05/scanning-data-for-entropy-anomalies.html). If the entropy of a given string exceeds the preset amount, the string will be rejected as a potential secret. | ||
|
||
The sensitivity of this pre-commit hook can be adjusted with command-line flags (e.g. `--base64-limit` and `--hex-limit`). Lowering these limits will identify more potential secrets, but also create more false positives. Adjust these limits to suit your needs. | ||
|
||
If you want a lower limit, but also want to whitelist specific strings from being detected, you can add the comment `# pragma: whitelist secret` to the line of code. | ||
|
||
For example: | ||
|
||
``` | ||
API_KEY = "blah-blah-but-actually-not-secret" # pragma: whitelist secret | ||
def main(): | ||
print('hello world') | ||
if __name__ == '__main__': | ||
main() | ||
``` | ||
|
||
This is the preferred way of whitelisting high entropy strings (rather than adding it to the baseline file), because it is easily searchable, auditable, and maintainable. | ||
|
||
### Setting up your server | ||
|
||
#### Step 1: Configure your config.yaml | ||
|
||
The following keys are accepted in your config file: | ||
|
||
``` | ||
config.yaml | ||
|- default # These are default values to use for each tracked repo. | ||
|- tracked # This is a list of tracked repos' details. | ||
``` | ||
|
||
Each tracked repository can have the following attributes: | ||
|
||
| attribute | description | ||
| --------------| ----------- | ||
| repo | where to `git clone` the repo from (**required**) | ||
| is_local_repo | True or False depending on if the repo is already on the filesystem (**required**) | ||
| sha | the commit hash to start scanning from (**required**) | ||
| cron | [crontab syntax](https://crontab.guru/) of how often to run a scan for this repo | ||
| plugins | list of plugins, with their respective settings | ||
| baseline | the filename to parse the detect-secrets baseline from | ||
|
||
See the sample `config.yaml.sample` for an example. | ||
|
||
#### Step 2: Configure your .pysensu.config.yaml | ||
|
||
See [pysensu-yelp](http://pysensu-yelp.readthedocs.io/en/latest/#pysensu_yelp.send_event) for instructions on configuring your Sensu events. | ||
|
||
See the sample `.pysensu.config.yaml.sample` for an example, but be sure to name your file `.pysensu.config.yaml`. | ||
|
||
#### Step 3: Set up your cron jobs | ||
|
||
``` | ||
echo -e "$(crontab -l)\n\n$(detect-secrets-server --initialize)" | crontab - | ||
``` | ||
|
||
## Use Cases | ||
|
||
### Fresh Repository | ||
|
||
**Scenario**: You are starting a brand new repo, so you **know** you haven't committed any secrets to the codebase yet. Moving forward, you want to make sure you don't do so. | ||
|
||
**Solution**: Great! Just [install the pre-commit hook](TODO:Link) for preventative measures. | ||
|
||
### Existing Repository | ||
|
||
**Scenario**: You have an existing repo that may or may not have secrets added to it before. You want to prevent further secrets from being committed, yet it's too much work to migrate all currently existing secrets in the codebase out. | ||
|
||
**Solution**: | ||
|
||
1. Create a baseline of existing secrets, so that the pre-commit hook will only detect the new secrets added. | ||
2. [Install the pre-commit hook](TODO:Link) for preventative measures. | ||
|
||
## A Few Caveats | ||
|
||
This is not meant to be a sure-fire solution to prevent secrets from entering the codebase. Only proper developer education can truly do that. This pre-commit hook merely implements several heuristics to try and prevent obvious cases of committing secrets. | ||
|
||
### Things that won't be prevented | ||
|
||
* Multi-line secrets. | ||
* Default passwords (eg. `password = "password"`) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
default: | ||
plugins: | ||
HexHighEntropyString: 3 | ||
Base64HighEntropyString: 4.5 | ||
baseline: .secrets.baseline | ||
base_tmp_dir: /tmp/detect_secrets_tracked_repos | ||
exclude_regex: ^(\.git|build|logs|node_modules|virtualenv_run)|.*tests/.* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"accessKeyId": "", | ||
"secretAccessKey": "", | ||
"region": "us-east-1" | ||
} |
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
#!/usr/bin/python | ||
from __future__ import absolute_import | ||
|
||
import os | ||
import re | ||
|
||
from detect_secrets.core.secrets_collection import SecretsCollection | ||
|
||
|
||
def apply_baseline_filter(results, baseline, filelist):
    """Report secrets that are new relative to the baseline.

    If no new secrets are found, the baseline is additionally brought up to
    date (in place) for every file in `filelist`: secrets that disappeared
    are dropped, and secrets that moved get their line number refreshed.

    :param results: SecretsCollection of current results
    :param baseline: SecretsCollection of baseline results.
                     This may be modified (by reference), but only when
                     no new secrets were found.
    :param filelist: list of strings; filenames that are scanned.
    :returns: SecretsCollection of new results (filtering out baseline)
    """
    new_secrets = SecretsCollection()

    exclude_pattern = None
    if baseline.exclude_regex:
        exclude_pattern = re.compile(baseline.exclude_regex, re.IGNORECASE)

    # Phase 1: gather every secret in the results that the baseline
    # does not already know about.
    for filename in results.data:
        # Files matching the baseline's exclude_regex are ignored entirely.
        if exclude_pattern is not None and exclude_pattern.search(filename):
            continue

        found = results.data[filename]
        if filename in baseline.data:
            known = baseline.data[filename]

            # Membership relies on PotentialSecret's __hash__ method.
            unseen = {}
            for secret in found:
                if secret not in known:
                    unseen[secret] = secret

            if unseen:
                new_secrets.data[filename] = unseen
        else:
            # The baseline has no record of this file at all, so every
            # secret found in it counts as new.
            new_secrets.data[filename] = found

    # New secrets take priority: report them and leave the baseline alone.
    if new_secrets.data:
        return new_secrets

    # Phase 2: nothing new was found, so reconcile the baseline with the
    # current state of the scanned files.
    for filename in filelist:
        if filename not in baseline.data:
            # The baseline never tracked this file; nothing to reconcile.
            continue

        if filename in results.data:
            # Iterate over a copy, because entries may be deleted from the
            # live baseline mapping while looping.
            for recorded in baseline.data[filename].copy():
                lookup = (filename, recorded.secret_hash, recorded.type)
                current = results.get_secret(*lookup)

                if current is None:
                    # The secret is gone from the results: drop it from
                    # the baseline as well.
                    stale = baseline.get_secret(*lookup)
                    del baseline.data[filename][stale]

                elif current.lineno != recorded.lineno:
                    # The secret moved: record its new location.
                    baseline.get_secret(*lookup).lineno = current.lineno
        else:
            # Every secret relating to this file was removed.
            del baseline.data[filename]

    return new_secrets
|
||
|
||
def initialize(plugins, exclude_regex=None, rootdir='.'):
    """Walk a directory tree and scan every non-excluded file in it.

    Paths are tested against `exclude_regex` relative to `rootdir`, and
    case-insensitively. A directory whose relative path matches has its
    own files skipped; the walk is not pruned, so each of its
    subdirectories is still visited and tested on its own relative path.

    :param plugins: tuple of detect_secrets.plugins.base.BasePlugin.
    :param [exclude_regex]: string; for optional regex string for ignored paths.
    :param [rootdir]: string; specify root directory.
    :returns: SecretsCollection
    """
    collection = SecretsCollection(plugins)

    pattern = None
    if exclude_regex:
        pattern = re.compile(exclude_regex, re.IGNORECASE)

    rootdir = os.path.abspath(rootdir)

    # Number of leading characters to strip from an absolute path to make it
    # relative to rootdir (the root itself plus one path separator).
    prefix_len = len(rootdir) + 1

    for subdir, _subdirs, filenames in os.walk(rootdir):
        if pattern is not None and pattern.search(subdir[prefix_len:]):
            continue

        for name in filenames:
            fullpath = os.path.join(subdir, name)
            relative_path = fullpath[prefix_len:]

            # Root-level files are not covered by the directory check above,
            # so each file path is tested individually too.
            if pattern is not None and pattern.search(relative_path):
                continue

            collection.scan_file(fullpath, relative_path)

    return collection
Oops, something went wrong.