Skip to content

Commit

Permalink
Merge pull request Yelp#441 from Yelp/feature/support-multiprocessing-threads
Browse files: browse the repository at this point in the history

feature: adding multiprocessing support
  • Loading branch information
domanchi authored Apr 12, 2021
2 parents a7dfa01 + effbc14 commit e220759
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 8 deletions.
23 changes: 15 additions & 8 deletions detect_secrets/core/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import cast
from typing import Dict
from typing import List
from typing import Optional
from typing import Union

from . import upgrades
Expand All @@ -18,16 +19,22 @@
from .secrets_collection import SecretsCollection


# NOTE(review): this span is a rendered diff whose +/- markers and indentation were
# stripped, so pre-commit and post-commit lines are interleaved below.
# The one-line signature that follows appears to be the pre-commit version (removed).
def create(*paths: str, should_scan_all_files: bool = False, root: str = '') -> SecretsCollection:
# Post-commit signature: same parameters plus the optional ``num_processors`` keyword.
def create(
*paths: str,
should_scan_all_files: bool = False,
root: str = '',
num_processors: Optional[int] = None,
) -> SecretsCollection:
"""Scans all the files recursively in path to initialize a baseline."""
# Presumably the pre-commit position of this assignment (it reappears further down).
secrets = SecretsCollection(root=root)
# Forward ``num_processors`` only when it is truthy; otherwise nothing is passed and
# ``scan_files`` is called with its own default for that keyword.
kwargs = {}
if num_processors:
kwargs['num_processors'] = num_processors

# Presumably the pre-commit body: one ``scan_file`` call per discovered filename.
for filename in get_files_to_scan(
*paths,
should_scan_all_files=should_scan_all_files,
root=root,
):
secrets.scan_file(filename)
# Post-commit body: build the collection, then hand every discovered filename to a
# single ``scan_files`` call along with the optional keyword arguments.
secrets = SecretsCollection(root=root)
secrets.scan_files(
*get_files_to_scan(*paths, should_scan_all_files=should_scan_all_files, root=root),
**kwargs,
)

return secrets

Expand Down
23 changes: 23 additions & 0 deletions detect_secrets/core/secrets_collection.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import multiprocessing as mp
import os
from collections import defaultdict
from typing import Any
Expand Down Expand Up @@ -45,6 +46,23 @@ def load_from_baseline(cls, baseline: Dict[str, Any]) -> 'SecretsCollection':
def files(self) -> Set[str]:
return set(self.data.keys())

def scan_files(self, *filenames: str, num_processors: Optional[int] = None) -> None:
    """Just like scan_file, but optimized through parallel processing.

    :param filenames: paths to scan; each one is joined onto ``self.root`` before
        being handed to a worker process.
    :param num_processors: number of worker processes. A falsy value (``None`` or
        ``0``) falls back to ``multiprocessing.cpu_count()``.
    """
    if not filenames:
        # Nothing to scan: do not start a pool of workers that would sit idle.
        return

    if len(filenames) == 1:
        # A single file gains nothing from a pool, so scan it in this process.
        self.scan_file(filenames[0])
        return

    if not num_processors:
        num_processors = mp.cpu_count()

    with mp.Pool(processes=num_processors) as p:
        # imap_unordered yields each file's results as soon as its worker finishes,
        # so arrival order is not tied to the order of ``filenames``.
        for secrets in p.imap_unordered(
            _scan_file_and_serialize,
            [os.path.join(self.root, filename) for filename in filenames],
        ):
            for secret in secrets:
                # Workers were given root-joined paths; key the result by the path
                # relative to ``self.root``.
                self[os.path.relpath(secret.filename, self.root)].add(secret)

def scan_file(self, filename: str) -> None:
    """Scan one file and record every result under ``filename``.

    The path handed to the scanner is ``filename`` joined onto ``self.root``; the
    key used to store results is ``filename`` exactly as given.
    """
    target_path = os.path.join(self.root, filename)
    for found in scan.scan_file(target_path):
        self[filename].add(found)
Expand Down Expand Up @@ -269,3 +287,8 @@ def __sub__(self, other: Any) -> 'SecretsCollection':
output[filename] = self[filename]

return output


def _scan_file_and_serialize(filename: str) -> List[PotentialSecret]:
    """Scan ``filename`` and return the results collected into a list.

    Used for multiprocessing, since lambdas can't be serialized.
    """
    return [*scan.scan_file(filename)]
14 changes: 14 additions & 0 deletions detect_secrets/core/usage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,18 @@ def add_default_options(self) -> 'ParserBuilder':
'working directory.'
),
)
self._parser.add_argument(
'-c',
'--cores',
dest='num_cores',
nargs=1,
type=int,
default=[None],
help=(
'Specify the number of cores to use for parallel processing. Defaults to '
'using the max cores on the current host.'
),
)

return self

Expand Down Expand Up @@ -161,6 +173,8 @@ def parse_args(self, argv: Optional[List[str]] = None) -> argparse.Namespace:
if args.path == ['.']:
args.path = [args.custom_root]

args.num_cores = args.num_cores[0]

return args


Expand Down
1 change: 1 addition & 0 deletions detect_secrets/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def handle_scan_action(args: argparse.Namespace) -> None:
*args.path,
should_scan_all_files=args.all_files,
root=args.custom_root,
num_processors=args.num_cores,
)
if args.baseline is not None:
# The pre-commit hook's baseline upgrade is to trim the supplied baseline for non-existent
Expand Down

0 comments on commit e220759

Please sign in to comment.