Skip to content

Commit

Permalink
feature: adding multiprocessing support
Browse files Browse the repository at this point in the history
  • Loading branch information
Aaron Loo committed Apr 1, 2021
1 parent 4659023 commit 6d1f0ad
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 7 deletions.
10 changes: 3 additions & 7 deletions detect_secrets/core/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,9 @@
def create(*paths: str, should_scan_all_files: bool = False, root: str = '') -> SecretsCollection:
    """Scans all the files recursively in path to initialize a baseline.

    :param paths: locations forwarded to ``get_files_to_scan`` to discover files.
    :param should_scan_all_files: forwarded unchanged to ``get_files_to_scan``.
    :param root: used both as the root of the new ``SecretsCollection`` and
        for file discovery.
    :returns: the collection populated by ``SecretsCollection.scan_files``.
    """
    secrets = SecretsCollection(root=root)

    # Hand the whole batch to scan_files in a single call. A per-file
    # scan_file loop in front of this call would scan every file twice.
    secrets.scan_files(
        *get_files_to_scan(*paths, should_scan_all_files=should_scan_all_files, root=root)
    )

    return secrets

Expand Down
16 changes: 16 additions & 0 deletions detect_secrets/core/secrets_collection.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import multiprocessing as mp
import os
from collections import defaultdict
from typing import Any
Expand Down Expand Up @@ -45,6 +46,16 @@ def load_from_baseline(cls, baseline: Dict[str, Any]) -> 'SecretsCollection':
def files(self) -> Set[str]:
    """Return the keys of ``self.data`` as a new set."""
    tracked = self.data.keys()
    return set(tracked)

def scan_files(self, *filenames: str, num_processors: int = mp.cpu_count()) -> None:
    """Just like scan_file, but optimized through parallel processing.

    Every filename is joined onto ``self.root`` and scanned by a pool of
    worker processes. Results are consumed with ``imap_unordered``, so they
    are merged in completion order rather than input order.

    :param filenames: paths to scan; each is joined onto ``self.root``.
    :param num_processors: size of the worker pool. The default is the CPU
        count evaluated once, when this function is defined.
    """
    if not filenames:
        # Nothing to scan: skip the cost of starting and tearing down workers.
        return

    # Build the work list up front so the pool only exists while it is busy.
    targets = [os.path.join(self.root, filename) for filename in filenames]

    with mp.Pool(processes=num_processors) as p:
        for secrets in p.imap_unordered(_scan_file_and_serialize, targets):
            for secret in secrets:
                # Key each finding by its filename expressed relative to root.
                self[os.path.relpath(secret.filename, self.root)].add(secret)

def scan_file(self, filename: str) -> None:
    """Scan a single file and add each finding to ``self[filename]``.

    The path handed to the scanner is ``filename`` joined onto ``self.root``;
    the key used in this collection is ``filename`` exactly as given.
    """
    target = os.path.join(self.root, filename)
    for found in scan.scan_file(target):
        self[filename].add(found)
Expand Down Expand Up @@ -269,3 +280,8 @@ def __sub__(self, other: Any) -> 'SecretsCollection':
output[filename] = self[filename]

return output


def _scan_file_and_serialize(filename: str) -> List[PotentialSecret]:
    """Worker entry point for multiprocessing.

    Collects everything ``scan.scan_file`` yields for ``filename`` into a
    list. It is a named module-level function because lambdas can't be
    serialized.
    """
    return [*scan.scan_file(filename)]

0 comments on commit 6d1f0ad

Please sign in to comment.