import os
import os.path

from glob import escape, iglob
from typing import Iterator, List, Optional


def _file_generator(
    files_argument: List[str], is_glob: bool, filename_globs: List[str]
) -> Iterator[str]:
    """Expand command-line file arguments into a stream of paths.

    Args:
        files_argument: Paths (or glob patterns when ``is_glob`` is true)
            as given on the command line.
        is_glob: When true, every argument is treated as a glob pattern and
            expanded recursively; the results are yielded unfiltered.
        filename_globs: Filename patterns (e.g. ``"*.py"``) searched for
            recursively inside any argument that is a directory. Unused
            when ``is_glob`` is true.

    Yields:
        Matching paths. In non-glob mode an argument that is not a directory
        is yielded verbatim, without checking that it exists.
    """
    if is_glob:
        for pattern in files_argument:
            yield from iglob(pattern, recursive=True)
        return

    for path in files_argument:
        if not os.path.isdir(path):
            yield path
            continue
        # The directory is a literal path, not a pattern: escape it so that
        # names containing glob metacharacters ("[", "*", "?") still match.
        root = escape(path)
        for name_glob in filename_globs:
            pattern = os.path.join(root, "**", name_glob)
            for match in iglob(pattern, recursive=True):
                # A sub-directory can match a filename glob (e.g. "dir.py");
                # only files are useful to the scanners.
                if not os.path.isdir(match):
                    yield match


def file_generator(
    files_argument: List[str],
    is_glob: bool,
    filename_globs: List[str],
    ignore: Optional[List[str]] = None,
) -> Iterator[str]:
    """Yield the paths to scan, dropping any that contain an ignored string.

    Args:
        files_argument: Paths or glob patterns from the command line.
        is_glob: Treat ``files_argument`` entries as recursive glob patterns.
        filename_globs: Filename patterns used when an argument is a
            directory.
        ignore: Substrings; a path containing any of them is skipped.
            ``None`` or an empty list disables filtering.

    Yields:
        Paths produced from the arguments that survive the ignore filter.
    """
    candidates = _file_generator(files_argument, is_glob, filename_globs)
    if not ignore:
        yield from candidates
        return
    for path in candidates:
        if not any(fragment in path for fragment in ignore):
            yield path
file_generator(args.files, args.glob, ["*.js", "*.ts"], args.ignore) while True: batch = [] for _ in range(50): diff --git a/regexploit/bin/regexploit_python_ast.py b/regexploit/bin/regexploit_python_ast.py index a33382e..e66df31 100644 --- a/regexploit/bin/regexploit_python_ast.py +++ b/regexploit/bin/regexploit_python_ast.py @@ -2,7 +2,7 @@ import argparse import ast import json -import os.path +import logging import re import subprocess import sys @@ -12,6 +12,7 @@ from glob import iglob from regexploit.ast.sre import SreOpParser +from regexploit.bin.files import file_generator from regexploit.python_node_visitor import PythonNodeVisitor from regexploit.redos import find from regexploit.output.text import TextOutput @@ -27,8 +28,8 @@ def handle_file(filename: str, output: TextOutput): except RecursionError: print(f"RecursionError parsing AST for {filename}") return - except SyntaxError: - print(f"Bad Python3 syntax in {filename}") + except SyntaxError as e: + print(f"Bad Python3 syntax in {filename}: {e}") return for regex in pnv.patterns: first_for_regex = True @@ -67,19 +68,23 @@ def main(): parser = argparse.ArgumentParser( description="Parse regexes out of python files and scan them for REDoS" ) - parser.add_argument("files", nargs="+", help="Python files") + parser.add_argument("files", nargs="+", help="Python files or directories") parser.add_argument( "--glob", action="store_true", help="Glob the input filenames (**/*)" ) + parser.add_argument("--verbose", action="store_true", help="Verbose logging") + parser.add_argument( + "--ignore", action="append", help="Paths containing this string are ignored" + ) args = parser.parse_args() - files = ( - (fname for fglob in args.files for fname in iglob(fglob, recursive=True)) - if args.glob - else iter(args.files) - ) + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + + files = file_generator(args.files, args.glob, ["*.py"], args.ignore) output = TextOutput() for filename in files: + 
logging.debug(filename) handle_file(filename, output) print(f"Processed {output.regexes} regexes") diff --git a/regexploit/bin/regexploit_yaml.py b/regexploit/bin/regexploit_yaml.py index 02e6c4e..3912914 100644 --- a/regexploit/bin/regexploit_yaml.py +++ b/regexploit/bin/regexploit_yaml.py @@ -1,13 +1,12 @@ #!/usr/bin/env python import argparse import json -import os.path +import logging import re import warnings -from glob import iglob - from regexploit.ast.sre import SreOpParser +from regexploit.bin.files import file_generator from regexploit.python_node_visitor import PythonNodeVisitor from regexploit.redos import find from regexploit.output.text import TextOutput @@ -71,15 +70,24 @@ def main(get_object=get_json): parser.add_argument( "--glob", action="store_true", help="Glob the input filenames (**/*)" ) + parser.add_argument("--verbose", action="store_true", help="Verbose logging") + parser.add_argument( + "--ignore", action="append", help="Paths containing this string are ignored" + ) args = parser.parse_args() - files = ( - (fname for fglob in args.files for fname in iglob(fglob, recursive=True)) - if args.glob - else iter(args.files) + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + + files = file_generator( + args.files, + args.glob, + ["*.json"] if get_object is get_json else ["*.yaml", "*.yml", "*.json"], + args.ignore, ) output = TextOutput() for filename in files: + logging.debug(filename) handle_file(get_object(filename), filename, output) print(f"Processed {output.regexes} regexes") @@ -99,7 +107,7 @@ def get_yaml(filename: str): main(get_object=get_yaml) except ImportError: print( - "Pyyaml extra required: Install regexploit with 'pip install regexploit[yaml]' or do 'pip install pyyaml'" + "Pyyaml extra required: Install regexploit with 'pip install regexploit[yaml]' or run 'pip install pyyaml'" ) raise