Skip to content

Commit

Permalink
Improve the file processors
Browse files Browse the repository at this point in the history
  • Loading branch information
b-c-ds authored and bcaller committed Mar 6, 2021
1 parent 36e9362 commit be40588
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 25 deletions.
36 changes: 36 additions & 0 deletions regexploit/bin/files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os
import os.path

from glob import escape, iglob
from typing import List, Optional


def _file_generator(
    files_argument: List[str], is_glob: bool, filename_globs: List[str]
):
    """Yield file paths described by the command-line ``files`` arguments.

    Args:
        files_argument: Paths (or glob patterns when ``is_glob`` is true).
        is_glob: When true, every entry of ``files_argument`` is expanded
            with a recursive glob and ``filename_globs`` is not used.
        filename_globs: Filename patterns (e.g. ``"*.py"``) searched for
            recursively inside each entry that is a directory.

    Yields:
        Matching paths. Entries that are not directories are yielded
        unchanged, without checking that they exist.
    """
    if is_glob:
        for pattern in files_argument:
            yield from iglob(pattern, recursive=True)
        return

    for path in files_argument:
        if not os.path.isdir(path):
            yield path
            continue
        # The directory is a literal path, not a pattern: escape it so that
        # glob metacharacters in its name ("[", "]", "*", "?") are matched
        # verbatim instead of silently selecting nothing (or something else).
        root = escape(path)
        for name_glob in filename_globs:
            yield from iglob(os.path.join(root, "**", name_glob), recursive=True)


def file_generator(
    files_argument: List[str],
    is_glob: bool,
    filename_globs: List[str],
    ignore: Optional[List[str]] = None,
):
    """Yield the files to process, skipping any ignored paths.

    Args:
        files_argument: Paths (or glob patterns when ``is_glob`` is true).
        is_glob: Expand ``files_argument`` entries as recursive globs.
        filename_globs: Filename patterns searched for inside directories
            when ``is_glob`` is false.
        ignore: Optional substrings; a path containing any of them is
            skipped. ``None`` or an empty list disables filtering.

    Yields:
        File paths, in the order they are discovered.
    """
    candidates = _file_generator(files_argument, is_glob, filename_globs)
    if not ignore:
        yield from candidates
        return
    for path in candidates:
        if not any(fragment in path for fragment in ignore):
            yield path
13 changes: 5 additions & 8 deletions regexploit/bin/regexploit_js.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@
import traceback
import warnings

from glob import iglob
from itertools import islice

from regexploit.ast.sre import SreOpParser
from regexploit.bin.files import file_generator
from regexploit.javascript import fix_js_regex
from regexploit.redos import find
from regexploit.output.text import TextOutput
Expand Down Expand Up @@ -98,17 +96,16 @@ def main():
"--glob", action="store_true", help="Glob the input filenames (**/*)"
)
parser.add_argument("--verbose", action="store_true", help="Verbose logging")
parser.add_argument(
"--ignore", action="append", help="Paths containing this string are ignored"
)
args = parser.parse_args()

if args.verbose:
logging.basicConfig(level=logging.DEBUG)

output = TextOutput(js_flavour=True)
files = (
(fname for fglob in args.files for fname in iglob(fglob, recursive=True))
if args.glob
else iter(args.files)
)
files = file_generator(args.files, args.glob, ["*.js", "*.ts"], args.ignore)
while True:
batch = []
for _ in range(50):
Expand Down
23 changes: 14 additions & 9 deletions regexploit/bin/regexploit_python_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import argparse
import ast
import json
import os.path
import logging
import re
import subprocess
import sys
Expand All @@ -12,6 +12,7 @@
from glob import iglob

from regexploit.ast.sre import SreOpParser
from regexploit.bin.files import file_generator
from regexploit.python_node_visitor import PythonNodeVisitor
from regexploit.redos import find
from regexploit.output.text import TextOutput
Expand All @@ -27,8 +28,8 @@ def handle_file(filename: str, output: TextOutput):
except RecursionError:
print(f"RecursionError parsing AST for {filename}")
return
except SyntaxError:
print(f"Bad Python3 syntax in {filename}")
except SyntaxError as e:
print(f"Bad Python3 syntax in {filename}: {e}")
return
for regex in pnv.patterns:
first_for_regex = True
Expand Down Expand Up @@ -67,19 +68,23 @@ def main():
parser = argparse.ArgumentParser(
description="Parse regexes out of python files and scan them for REDoS"
)
parser.add_argument("files", nargs="+", help="Python files")
parser.add_argument("files", nargs="+", help="Python files or directories")
parser.add_argument(
"--glob", action="store_true", help="Glob the input filenames (**/*)"
)
parser.add_argument("--verbose", action="store_true", help="Verbose logging")
parser.add_argument(
"--ignore", action="append", help="Paths containing this string are ignored"
)
args = parser.parse_args()

files = (
(fname for fglob in args.files for fname in iglob(fglob, recursive=True))
if args.glob
else iter(args.files)
)
if args.verbose:
logging.basicConfig(level=logging.DEBUG)

files = file_generator(args.files, args.glob, ["*.py"], args.ignore)
output = TextOutput()
for filename in files:
logging.debug(filename)
handle_file(filename, output)
print(f"Processed {output.regexes} regexes")

Expand Down
24 changes: 16 additions & 8 deletions regexploit/bin/regexploit_yaml.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
#!/usr/bin/env python
import argparse
import json
import os.path
import logging
import re
import warnings

from glob import iglob

from regexploit.ast.sre import SreOpParser
from regexploit.bin.files import file_generator
from regexploit.python_node_visitor import PythonNodeVisitor
from regexploit.redos import find
from regexploit.output.text import TextOutput
Expand Down Expand Up @@ -71,15 +70,24 @@ def main(get_object=get_json):
parser.add_argument(
"--glob", action="store_true", help="Glob the input filenames (**/*)"
)
parser.add_argument("--verbose", action="store_true", help="Verbose logging")
parser.add_argument(
"--ignore", action="append", help="Paths containing this string are ignored"
)
args = parser.parse_args()

files = (
(fname for fglob in args.files for fname in iglob(fglob, recursive=True))
if args.glob
else iter(args.files)
if args.verbose:
logging.basicConfig(level=logging.DEBUG)

files = file_generator(
args.files,
args.glob,
["*.json"] if get_object is get_json else ["*.yaml", "*.yml", "*.json"],
args.ignore,
)
output = TextOutput()
for filename in files:
logging.debug(filename)
handle_file(get_object(filename), filename, output)
print(f"Processed {output.regexes} regexes")

Expand All @@ -99,7 +107,7 @@ def get_yaml(filename: str):
main(get_object=get_yaml)
except ImportError:
print(
"Pyyaml extra required: Install regexploit with 'pip install regexploit[yaml]' or do 'pip install pyyaml'"
"Pyyaml extra required: Install regexploit with 'pip install regexploit[yaml]' or run 'pip install pyyaml'"
)
raise

Expand Down

0 comments on commit be40588

Please sign in to comment.