Skip to content

Commit

Permalink
Support parallel linting when many individual files specified (sqlflu…
Browse files Browse the repository at this point in the history
…ff#4084)

* Support parallel linting when many individual files specified

* Add LintedDir to LintingResult

* Fix more tests

* Fix broken test

* Linter.lint_path() forward to Linter.lint_paths()

* Fix Windows tests (path separator)

* Fix issues with Windows tests

* Fix broken test

Co-authored-by: Alan Cruickshank <[email protected]>
  • Loading branch information
barrywhart and alanmcruickshank authored Nov 19, 2022
1 parent 181918e commit f5d726a
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 70 deletions.
95 changes: 37 additions & 58 deletions src/sqlfluff/core/linter/linter.py
Original file line number Diff line number Diff line change
Expand Up @@ -1103,17 +1103,39 @@ def lint_path(
processes: Optional[int] = None,
) -> LintedDir:
"""Lint a path."""
linted_path = LintedDir(path)
if self.formatter:
self.formatter.dispatch_path(path)
fnames = list(
self.paths_from_path(
return self.lint_paths(
(path,), fix, ignore_non_existent_files, ignore_files, processes
).paths[0]

def lint_paths(
self,
paths: Tuple[str, ...],
fix: bool = False,
ignore_non_existent_files: bool = False,
ignore_files: bool = True,
processes: Optional[int] = None,
) -> LintingResult:
"""Lint an iterable of paths."""
# If no paths specified - assume local
if not paths: # pragma: no cover
paths = (os.getcwd(),)
# Set up the result to hold what we get back
result = LintingResult()

expanded_paths: List[str] = []
expanded_path_to_linted_dir = {}
for path in paths:
linted_dir = LintedDir(path)
result.add(linted_dir)
for fname in self.paths_from_path(
path,
ignore_non_existent_files=ignore_non_existent_files,
ignore_files=ignore_files,
)
)
):
expanded_paths.append(fname)
expanded_path_to_linted_dir[fname] = linted_dir

files_count = len(expanded_paths)
if processes is None:
processes = self.config.get("processes", default=1)

Expand All @@ -1131,72 +1153,29 @@ def lint_path(
self.formatter.dispatch_processing_header(effective_processes)

# Show files progress bar only when there is more than one.
files_count = len(fnames)
first_path = expanded_paths[0] if expanded_paths else ""
progress_bar_files = tqdm(
total=files_count,
desc=f"file {os.path.basename(fnames[0] if fnames else '')}",
desc=f"file {first_path}",
leave=False,
disable=files_count <= 1 or progress_bar_configuration.disable_progress_bar,
)

for i, linted_file in enumerate(runner.run(fnames, fix), start=1):
linted_path.add(linted_file)
for i, linted_file in enumerate(runner.run(expanded_paths, fix), start=1):
linted_dir = expanded_path_to_linted_dir[linted_file.path]
linted_dir.add(linted_file)
# If any fatal errors, then stop iteration.
if any(v.fatal for v in linted_file.violations): # pragma: no cover
linter_logger.error("Fatal linting error. Halting further linting.")
break

# Progress bar for files is rendered only when there is more than one file.
# Additionally as it's updated after each loop, we need to get file name
# Additionally, as it's updated after each loop, we need to get file name
# from the next loop. This is why `enumerate` starts with `1` and there
# is `i < len` to not exceed files list length.
progress_bar_files.update(n=1)
if i < len(fnames):
progress_bar_files.set_description(
f"file {os.path.basename(fnames[i])}"
)

return linted_path

def lint_paths(
self,
paths: Tuple[str, ...],
fix: bool = False,
ignore_non_existent_files: bool = False,
ignore_files: bool = True,
processes: Optional[int] = None,
) -> LintingResult:
"""Lint an iterable of paths."""
paths_count = len(paths)

# If no paths specified - assume local
if not paths_count: # pragma: no cover
paths = (os.getcwd(),)
# Set up the result to hold what we get back
result = LintingResult()

progress_bar_paths = tqdm(
total=paths_count,
desc="path",
leave=False,
disable=paths_count <= 1 or progress_bar_configuration.disable_progress_bar,
)
for path in paths:
progress_bar_paths.set_description(f"path {path}")

# Iterate through files recursively in the specified directory (if it's a
# directory) or read the file directly if it's not
result.add(
self.lint_path(
path,
fix=fix,
ignore_non_existent_files=ignore_non_existent_files,
ignore_files=ignore_files,
processes=processes,
)
)

progress_bar_paths.update(1)
if i < len(expanded_paths):
progress_bar_files.set_description(f"file {expanded_paths[i]}")

result.stop_timer()
return result
Expand Down
51 changes: 39 additions & 12 deletions test/cli/commands_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1272,15 +1272,16 @@ def test__cli__command_lint_nocolor(isatty, should_strip_ansi, capsys, tmpdir):
)
@pytest.mark.parametrize("write_file", [None, "outfile"])
def test__cli__command_lint_serialize_multiple_files(serialize, write_file, tmp_path):
"""Test the output output formats for multiple files.
"""Test the output formats for multiple files.
This tests runs both stdout checking and file checking.
"""
fpath = "test/fixtures/linter/indentation_errors.sql"
fpath1 = "test/fixtures/linter/indentation_errors.sql"
fpath2 = "test/fixtures/linter/multiple_sql_errors.sql"

cmd_args = (
fpath,
fpath,
fpath1,
fpath2,
"--format",
serialize,
"--disable-progress-bar",
Expand Down Expand Up @@ -1313,7 +1314,7 @@ def test__cli__command_lint_serialize_multiple_files(serialize, write_file, tmp_
print("## End Payload")

if serialize == "human":
assert payload_length == 31 if write_file else 32
assert payload_length == 26 if write_file else 32
elif serialize == "json":
result = json.loads(result_payload)
assert len(result) == 2
Expand All @@ -1323,13 +1324,13 @@ def test__cli__command_lint_serialize_multiple_files(serialize, write_file, tmp_
elif serialize == "github-annotation":
result = json.loads(result_payload)
filepaths = {r["file"] for r in result}
assert len(filepaths) == 1
assert len(filepaths) == 2
elif serialize == "github-annotation-native":
result = result_payload.split("\n")
# SQLFluff produces trailing newline
if result[-1] == "":
del result[-1]
assert len(result) == 24
assert len(result) == 17
else:
raise Exception

Expand Down Expand Up @@ -1721,8 +1722,16 @@ def test_cli_lint_enabled_progress_bar_multiple_paths(
)
raw_output = repr(result.output)

assert r"\rpath test/fixtures/linter/passing.sql:" in raw_output
assert r"\rpath test/fixtures/linter/indentation_errors.sql:" in raw_output
sep = os.sep
if sys.platform == "win32":
sep *= 2
assert (
r"\rfile test/fixtures/linter/passing.sql:".replace("/", sep) in raw_output
)
assert (
r"\rfile test/fixtures/linter/indentation_errors.sql:".replace("/", sep)
in raw_output
)
assert r"\rlint by rules:" in raw_output
assert r"\rrule L001:" in raw_output
assert r"\rrule L049:" in raw_output
Expand All @@ -1741,9 +1750,27 @@ def test_cli_lint_enabled_progress_bar_multiple_files(
)
raw_output = repr(result.output)

assert r"\rfile passing.1.sql:" in raw_output
assert r"\rfile passing.2.sql:" in raw_output
assert r"\rfile passing.3.sql:" in raw_output
sep = os.sep
if sys.platform == "win32":
sep *= 2
assert (
r"\rfile test/fixtures/linter/multiple_files/passing.1.sql:".replace(
"/", sep
)
in raw_output
)
assert (
r"\rfile test/fixtures/linter/multiple_files/passing.2.sql:".replace(
"/", sep
)
in raw_output
)
assert (
r"\rfile test/fixtures/linter/multiple_files/passing.3.sql:".replace(
"/", sep
)
in raw_output
)
assert r"\rlint by rules:" in raw_output
assert r"\rrule L001:" in raw_output
assert r"\rrule L049:" in raw_output
Expand Down

0 comments on commit f5d726a

Please sign in to comment.