Skip to content

Commit

Permalink
Performance improvement of dvcignore (iterative#3967)
Browse files Browse the repository at this point in the history
Fix #3869
1. Use big regex.

* Solve windows

* add rule order test

* Solve ignore order

* remove list comprehensions
  • Loading branch information
karajan1001 authored Jun 15, 2020
1 parent bba5023 commit a59f90f
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 3 deletions.
27 changes: 24 additions & 3 deletions dvc/ignore.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import logging
import os
import re
from itertools import groupby

from funcy import cached_property
from pathspec import PathSpec
from pathspec.patterns import GitWildMatchPattern
from pathspec.util import normalize_file

from dvc.path_info import PathInfo
from dvc.scm.tree import BaseTree
from dvc.system import System
from dvc.utils import relpath

logger = logging.getLogger(__name__)
Expand All @@ -27,7 +30,16 @@ def __init__(self, ignore_file_path, tree):
self.dirname = os.path.normpath(os.path.dirname(ignore_file_path))

with tree.open(ignore_file_path, encoding="utf-8") as fobj:
self.ignore_spec = PathSpec.from_lines(GitWildMatchPattern, fobj)
path_spec_lines = fobj.readlines()
regex_pattern_list = map(
GitWildMatchPattern.pattern_to_regex, path_spec_lines
)
self.ignore_spec = [
(ignore, re.compile("|".join(item[0] for item in group)))
for ignore, group in groupby(
regex_pattern_list, lambda x: x[1]
)
]

def __call__(self, root, dirs, files):
files = [f for f in files if not self.matches(root, f)]
Expand All @@ -48,7 +60,16 @@ def matches(self, dirname, basename):
else:
return False

return self.ignore_spec.match_file(path)
if not System.is_unix():
path = normalize_file(path)
return self.ignore(path)

def ignore(self, path):
    """Decide whether ``path`` is ignored by the rules in ``self.ignore_spec``.

    ``self.ignore_spec`` holds ``(ignore, pattern)`` pairs in rule order,
    where ``pattern`` is a compiled regex. The flag of the last pair whose
    pattern matches ``path`` wins.

    Args:
        path: path string to test against the compiled patterns.

    Returns:
        The ``ignore`` flag of the last matching pair, or ``False`` when no
        pattern matches.
    """
    # Later rules override earlier ones, so walk the rules backwards and
    # stop at the first hit instead of testing every group.
    for is_ignored, regex in reversed(self.ignore_spec):
        if regex.match(path):
            return is_ignored
    return False

def __hash__(self):
    """Hash the instance by the path of its ignore file."""
    return hash(self.ignore_file_path)
Expand Down
15 changes: 15 additions & 0 deletions tests/unit/test_ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,18 @@ def test_should_ignore_dir(omit_dir):
new_dirs, _ = ignore(root, dirs, files)

assert set(new_dirs) == {"dir1", "dir2"}


def test_ignore_order():
    """Check that later ignore rules override earlier ones.

    With the rules ``!ac*``, ``a*``, ``!ab*`` (in that order), only ``ab``
    is expected to survive filtering.
    """
    dvcignore_path = os.path.join(os.path.sep, "ignore_order", ".dvcignore")

    patterns = ["!ac*", "a*", "!ab*"]

    root = os.path.dirname(dvcignore_path)
    dirs = ["ignore_order"]
    files = ["ac", "ab", "aa"]

    ignore = mock_dvcignore(dvcignore_path, patterns)
    _, new_files = ignore(root, dirs, files)

    assert set(new_files) == {"ab"}

0 comments on commit a59f90f

Please sign in to comment.