Skip to content

Commit

Permalink
api: add support for simple wildcards (iterative#4864)
Browse files Browse the repository at this point in the history
* api: add support for simple wildcards

Related to iterative#4816.

Signed-off-by: Ioana Grigoropol <[email protected]>

* api: make wildcard interpretation optional

Adds a new `glob` argument to the add command. It is disabled by default; when enabled,
the input targets are expanded through glob filtering before being added.

Related: iterative#4816

Signed-off-by: Ioana Grigoropol <[email protected]>

* Update dvc/repo/add.py

* Update dvc/repo/add.py

* Update dvc/repo/add.py

* Update dvc/repo/add.py

* Update dvc/repo/add.py

Co-authored-by: Ruslan Kuprieiev <[email protected]>
  • Loading branch information
ju0gri and efiop authored Nov 11, 2020
1 parent b8cd024 commit 556143f
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 6 deletions.
7 changes: 7 additions & 0 deletions dvc/command/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def run(self):
no_commit=self.args.no_commit,
fname=self.args.file,
external=self.args.external,
glob=self.args.glob,
)

except DvcException:
Expand Down Expand Up @@ -57,6 +58,12 @@ def add_parser(subparsers, parent_parser):
default=False,
help="Allow targets that are outside of the DVC repository.",
)
parser.add_argument(
"--glob",
action="store_true",
default=False,
help="Allows targets containing shell-style wildcards.",
)
parser.add_argument(
"--file",
help="Specify name of the DVC-file this command will generate.",
Expand Down
35 changes: 29 additions & 6 deletions dvc/repo/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,13 @@
@locked
@scm_context
def add(
repo, targets, recursive=False, no_commit=False, fname=None, external=False
repo,
targets,
recursive=False,
no_commit=False,
fname=None,
external=False,
glob=False,
):
if recursive and fname:
raise RecursiveAddingWhileUsingFilename()
Expand Down Expand Up @@ -57,7 +63,12 @@ def add(
)

stages = _create_stages(
repo, sub_targets, fname, pbar=pbar, external=external
repo,
sub_targets,
fname,
pbar=pbar,
external=external,
glob=glob,
)

try:
Expand Down Expand Up @@ -149,15 +160,27 @@ def _find_all_targets(repo, target, recursive):
return [target]


def _create_stages(repo, targets, fname, pbar=None, external=False):
def _create_stages(
repo, targets, fname, pbar=None, external=False, glob=False
):
from glob import iglob

from dvc.stage import Stage, create_stage

stages = []
if glob:
expanded_targets = [
exp_target
for target in targets
for exp_target in iglob(target, recursive=True)
]
else:
expanded_targets = targets

stages = []
for out in Tqdm(
targets,
expanded_targets,
desc="Creating DVC-files",
disable=len(targets) < LARGE_DIR_SIZE,
disable=len(expanded_targets) < LARGE_DIR_SIZE,
unit="file",
):
path, wdir, out = resolve_paths(repo, out)
Expand Down
71 changes: 71 additions & 0 deletions tests/func/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,77 @@ def test_add_file_in_dir(tmp_dir, dvc):
assert stage.outs[0].def_path == "subdata"


@pytest.mark.parametrize(
    "target, expected_def_paths, expected_rel_paths",
    [
        # `*`: any suffix, confined to a single directory.
        (
            os.path.join("dir", "subdir", "subdata*"),
            ["subdata", "subdata123"],
            [
                os.path.join("dir", "subdir", name) + ".dvc"
                for name in ("subdata", "subdata123")
            ],
        ),
        # `?`: exactly one leading character.
        (
            os.path.join("dir", "subdir", "?subdata"),
            ["esubdata", "isubdata"],
            [
                os.path.join("dir", "subdir", name) + ".dvc"
                for name in ("esubdata", "isubdata")
            ],
        ),
        # `[...]`: one character from the set; "e" is not in it.
        (
            os.path.join("dir", "subdir", "[aiou]subdata"),
            ["isubdata"],
            [os.path.join("dir", "subdir", "isubdata") + ".dvc"],
        ),
        # `**`: zero or more directory levels below "dir".
        (
            os.path.join("dir", "**", "subdata*"),
            ["subdata", "subdata123", "subdata4", "subdata5"],
            [
                os.path.join(*parts) + ".dvc"
                for parts in (
                    ("dir", "subdir", "subdata"),
                    ("dir", "subdir", "subdata123"),
                    ("dir", "anotherdir", "subdata4"),
                    ("dir", "subdata5"),
                )
            ],
        ),
    ],
)
def test_add_filtered_files_in_dir(
    tmp_dir, dvc, target, expected_def_paths, expected_rel_paths
):
    """Adding a wildcard target with ``glob=True`` yields one stage per match.

    A small tree is generated under ``dir``; ``target`` is a pattern into
    that tree. Every returned stage must track exactly one output, have no
    dependencies, and correspond to one of the expected DVC-file paths.
    """
    subdir_files = {
        "subdata": "subdata content",
        "esubdata": "extra subdata content",
        "isubdata": "i subdata content",
        "subdata123": "subdata content 123",
    }
    anotherdir_files = {
        "subdata4": "subdata 4 content",
        "esubdata": "extra 2 subdata content",
    }
    tree = {
        "dir": {
            "subdir": subdir_files,
            "anotherdir": anotherdir_files,
            "subdata5": "subdata 5 content",
        }
    }
    tmp_dir.gen(tree)

    added = dvc.add(target, glob=True)

    assert len(added) == len(expected_def_paths)
    for created in added:
        assert created is not None
        assert len(created.deps) == 0
        assert len(created.outs) == 1
        assert created.relpath in expected_rel_paths

        # The stage's working dir is the DVC-file's own directory,
        # regardless of the directory the test runs from.
        assert created.wdir == os.path.dirname(created.path)
        assert created.outs[0].def_path in expected_def_paths


@pytest.mark.parametrize(
"workspace, hash_name, hash_value",
[
Expand Down

0 comments on commit 556143f

Please sign in to comment.