Skip to content

Commit

Permalink
Merge pull request iterative#3091 from pared/2896_repro
Browse files Browse the repository at this point in the history
repro: tests: migrate to dir helpers
  • Loading branch information
efiop authored Jan 10, 2020
2 parents 90152ea + 0554cbf commit 84ec0d4
Showing 1 changed file with 128 additions and 145 deletions.
273 changes: 128 additions & 145 deletions tests/func/test_repro.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
import getpass
import os
import posixpath
import pathlib
import re
import shutil
import uuid
from pathlib import Path
from subprocess import PIPE
from subprocess import Popen
from urllib.parse import urljoin
Expand All @@ -17,6 +17,7 @@
from google.cloud import storage as gc
from mock import patch

from dvc.compat import fspath
from dvc.exceptions import CyclicGraphError
from dvc.exceptions import ReproductionError
from dvc.exceptions import StagePathAsOutputError
Expand Down Expand Up @@ -1372,210 +1373,190 @@ def test(self):


@pytest.fixture
def repro_dir(dvc_repo, repo_dir):
repo_dir.dname = "dir"
os.mkdir(repo_dir.dname)
repo_dir.emptydname = "emptydir"
os.mkdir(repo_dir.emptydname)
subdname = os.path.join(repo_dir.dname, "subdir")
os.mkdir(subdname)

repo_dir.source = "source"
repo_dir.source_stage = repo_dir.source + ".dvc"
stage = dvc_repo.run(
fname=repo_dir.source_stage,
outs=[repo_dir.source],
deps=[repo_dir.FOO],
)
assert stage is not None
assert filecmp.cmp(repo_dir.source, repo_dir.FOO, shallow=False)

repo_dir.unrelated1 = "unrelated1"
repo_dir.unrelated1_stage = repo_dir.unrelated1 + ".dvc"
stage = dvc_repo.run(
fname=repo_dir.unrelated1_stage,
outs=[repo_dir.unrelated1],
deps=[repo_dir.source],
)
assert stage is not None

repo_dir.unrelated2 = "unrelated2"
repo_dir.unrelated2_stage = repo_dir.unrelated2 + ".dvc"
stage = dvc_repo.run(
fname=repo_dir.unrelated2_stage,
outs=[repo_dir.unrelated2],
deps=[repo_dir.DATA],
def repro_dir(tmp_dir, dvc, run_copy):
# Creates repo with following structure:
# data_dir/dir_file origin_data
# | | |
# | | origin_copy.dvc
# unrelated2.dvc | | |
# | | unrelated1.dvc
# dir/subdir/dir_file_copy.dvc |
# | |
# | dir/origin_copy_2.dvc
# | |
# \ /
# \ /
# dir/Dvcfile
tmp_dir.gen(
{
"origin_data": "origin data content",
"data_dir": {"dir_file": "dir file content"},
"dir": {"subdir": {}},
}
)
assert stage is not None

repo_dir.first = os.path.join(repo_dir.dname, "first")
repo_dir.first_stage = repo_dir.first + ".dvc"
stages = {}

stage = dvc_repo.run(
fname=repo_dir.first_stage,
deps=[repo_dir.source],
outs=[repo_dir.first],
cmd="python {} {} {}".format(
repo_dir.CODE, repo_dir.source, repo_dir.first
),
origin_copy = tmp_dir / "origin_copy"
stage = run_copy("origin_data", fspath(origin_copy))
assert stage is not None
assert origin_copy.read_text() == "origin data content"
stages["origin_copy"] = stage

origin_copy_2 = tmp_dir / "dir" / "origin_copy_2"
stage = run_copy(
fspath(origin_copy),
fspath(origin_copy_2),
fname=fspath(origin_copy_2) + ".dvc",
)
assert stage is not None
assert filecmp.cmp(repo_dir.first, repo_dir.FOO, shallow=False)

repo_dir.second = os.path.join(subdname, "second")
repo_dir.second_stage = repo_dir.second + ".dvc"
stage = dvc_repo.run(
fname=repo_dir.second_stage,
outs=[repo_dir.second],
deps=[repo_dir.DATA],
assert origin_copy_2.read_text() == "origin data content"
stages["origin_copy_2"] = stage

dir_file_path = tmp_dir / "data_dir" / "dir_file"
dir_file_copy = tmp_dir / "dir" / "subdir" / "dir_file_copy"
stage = run_copy(
fspath(dir_file_path),
fspath(dir_file_copy),
fname=fspath(dir_file_copy) + ".dvc",
)
assert stage is not None
assert filecmp.cmp(repo_dir.second, repo_dir.DATA, shallow=False)
assert dir_file_copy.read_text() == "dir file content"
stages["dir_file_copy"] = stage

repo_dir.third_stage = os.path.join(repo_dir.dname, "Dvcfile")
stage = dvc_repo.run(
fname=repo_dir.third_stage, deps=[repo_dir.first, repo_dir.second]
last_stage = tmp_dir / "dir" / "Dvcfile"
stage = dvc.run(
fname=fspath(last_stage),
deps=[fspath(origin_copy_2), fspath(dir_file_copy)],
)
assert stage is not None
stages["last_stage"] = stage

# Unrelated are to verify that reproducing `dir` will not trigger them too
assert run_copy(fspath(origin_copy), "unrelated1") is not None
assert run_copy(fspath(dir_file_path), "unrelated2") is not None

yield stages


def _rewrite_file(path_elements, new_content):
if isinstance(path_elements, str):
path_elements = [path_elements]
file = Path(os.sep.join(path_elements))
file.unlink()
file.write_text(new_content)


yield repo_dir
def _read_out(stage):
return Path(stage.outs[0].fspath).read_text()


def test_recursive_repro_default(dvc_repo, repro_dir):
def test_recursive_repro_default(dvc, repro_dir):
"""
Test recursive repro on dir after a dep outside this dir has changed.
"""
os.unlink(repro_dir.FOO)
shutil.copyfile(repro_dir.BAR, repro_dir.FOO)
_rewrite_file("origin_data", "new origin data content")

stages = dvc_repo.reproduce(repro_dir.dname, recursive=True)
# Check that the dependency ("source") and the dependent stages
# inside the folder have been reproduced ("first", "third")
assert len(stages) == 3
names = [stage.relpath for stage in stages]
assert repro_dir.source_stage in names
assert repro_dir.first_stage in names
assert repro_dir.third_stage in names
assert filecmp.cmp(repro_dir.source, repro_dir.BAR, shallow=False)
assert filecmp.cmp(repro_dir.first, repro_dir.BAR, shallow=False)
stages = dvc.reproduce("dir", recursive=True)

# Check that the dependency ("origin_copy") and the dependent stages
# inside the folder have been reproduced ("origin_copy_2", "last_stage")
assert stages == [
repro_dir["origin_copy"],
repro_dir["origin_copy_2"],
repro_dir["last_stage"],
]
assert _read_out(repro_dir["origin_copy"]) == "new origin data content"
assert _read_out(repro_dir["origin_copy_2"]) == "new origin data content"


def test_recursive_repro_single(dvc_repo, repro_dir):
def test_recursive_repro_single(dvc, repro_dir):
"""
Test recursive single-item repro on dir
after a dep outside this dir has changed.
"""
os.unlink(repro_dir.FOO)
shutil.copyfile(repro_dir.BAR, repro_dir.FOO)
_rewrite_file("origin_data", "new origin content")
_rewrite_file(["data_dir", "dir_file"], "new dir file content")

os.unlink(repro_dir.DATA)
shutil.copyfile(repro_dir.BAR, repro_dir.DATA)

stages = dvc_repo.reproduce(
repro_dir.dname, recursive=True, single_item=True
)
stages = dvc.reproduce("dir", recursive=True, single_item=True)
# Check that just stages inside given dir
# with changed direct deps have been reproduced.
# This means that "first" stage should not be reproduced
# since it depends on "source".
# Also check that "second" stage was reproduced before "third" stage
assert len(stages) == 2
assert repro_dir.second_stage == stages[0].relpath
assert repro_dir.third_stage == stages[1].relpath
assert filecmp.cmp(repro_dir.second, repro_dir.BAR, shallow=False)
# This means that "origin_copy_2" stage should not be reproduced
# since it depends on "origin_copy".
# Also check that "dir_file_copy" stage was reproduced before "last_stage"
assert stages == [repro_dir["dir_file_copy"], repro_dir["last_stage"]]
assert _read_out(repro_dir["dir_file_copy"]) == "new dir file content"


def test_recursive_repro_single_force(dvc_repo, repro_dir):
def test_recursive_repro_single_force(dvc, repro_dir):
"""
Test recursive single-item force repro on dir
without any dependencies changing.
"""
stages = dvc_repo.reproduce(
repro_dir.dname, recursive=True, single_item=True, force=True
)
assert len(stages) == 3
names = [stage.relpath for stage in stages]
stages = dvc.reproduce("dir", recursive=True, single_item=True, force=True)
# Check that all stages inside given dir have been reproduced
# Also check that "second" stage was reproduced before "third" stage
# and that "first" stage was reproduced before "third" stage
assert repro_dir.first_stage in names
assert repro_dir.second_stage in names
assert repro_dir.third_stage in names
assert names.index(repro_dir.first_stage) < names.index(
repro_dir.third_stage
# Also check that "dir_file_copy" stage was reproduced before "last_stage"
# and that "origin_copy" stage was reproduced before "last_stage" stage
assert len(stages) == 3
assert set(stages) == {
repro_dir["origin_copy_2"],
repro_dir["dir_file_copy"],
repro_dir["last_stage"],
}
assert stages.index(repro_dir["origin_copy_2"]) < stages.index(
repro_dir["last_stage"]
)
assert names.index(repro_dir.second_stage) < names.index(
repro_dir.third_stage
assert stages.index(repro_dir["dir_file_copy"]) < stages.index(
repro_dir["last_stage"]
)


def test_recursive_repro_empty_dir(dvc_repo, repro_dir):
def test_recursive_repro_empty_dir(tmp_dir, dvc):
"""
Test recursive repro on an empty directory
"""
stages = dvc_repo.reproduce(
repro_dir.emptydname, recursive=True, force=True
)
assert len(stages) == 0
(tmp_dir / "emptydir").mkdir()

stages = dvc.reproduce("emptydir", recursive=True, force=True)
assert stages == []


def test_recursive_repro_recursive_missing_file(dvc_repo):
def test_recursive_repro_recursive_missing_file(dvc):
"""
Test recursive repro on a missing file
"""
with pytest.raises(StageFileDoesNotExistError):
dvc_repo.reproduce("notExistingStage.dvc", recursive=True)
dvc.reproduce("notExistingStage.dvc", recursive=True)
with pytest.raises(StageFileDoesNotExistError):
dvc_repo.reproduce("notExistingDir/", recursive=True)
dvc.reproduce("notExistingDir/", recursive=True)


def test_recursive_repro_on_stage_file(dvc_repo, repro_dir):
def test_recursive_repro_on_stage_file(dvc, repro_dir):
"""
Test recursive repro on a stage file instead of directory
"""
stages = dvc_repo.reproduce(
repro_dir.first_stage, recursive=True, force=True
stages = dvc.reproduce(
repro_dir["origin_copy_2"].relpath, recursive=True, force=True
)
assert len(stages) == 2
names = [stage.relpath for stage in stages]
assert repro_dir.source_stage in names
assert repro_dir.first_stage in names
assert stages == [repro_dir["origin_copy"], repro_dir["origin_copy_2"]]


@pytest.fixture
def foo_copy(repo_dir, dvc_repo):
stages = dvc_repo.add(repo_dir.FOO)
assert len(stages) == 1
foo_stage = stages[0]
assert foo_stage is not None

fname = "foo_copy"
stage_fname = fname + ".dvc"
dvc_repo.run(
fname=stage_fname,
outs=[fname],
deps=[repo_dir.FOO, repo_dir.CODE],
cmd="python {} {} {}".format(repo_dir.CODE, repo_dir.FOO, fname),
)
return {"fname": fname, "stage_fname": stage_fname}


def test_dvc_formatting_retained(dvc_repo, foo_copy):
root = pathlib.Path(dvc_repo.root_dir)
stage_file = root / foo_copy["stage_fname"]
def test_dvc_formatting_retained(tmp_dir, dvc, run_copy):
tmp_dir.dvc_gen("foo", "foo content")
stage = run_copy("foo", "foo_copy", fname="foo_copy.dvc")
stage_path = tmp_dir / stage.relpath

# Add comments and custom formatting to DVC-file
lines = list(map(_format_dvc_line, stage_file.read_text().splitlines()))
lines = list(map(_format_dvc_line, stage_path.read_text().splitlines()))
lines.insert(0, "# Starting comment")
stage_text = "".join(l + "\n" for l in lines)
stage_file.write_text(stage_text)
stage_path.write_text(stage_text)

# Rewrite data source and repro
(root / "foo").write_text("new_foo")
dvc_repo.reproduce(foo_copy["stage_fname"])
(tmp_dir / "foo").write_text("new foo")
dvc.reproduce("foo_copy.dvc", force=True)

# All differences should be only about md5
assert _hide_md5(stage_text) == _hide_md5(stage_file.read_text())
assert _hide_md5(stage_text) == _hide_md5(stage_path.read_text())


def _format_dvc_line(line):
Expand Down Expand Up @@ -1630,10 +1611,12 @@ def test(self):
assert evaluation[2].relpath == "E.dvc"


def test_ssh_dir_out(dvc_repo):
def test_ssh_dir_out(tmp_dir, dvc):
if not _should_test_ssh():
pytest.skip()

tmp_dir.gen({"foo": "foo content"})

# Set up remote and cache
remote_url = get_ssh_url()
assert main(["remote", "add", "upstream", remote_url]) == 0
Expand All @@ -1643,7 +1626,7 @@ def test_ssh_dir_out(dvc_repo):
assert main(["config", "cache.ssh", "sshcache"]) == 0

# Recreating to reread configs
repo = DvcRepo(dvc_repo.root_dir)
repo = DvcRepo(dvc.root_dir)

url_info = URLInfo(remote_url)
mkdir_cmd = "mkdir dir-out;cd dir-out;echo 1 > 1.txt; echo 2 > 2.txt"
Expand Down

0 comments on commit 84ec0d4

Please sign in to comment.