Skip to content

Commit

Permalink
remote: tests: refactor verify test
Browse files Browse the repository at this point in the history
  • Loading branch information
pared committed Jan 22, 2020
1 parent ef1b06d commit b451de5
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 71 deletions.
13 changes: 10 additions & 3 deletions dvc/command/data_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def run(self):
with_deps=self.args.with_deps,
force=self.args.force,
recursive=self.args.recursive,
verify=self.args.verify,
)
except DvcException:
logger.exception("failed to pull data from the cloud")
Expand Down Expand Up @@ -66,7 +67,7 @@ def run(self):
all_tags=self.args.all_tags,
with_deps=self.args.with_deps,
recursive=self.args.recursive,
trust_remote=self.args.trust_remote,
verify=self.args.verify,
)
except DvcException:
logger.exception("failed to fetch data from the cloud")
Expand Down Expand Up @@ -146,6 +147,12 @@ def add_parser(subparsers, _parent_parser):
default=False,
help="Pull cache for subdirectories of the specified directory.",
)
pull_parser.add_argument(
"--verify",
action="store_true",
default=False,
help="Verify downloaded files checksums.",
)
pull_parser.set_defaults(func=CmdDataPull)

# Push
Expand Down Expand Up @@ -233,10 +240,10 @@ def add_parser(subparsers, _parent_parser):
help="Fetch cache for subdirectories of specified directory.",
)
fetch_parser.add_argument(
"--trust-remote",
"--verify",
action="store_true",
default=False,
help="Trust remote cache checksums upon fetching.",
help="Verify downloaded files checksums.",
)
fetch_parser.set_defaults(func=CmdDataFetch)

Expand Down
24 changes: 11 additions & 13 deletions dvc/data_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,7 @@ def push(self, cache, jobs=None, remote=None, show_checksums=False):
)

def pull(
self,
cache,
jobs=None,
remote=None,
show_checksums=False,
trust_remote=False,
self, cache, jobs=None, remote=None, show_checksums=False, verify=False
):
"""Pull data items in a cloud-agnostic way.
Expand All @@ -86,15 +81,18 @@ def pull(
remote=self.get_remote(remote, "pull"),
show_checksums=show_checksums,
)
if trust_remote:
for checksum in cache["local"].keys():
cache_file = self.repo.cache.local.checksum_to_path_info(
checksum
)
if self.repo.cache.local.exists(cache_file):
self.repo.state.save(cache_file, checksum)

if not verify:
self._save_pulled_checksums(cache)

return downloaded_items_num

def _save_pulled_checksums(self, cache):
for checksum in cache["local"].keys():
cache_file = self.repo.cache.local.checksum_to_path_info(checksum)
if self.repo.cache.local.exists(cache_file):
self.repo.state.save(cache_file, checksum)

def status(self, cache, jobs=None, remote=None, show_checksums=False):
"""Check status of data items in a cloud-agnostic way.
Expand Down
4 changes: 2 additions & 2 deletions dvc/output/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ def _collect_used_dir_cache(
force=False,
jobs=None,
filter_info=None,
trust_remote=False,
verify=False,
):
"""Get a list of `info`s related to the given directory.
Expand Down Expand Up @@ -375,7 +375,7 @@ def _collect_used_dir_cache(
jobs=jobs,
remote=remote,
show_checksums=False,
trust_remote=trust_remote,
verify=verify,
)
except DvcException:
logger.debug("failed to pull cache for '{}'".format(self))
Expand Down
4 changes: 2 additions & 2 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def used_cache(
force=False,
jobs=None,
recursive=False,
trust_remote=False,
verify=False,
):
"""Get the stages related to the given target and collect
the `info` of its outputs.
Expand Down Expand Up @@ -263,7 +263,7 @@ def used_cache(
force=force,
jobs=jobs,
filter_info=filter_info,
trust_remote=trust_remote,
verify=verify,
)
cache.update(used_cache, suffix=suffix)

Expand Down
6 changes: 3 additions & 3 deletions dvc/repo/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def _fetch(
with_deps=False,
all_tags=False,
recursive=False,
trust_remote=False,
verify=False,
):
"""Download data items from a cloud and imported repositories
Expand All @@ -43,7 +43,7 @@ def _fetch(
remote=remote,
jobs=jobs,
recursive=recursive,
trust_remote=trust_remote,
verify=verify,
)

downloaded = 0
Expand All @@ -55,7 +55,7 @@ def _fetch(
jobs,
remote=remote,
show_checksums=show_checksums,
trust_remote=trust_remote,
verify=verify,
)
except NoRemoteError:
if not used.external and used["local"]:
Expand Down
4 changes: 2 additions & 2 deletions dvc/repo/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def pull(
all_tags=False,
force=False,
recursive=False,
trust_remote=False,
verify=False,
):
processed_files_count = self._fetch(
targets,
Expand All @@ -27,7 +27,7 @@ def pull(
all_tags=all_tags,
with_deps=with_deps,
recursive=recursive,
trust_remote=trust_remote,
verify=verify,
)
self._checkout(
targets=targets, with_deps=with_deps, force=force, recursive=recursive
Expand Down
59 changes: 13 additions & 46 deletions tests/func/test_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,60 +260,27 @@ def test_modify_missing_remote(dvc):
remote_config.modify("myremote", "gdrive_client_id", "xxx")


def test_trust_remote_checksums(tmp_dir, tmp_path_factory, erepo_dir, mocker):
def test_verify_checksums(tmp_dir, tmp_path_factory, erepo_dir, mocker):
with erepo_dir.chdir():

erepo_dir.dvc_gen({"file": "file content"}, commit="add dir")

Git.clone(fspath(erepo_dir), fspath(tmp_dir))

from dvc.repo import Repo

dvc = Repo(fspath(tmp_dir))

md5_spy = mocker.spy(dvc.cache.local, "get_file_checksum")

dvc.pull(trust_remote=True)

assert md5_spy.call_count == 0


def test_trust_remote_checksums_dir(
tmp_dir, mocker, tmp_path_factory, erepo_dir
):
with erepo_dir.chdir():
erepo_dir.dvc_gen({"dir": {"file": "file content"}}, commit="add dir")
erepo_dir.dvc_gen({"file": "file1 content"}, commit="add file")
erepo_dir.dvc_gen(
{"dir": {"subfile": "file2 content"}}, commit="add " "dir"
)

Git.clone(fspath(erepo_dir), fspath(tmp_dir))

from dvc.repo import Repo

dvc = Repo(fspath(tmp_dir))

md5_spy = mocker.spy(dvc.cache.local, "get_file_checksum")
file_md5_spy = mocker.spy(dvc.cache.local, "get_file_checksum")

dvc.pull(trust_remote=False)
dvc.pull()
assert file_md5_spy.call_count == 0

assert md5_spy.call_count == 0


def test_trust_remote_checksums_external_dep(
tmp_dir, mocker, tmp_path_factory, erepo_dir
):
external_path = tmp_path_factory.mktemp("external_dep") / "file"
external_path.write_text("file content")

with erepo_dir.chdir():
erepo_dir.dvc_add(fspath(external_path), commit="add external file")

Git.clone(fspath(erepo_dir), fspath(tmp_dir))

from dvc.repo import Repo

dvc = Repo(fspath(tmp_dir))

md5_spy = mocker.spy(dvc.cache.local, "get_file_checksum")

dvc.pull(trust_remote=True)
# Removing cache will invalidate existing state entries
shutil.rmtree(dvc.cache.local.cache_dir)

assert md5_spy.call_count == 0
dvc.pull(verify=True)
assert file_md5_spy.call_count == 3
pass

0 comments on commit b451de5

Please sign in to comment.