Skip to content

Commit

Permalink
refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
Dave Berenbaum authored and dberenbaum committed May 23, 2024
1 parent 856c902 commit 1f7733f
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 13 deletions.
30 changes: 17 additions & 13 deletions dvc/stage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,24 +418,28 @@ def transfer(

@rwlocked(read=["deps"], write=["outs"])
def reproduce(self, interactive=False, **kwargs) -> Optional["Stage"]:
force = kwargs.get("force", False)
allow_missing = kwargs.get("allow_missing", False)
pull = kwargs.get("pull", False)
allow_missing = kwargs.get("allow_missing", False) | pull
upstream = kwargs.pop("upstream", None)
if not (kwargs.get("force", False) or self.changed(allow_missing, upstream)):
if force:
pass
# Skip stages with missing data if otherwise unchanged
elif not self.changed(allow_missing, upstream):
if not isinstance(self, PipelineStage) and self.is_data_source:
logger.info("'%s' didn't change, skipping", self.addressing)
else:
logger.info("Stage '%s' didn't change, skipping", self.addressing)
return None
# Pull stages with missing data if otherwise unchanged
elif not self.changed(True, upstream) and pull:
try:
# Pull missing data
if pull and self.changed(False, upstream):
logger.info("Pulling data for %s", self)
self.repo.pull(self.addressing, jobs=kwargs.get("jobs", None))
self.checkout()
logger.info("Pulling data for %s", self)
self.repo.pull(self.addressing, jobs=kwargs.get("jobs", None))
self.checkout()
return None
except CheckoutError:
logger.info("Unable to pull data for %s", self)
else:
if not isinstance(self, PipelineStage) and self.is_data_source:
logger.info("'%s' didn't change, skipping", self.addressing)
else:
logger.info("Stage '%s' didn't change, skipping", self.addressing)
return None

msg = f"Going to reproduce {self}. Are you sure you want to continue?"
if interactive and not prompt.confirm(msg):
Expand Down
18 changes: 18 additions & 0 deletions tests/func/repro/test_repro_pull.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os

import pytest

from dvc.stage.cache import RunCacheNotSupported
from dvc.utils.fs import remove

Expand Down Expand Up @@ -81,3 +83,19 @@ def test_repro_pulls_persisted_output(tmp_dir, dvc, mocker, local_remote):

# stage is skipped
assert not dvc.reproduce(pull=True)


@pytest.mark.parametrize("allow_missing", [True, False])
def test_repro_pulls_allow_missing(
    tmp_dir, dvc, mocker, local_remote, allow_missing
):
    """Check ``reproduce(pull=True)`` with and without ``allow_missing``.

    A tracked file ``foo`` is generated and pushed, a ``copy-foo`` stage that
    depends on it is added and reproduced once, and then ``foo`` is removed
    from the workspace. A second reproduce with ``pull=True`` must report
    nothing reproduced in both parametrized cases, and ``foo`` must be present
    afterwards exactly when ``allow_missing`` is false.

    The ``mocker`` and ``local_remote`` fixtures are requested but never
    referenced in the body (``local_remote`` presumably configures the remote
    used by ``dvc.push()`` -- confirm against the fixture definition).
    """
    # Arrange: tracked dependency that is also available on the remote.
    tmp_dir.dvc_gen("foo", "foo")
    dvc.push()

    # Build the stage output once, then drop the dependency from the workspace.
    dvc.stage.add(name="copy-foo", cmd="cp foo bar", deps=["foo"], outs=["bar"])
    dvc.reproduce()
    remove("foo")

    reproduced = dvc.reproduce(pull=True, allow_missing=allow_missing)
    # stage is skipped
    assert not reproduced

    foo_present = (tmp_dir / "foo").exists()
    # data only pulled if allow_missing is false
    assert foo_present != allow_missing

0 comments on commit 1f7733f

Please sign in to comment.