Skip to content

Commit

Permalink
output: support loading/dumping dir trees from metadata
Browse files: browse the repository at this point in the history
  • Loading branch information
pmrowla authored and efiop committed Sep 15, 2022
1 parent a9c7032 commit c29e721
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 12 deletions.
2 changes: 1 addition & 1 deletion dvc/dependency/param.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def __init__(self, stage, path, params=None, repo=None):
super().__init__(stage, path, repo=repo)
self.hash_info = hash_info

def dumpd(self):
def dumpd(self, **kwargs):
ret = super().dumpd()
if not self.hash_info:
ret[self.PARAM_PARAMS] = self.params or {}
Expand Down
2 changes: 1 addition & 1 deletion dvc/dependency/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def status(self):
def save(self):
pass

def dumpd(self):
def dumpd(self, **kwargs):
return {self.PARAM_PATH: self.def_path, self.PARAM_REPO: self.def_repo}

def download(self, to, jobs=None):
Expand Down
2 changes: 1 addition & 1 deletion dvc/dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def dump(self, stage, **kwargs):
if self.verify:
check_dvcfile_path(self.repo, self.path)
logger.debug("Saving information to '%s'.", relpath(self.path))
dump_yaml(self.path, serialize.to_single_stage_file(stage))
dump_yaml(self.path, serialize.to_single_stage_file(stage, **kwargs))
self.repo.scm_context.track_file(self.relpath)

def remove_stage(self, stage): # pylint: disable=unused-argument
Expand Down
33 changes: 31 additions & 2 deletions dvc/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def loadd_from(stage, d_list):
live = d.pop(Output.PARAM_LIVE, False)
remote = d.pop(Output.PARAM_REMOTE, None)
annot = {field: d.pop(field, None) for field in ANNOTATION_FIELDS}
files = d.pop(Output.PARAM_FILES, None)
ret.append(
_get(
stage,
Expand All @@ -98,6 +99,7 @@ def loadd_from(stage, d_list):
live=live,
remote=remote,
**annot,
files=files,
)
)
return ret
Expand Down Expand Up @@ -243,6 +245,7 @@ class Output:
PARAM_PATH = "path"
PARAM_CACHE = "cache"
PARAM_CHECKPOINT = "checkpoint"
PARAM_FILES = "files"
PARAM_METRIC = "metric"
PARAM_METRIC_TYPE = "type"
PARAM_METRIC_XPATH = "xpath"
Expand Down Expand Up @@ -292,6 +295,7 @@ def __init__(
remote=None,
repo=None,
fs_config=None,
files=None,
):
self.annot = Annotation(
desc=desc, type=type, labels=labels or [], meta=meta or {}
Expand Down Expand Up @@ -340,6 +344,7 @@ def __init__(
# should be absolute and don't contain remote:// refs.
self.stage = stage
self.meta = meta
self._dump_files = files is not None
self.use_cache = False if self.IS_DEPENDENCY else cache
self.metric = False if self.IS_DEPENDENCY else metric
self.plot = False if self.IS_DEPENDENCY else plot
Expand Down Expand Up @@ -367,6 +372,12 @@ def __init__(
name=self.hash_name,
value=getattr(self.meta, self.hash_name, None),
)
if self.hash_info and self.hash_info.isdir:
self.meta.isdir = True
if files:
tree = Tree.from_list(files, hash_name=self.hash_name)
tree.digest()
self.obj = tree

def _parse_path(self, fs, fs_path):
parsed = urlparse(self.def_path)
Expand Down Expand Up @@ -708,8 +719,10 @@ def _commit_granular_dir(self, filter_info):
)
return checkout_obj

def dumpd(self):
ret = {**self.hash_info.to_dict(), **self.meta.to_dict()}
def dumpd(self, **kwargs):
meta = self.meta.to_dict()
meta.pop("isdir", None)
ret = {**self.hash_info.to_dict(), **meta}

if self.is_in_repo:
path = self.fs.path.as_posix(
Expand Down Expand Up @@ -752,6 +765,15 @@ def dumpd(self):
if self.remote:
ret[self.PARAM_REMOTE] = self.remote

if (
self.use_cache
and self.is_in_repo
and self.hash_info.isdir
and (kwargs.get("with_files") or self._dump_files)
):
assert self.obj
ret[self.PARAM_FILES] = self.obj.as_list(with_meta=True)

return ret

def verify_metric(self):
Expand Down Expand Up @@ -1155,10 +1177,17 @@ def is_plot(self) -> bool:
Output.PARAM_CHECKPOINT: bool,
}

# Schema for a single entry of an output's "files" list.
# It merges the checksum and meta schemas and additionally requires the
# entry's relative-path key (Tree.PARAM_RELPATH) to be present as a string.
# NOTE(review): `Required` looks like the validation library's
# mandatory-key marker, so not every key here is a plain `str` as the
# `Dict[str, Any]` annotation suggests -- confirm.
DIR_FILES_SCHEMA: Dict[str, Any] = {
**CHECKSUMS_SCHEMA,
**META_SCHEMA,
Required(Tree.PARAM_RELPATH): str,
}

# Schema for an output entry: the artifact and annotation schemas plus the
# output-specific cache/metric/remote keys.
SCHEMA = {
**ARTIFACT_SCHEMA,
**ANNOTATION_SCHEMA,
Output.PARAM_CACHE: bool,
Output.PARAM_METRIC: Output.METRIC_SCHEMA,
Output.PARAM_REMOTE: str,
# Optional list of per-file entries, each validated against
# DIR_FILES_SCHEMA.
Output.PARAM_FILES: [DIR_FILES_SCHEMA],
}
4 changes: 2 additions & 2 deletions dvc/stage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,8 @@ def update(
def reload(self):
return self.dvcfile.stage

def dumpd(self):
return get_dump(self)
def dumpd(self, **kwargs):
return get_dump(self, **kwargs)

def compute_md5(self):
# `dvc add`ed files don't need stage md5
Expand Down
4 changes: 2 additions & 2 deletions dvc/stage/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,8 @@ def to_lockfile(stage: "PipelineStage") -> dict:
return {stage.name: to_single_stage_lockfile(stage)}


def to_single_stage_file(stage: "Stage"):
state = stage.dumpd()
def to_single_stage_file(stage: "Stage", **kwargs):
state = stage.dumpd(**kwargs)

# When we load a stage we parse yaml with a fast parser, which strips
# off all the comments and formatting. To retain those on update we do
Expand Down
6 changes: 3 additions & 3 deletions dvc/stage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def resolve_paths(fs, path, wdir=None):
return path, wdir


def get_dump(stage):
def get_dump(stage, **kwargs):
return {
key: value
for key, value in {
Expand All @@ -235,8 +235,8 @@ def get_dump(stage):
stage.PARAM_CMD: stage.cmd,
stage.PARAM_WDIR: resolve_wdir(stage.wdir, stage.path),
stage.PARAM_FROZEN: stage.frozen,
stage.PARAM_DEPS: [d.dumpd() for d in stage.deps],
stage.PARAM_OUTS: [o.dumpd() for o in stage.outs],
stage.PARAM_DEPS: [d.dumpd(**kwargs) for d in stage.deps],
stage.PARAM_OUTS: [o.dumpd(**kwargs) for o in stage.outs],
stage.PARAM_ALWAYS_CHANGED: stage.always_changed,
stage.PARAM_META: stage.meta,
}.items()
Expand Down

0 comments on commit c29e721

Please sign in to comment.