Skip to content

Commit

Permalink
cloud-versioning: better handling for directories
Browse files Browse the repository at this point in the history
  • Loading branch information
efiop committed Sep 25, 2022
1 parent c039e9a commit 8c48464
Show file tree
Hide file tree
Showing 8 changed files with 25 additions and 36 deletions.
8 changes: 4 additions & 4 deletions dvc/dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,10 +237,10 @@ def dump(
self._dump_pipeline_file(stage)

if update_lock:
self._dump_lockfile(stage)
self._dump_lockfile(stage, **kwargs)

def _dump_lockfile(self, stage):
self._lockfile.dump(stage)
def _dump_lockfile(self, stage, **kwargs):
self._lockfile.dump(stage, **kwargs)

@staticmethod
def _check_if_parametrized(stage, action: str = "dump") -> None:
Expand Down Expand Up @@ -366,7 +366,7 @@ def latest_version_info(self):
return {SCHEMA_KWD: version}

def dump(self, stage, **kwargs):
stage_data = serialize.to_lockfile(stage)
stage_data = serialize.to_lockfile(stage, **kwargs)

with modify_yaml(self.path, fs=self.repo.fs) as data:
version = LOCKFILE_VERSION.from_dict(data)
Expand Down
7 changes: 2 additions & 5 deletions dvc/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -765,11 +765,8 @@ def dumpd(self, **kwargs):
if self.remote:
ret[self.PARAM_REMOTE] = self.remote

if (
self.use_cache
and self.is_in_repo
and self.hash_info.isdir
and (kwargs.get("with_files") or self.files is not None)
if self.hash_info.isdir and (
kwargs.get("with_files") or self.files is not None
):
if self.obj:
obj = self.obj
Expand Down
19 changes: 1 addition & 18 deletions dvc/repo/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@


def _fetch_worktree(repo, remote):
from dvc_data.index import md5, save
from dvc_data.index import save

index = repo.index.data["repo"]
for key, entry in index.iteritems():
Expand All @@ -24,24 +24,7 @@ def _fetch_worktree(repo, remote):
remote.path,
*key,
)
md5(index)
save(index)

for stage in repo.index.stages:
for out in stage.outs:
if not out.use_cache:
continue

if not out.is_in_repo:
continue

workspace, key = out.index_key
entry = repo.index.data[workspace][key]
out.hash_info = entry.hash_info
out.meta = entry.meta

stage.dvcfile.dump(stage)

return len(index)


Expand Down
5 changes: 4 additions & 1 deletion dvc/repo/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,12 @@ def data(self) -> "Dict[str, DataIndex]":

data_index = by_workspace[workspace]

if out.files:
out.obj = out.get_obj()

data_index[key] = DataIndexEntry(
meta=out.meta,
obj=out.get_obj() if out.files else out.obj,
obj=out.obj,
hash_info=out.hash_info,
odb=out.odb,
cache=out.odb,
Expand Down
6 changes: 2 additions & 4 deletions dvc/repo/push.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,11 @@

def _push_worktree(repo, remote):
from dvc_data.hashfile.tree import tree_from_index
from dvc_data.index import build, checkout, collect
from dvc_data.index import checkout, collect

index = repo.index.data["repo"]
checkout(index, remote.path, remote.fs, force=True)
build(index, remote.path, remote.fs)
if any(out.isdir() for out in repo.index.outs):
collect(index, remote.path, remote.fs)
collect(index, remote.path, remote.fs, update=True)

for stage in repo.index.stages:
for out in stage.outs:
Expand Down
1 change: 1 addition & 0 deletions dvc/stage/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def fill_from_lock(stage, lock_data=None):
item.hash_info = HashInfo(item.hash_name, hash_value)
if item.hash_info and item.hash_info.isdir:
item.meta.isdir = True
item.files = get_in(checksums, [key, path, item.PARAM_FILES])

@classmethod
def load_stage(cls, dvcfile, name, stage_data, lock_data=None):
Expand Down
13 changes: 10 additions & 3 deletions dvc/stage/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def to_pipeline_file(stage: "PipelineStage"):
}


def to_single_stage_lockfile(stage: "Stage") -> dict:
def to_single_stage_lockfile(stage: "Stage", **kwargs) -> dict:
assert stage.cmd

def _dumpd(item):
Expand All @@ -164,6 +164,13 @@ def _dumpd(item):
*meta_d.items(),
]

if item.hash_info.isdir and kwargs.get("with_files"):
if item.obj:
obj = item.obj
else:
obj = item.get_obj()
ret.append((item.PARAM_FILES, obj.as_list(with_meta=True)))

return OrderedDict(ret)

res = OrderedDict([("cmd", stage.cmd)])
Expand All @@ -183,9 +190,9 @@ def _dumpd(item):
return res


def to_lockfile(stage: "PipelineStage") -> dict:
def to_lockfile(stage: "PipelineStage", **kwargs) -> dict:
assert stage.name
return {stage.name: to_single_stage_lockfile(stage)}
return {stage.name: to_single_stage_lockfile(stage, **kwargs)}


def to_single_stage_file(stage: "Stage", **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ install_requires =
dvc-render==0.0.11
dvc-task==0.1.2
dvclive>=0.10.0
dvc-data==0.12.0
dvc-data==0.13.0
dvc-http==2.27.2
hydra-core>=1.1.0
iterative-telemetry==0.0.5
Expand Down

0 comments on commit 8c48464

Please sign in to comment.