Skip to content

Commit

Permalink
dvc: preserve comments/meta on add/imp/run (iterative#5001)
Browse files Browse the repository at this point in the history
  • Loading branch information
efiop authored Dec 1, 2020
1 parent 1e52bc9 commit 7c45711
Show file tree
Hide file tree
Showing 12 changed files with 94 additions and 18 deletions.
2 changes: 0 additions & 2 deletions dvc/dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,6 @@ def _dump_pipeline_file(self, stage):

if existing_entry:
orig_stage_data = data["stages"][stage.name]
if "meta" in orig_stage_data:
stage_data[stage.name]["meta"] = orig_stage_data["meta"]
apply_diff(stage_data[stage.name], orig_stage_data)
else:
data["stages"].update(stage_data)
Expand Down
3 changes: 2 additions & 1 deletion dvc/repo/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _create_stages(
):
from glob import iglob

from dvc.stage import Stage, create_stage
from dvc.stage import Stage, create_stage, restore_meta

if glob:
expanded_targets = [
Expand All @@ -181,6 +181,7 @@ def _create_stages(
outs=[out],
external=external,
)
restore_meta(stage)
if stage.can_be_skipped:
stage = None
else:
Expand Down
4 changes: 2 additions & 2 deletions dvc/repo/imp_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def imp_url(
desc=None,
):
from dvc.dvcfile import Dvcfile
from dvc.stage import Stage, create_stage
from dvc.stage import Stage, create_stage, restore_meta

out = resolve_output(url, out)
path, wdir, out = resolve_paths(self, out)
Expand All @@ -43,7 +43,7 @@ def imp_url(
outs=[out],
erepo=erepo,
)

restore_meta(stage)
if stage.can_be_skipped:
return None

Expand Down
3 changes: 2 additions & 1 deletion dvc/repo/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def _check_stage_exists(dvcfile, stage):
@scm_context
def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
from dvc.dvcfile import PIPELINE_FILE, Dvcfile
from dvc.stage import Stage, create_stage
from dvc.stage import Stage, create_stage, restore_meta

if not kwargs.get("cmd"):
raise InvalidArgumentError("command is not specified")
Expand Down Expand Up @@ -113,6 +113,7 @@ def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
stage = create_stage(
stage_cls, repo=self, path=path, params=params, **kwargs
)
restore_meta(stage)
if kwargs.get("run_cache", True) and stage.can_be_skipped:
return None

Expand Down
21 changes: 21 additions & 0 deletions dvc/stage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def loads_from(cls, repo, path, wdir, data):
Stage.PARAM_ALWAYS_CHANGED,
Stage.PARAM_MD5,
Stage.PARAM_DESC,
Stage.PARAM_META,
"name",
],
),
Expand Down Expand Up @@ -90,6 +91,24 @@ def create_stage(cls, repo, path, external=False, **kwargs):
return stage


def restore_meta(stage):
from .exceptions import StageNotFound

if not stage.dvcfile.exists():
return

try:
old = stage.reload()
except StageNotFound:
return

# will be used to restore comments later
# noqa, pylint: disable=protected-access
stage._stage_text = old._stage_text

stage.meta = old.meta


class Stage(params.StageParams):
# pylint:disable=no-value-for-parameter
# rwlocked() confuses pylint
Expand All @@ -109,6 +128,7 @@ def __init__(
stage_text=None,
dvcfile=None,
desc=None,
meta=None,
):
if deps is None:
deps = []
Expand All @@ -127,6 +147,7 @@ def __init__(
self._stage_text = stage_text
self._dvcfile = dvcfile
self.desc = desc
self.meta = meta
self.raw_data = RawData()

@property
Expand Down
1 change: 1 addition & 0 deletions dvc/stage/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def load_stage(cls, dvcfile, name, stage_data, lock_data=None):
stage = loads_from(PipelineStage, dvcfile.repo, path, wdir, stage_data)
stage.name = name
stage.desc = stage_data.get(Stage.PARAM_DESC)
stage.meta = stage_data.get(Stage.PARAM_META)

deps = project(stage_data, [stage.PARAM_DEPS, stage.PARAM_PARAMS])
fill_stage_dependencies(stage, **deps)
Expand Down
5 changes: 1 addition & 4 deletions dvc/stage/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def to_pipeline_file(stage: "PipelineStage"):
(stage.PARAM_PLOTS, plots),
(stage.PARAM_FROZEN, stage.frozen),
(stage.PARAM_ALWAYS_CHANGED, stage.always_changed),
(stage.PARAM_META, stage.meta),
]
return {
stage.name: OrderedDict([(key, value) for key, value in res if value])
Expand Down Expand Up @@ -185,10 +186,6 @@ def to_single_stage_file(stage: "Stage"):
text = stage._stage_text # noqa, pylint: disable=protected-access
if text is not None:
saved_state = parse_yaml_for_update(text, stage.path)
# Stage doesn't work with meta in any way, so .dumpd() doesn't
# have it. We simply copy it over.
if "meta" in saved_state:
state["meta"] = saved_state["meta"]
apply_diff(state, saved_state)
state = saved_state
return state
1 change: 1 addition & 0 deletions dvc/stage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ def get_dump(stage):
stage.PARAM_DEPS: [d.dumpd() for d in stage.deps],
stage.PARAM_OUTS: [o.dumpd() for o in stage.outs],
stage.PARAM_ALWAYS_CHANGED: stage.always_changed,
stage.PARAM_META: stage.meta,
}.items()
if value
}
Expand Down
25 changes: 25 additions & 0 deletions tests/func/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import os
import shutil
import textwrap
import time

import colorama
Expand Down Expand Up @@ -922,3 +923,27 @@ def test_add_with_cache_link_error(tmp_dir, dvc, mocker, caplog):
assert (tmp_dir / ".dvc" / "cache").read_text() == {
"ac": {"bd18db4cc2f85cedef654fccc4a4d8": "foo"}
}


def test_add_preserve_meta(tmp_dir, dvc):
text = textwrap.dedent(
"""\
# top comment
outs:
- path: foo # out comment
meta: some metadata
"""
)
tmp_dir.gen("foo.dvc", text)

tmp_dir.dvc_gen("foo", "foo")
assert (tmp_dir / "foo.dvc").read_text() == textwrap.dedent(
"""\
# top comment
outs:
- path: foo # out comment
md5: acbd18db4cc2f85cedef654fccc4a4d8
size: 3
meta: some metadata
"""
)
2 changes: 1 addition & 1 deletion tests/func/test_dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,8 @@ def test_dvcfile_dump_preserves_meta(tmp_dir, dvc, run_copy):

data = dvcfile._load()[0]
metadata = {"name": "copy-file"}
stage.meta = metadata
data["stages"]["run_copy"]["meta"] = metadata
dump_yaml(dvcfile.path, data)

dvcfile.dump(stage)
assert dvcfile._load()[0] == data
Expand Down
34 changes: 34 additions & 0 deletions tests/func/test_import_url.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import textwrap
from uuid import uuid4

import pytest
Expand Down Expand Up @@ -207,3 +208,36 @@ def test_import_url_dir(tmp_dir, dvc, workspace, stage_md5, dir_md5):
)

assert dvc.status() == {}


def test_import_url_preserve_meta(tmp_dir, dvc):
text = textwrap.dedent(
"""\
# top comment
deps:
- path: foo # dep comment
outs:
- path: bar # out comment
meta: some metadata
"""
)
tmp_dir.gen("bar.dvc", text)

tmp_dir.gen("foo", "foo")
dvc.imp_url("foo", out="bar")
assert (tmp_dir / "bar.dvc").read_text() == textwrap.dedent(
"""\
# top comment
deps:
- path: foo # dep comment
md5: acbd18db4cc2f85cedef654fccc4a4d8
size: 3
outs:
- path: bar # out comment
md5: acbd18db4cc2f85cedef654fccc4a4d8
size: 3
meta: some metadata
md5: be7ade0aa89cc8d56e320867a9de9740
frozen: true
"""
)
11 changes: 4 additions & 7 deletions tests/func/test_run_single_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,10 +1005,7 @@ def test_metrics_dir(tmp_dir, dvc, caplog, run_copy_metrics, metrics_type):
)


def test_run_force_doesnot_preserve_comments_and_meta(tmp_dir, dvc, run_copy):
"""Depends on loading of stage on `run` where we don't check the file
for stage already exists, so we don't copy `stage_text` over due to which
`meta` and `comments` don't get preserved."""
def test_run_force_preserves_comments_and_meta(tmp_dir, dvc, run_copy):
tmp_dir.gen({"foo": "foo", "foo1": "foo1"})
text = textwrap.dedent(
"""\
Expand All @@ -1017,7 +1014,7 @@ def test_run_force_doesnot_preserve_comments_and_meta(tmp_dir, dvc, run_copy):
- path: copy.py
- path: foo
outs:
# comment not preserved
# comment preserved
- path: bar
meta:
name: copy-foo-bar
Expand All @@ -1030,5 +1027,5 @@ def test_run_force_doesnot_preserve_comments_and_meta(tmp_dir, dvc, run_copy):

run_copy("foo1", "bar1", single_stage=True, force=True, fname="bar.dvc")

assert "comment" not in (tmp_dir / "bar.dvc").read_text()
assert "meta" not in (tmp_dir / "bar.dvc").read_text()
assert "comment" in (tmp_dir / "bar.dvc").read_text()
assert "meta" in (tmp_dir / "bar.dvc").read_text()

0 comments on commit 7c45711

Please sign in to comment.