Skip to content

Commit

Permalink
dataset: provide --rev to track a different dvcx dataset version or git rev (iterative#10325)
Browse files Browse the repository at this point in the history

* dataset: provide --rev to track a different dvcx dataset version or git rev

* fix message

* add tests; disallow --rev for url datasets
  • Loading branch information
skshetry authored Feb 27, 2024
1 parent 9f66897 commit 4cb658f
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 8 deletions.
26 changes: 22 additions & 4 deletions dvc/commands/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,9 @@ def display(self, name: str, dataset: "Dataset", new: "Dataset"):
from dvc.commands.checkout import log_changes
from dvc.ui import ui

action = "Updating"
if not dataset.lock:
return CmdDatasetAdd.display(name, new, "Updating")
return CmdDatasetAdd.display(name, new, action)
if dataset == new:
ui.write("[yellow]Nothing to update[/]", styled=True)
return
Expand All @@ -78,6 +79,9 @@ def display(self, name: str, dataset: "Dataset", new: "Dataset"):
v: Optional[tuple[str, str]] = None
if dataset.type == "dvcx":
assert new.type == "dvcx"
if new.lock.version < dataset.lock.version:
action = "Downgrading"

v = (f"v{dataset.lock.version}", f"v{new.lock.version}")
if dataset.type == "dvc":
assert new.type == "dvc"
Expand All @@ -92,7 +96,7 @@ def display(self, name: str, dataset: "Dataset", new: "Dataset"):
else:
part = ui.rich_text(dataset.spec.url, "repr.url")
changes = ui.rich_text.assemble("(", part, ")")
ui.write("Updating", ui.rich_text(name, "cyan"), changes, styled=True)
ui.write(action, ui.rich_text(name, "cyan"), changes, styled=True)
if dataset.type == "url":
assert new.type == "url"
stats = diff_files(dataset.lock.files, new.lock.files)
Expand All @@ -104,9 +108,17 @@ def run(self):
from dvc.repo.datasets import DatasetNotFoundError
from dvc.ui import ui

version = None
if self.args.rev:
try:
version = int(self.args.rev.lstrip("v"))
except ValueError:
version = self.args.rev

d = vars(self.args) | {"version": version}
with self.repo.scm_context:
try:
dataset, new = self.repo.datasets.update(**vars(self.args))
dataset, new = self.repo.datasets.update(**d)
except DatasetNotFoundError:
logger.exception("")
if matches := get_close_matches(self.args.name, self.repo.datasets):
Expand Down Expand Up @@ -183,9 +195,15 @@ def add_parser(subparsers, parent_parser):
ds_update_parser = ds_subparsers.add_parser(
"update",
parents=[parent_parser],
description=append_doc_link(dataset_update_help, "dataset/add"),
description=append_doc_link(dataset_update_help, "dataset/update"),
formatter_class=formatter.RawDescriptionHelpFormatter,
help=dataset_update_help,
)
ds_update_parser.add_argument("name", help="Name of the dataset to update")
ds_update_parser.add_argument(
"--rev",
nargs="?",
help="DVCX dataset version or Git revision (e.g. SHA, branch, tag)",
metavar="<version>",
)
ds_update_parser.set_defaults(func=CmdDatasetUpdate)
19 changes: 15 additions & 4 deletions dvc/repo/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,8 @@ def _get_dataset_info(
) -> "DatasetVersion":
record = record or _get_dataset_record(name)
assert record
v = version or record.latest_version
assert v
assert v >= 1
v = record.latest_version if version is None else version
assert v is not None
return record.get_version(v)


Expand Down Expand Up @@ -208,7 +207,7 @@ def update(
**kwargs,
) -> "Self":
name, _version = self.name_version
version = version or _version
version = version if version is not None else _version
version_info = _get_dataset_info(name, record=record, version=version)
lock = DVCXDatasetLock(
**self.spec.to_dict(),
Expand Down Expand Up @@ -400,6 +399,18 @@ def add(

def update(self, name, **kwargs) -> tuple[Dataset, Dataset]:
dataset = self[name]
version = kwargs.get("version")

if dataset.type == "url" and (version or kwargs.get("rev")):
raise ValueError("cannot update version/revision for a url")
if dataset.type == "dvcx" and version is not None:
if not isinstance(version, int):
raise TypeError(
f"dvcx version has to be an integer, got {type(version).__name__!r}"
)
if version < 1:
raise ValueError(f"dvcx version should be >=1, got {version}")

new = dataset.update(self.repo, **kwargs)

self.dump(new, old=dataset)
Expand Down
10 changes: 10 additions & 0 deletions tests/unit/command/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,16 @@ def test_add_already_exists(dvc, caplog, mocker):
"updated": "Updating mydataset (v1 -> v2)\n",
},
),
(
{"name": "mydataset", "url": "dvcx://dataset", "type": "dvcx"},
{"rev_lock": "0" * 40, "version": 2, "created_at": datetime.now()},
{"version": 1},
{
"missing": "Updating mydataset (dvcx://dataset @ v1)\n",
"unchanged": "Nothing to update\n",
"updated": "Downgrading mydataset (v2 -> v1)\n",
},
),
(
{"name": "mydataset", "url": "s3://bucket/path", "type": "url"},
{
Expand Down

0 comments on commit 4cb658f

Please sign in to comment.