Skip to content

Commit

Permalink
Rename dvcx to DataChain in DVC <> DataChain integration (iterative#1…
Browse files Browse the repository at this point in the history
  • Loading branch information
shcheklein authored Nov 5, 2024
1 parent 70fc4e6 commit 2431ec6
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 76 deletions.
8 changes: 4 additions & 4 deletions .github/.test_durations
Original file line number Diff line number Diff line change
Expand Up @@ -835,9 +835,9 @@
"tests/func/test_dataset.py::test_dvc": 0.46001330000012786,
"tests/func/test_dataset.py::test_dvc_dataset_pipeline": 0.7948393000000351,
"tests/func/test_dataset.py::test_dvc_dump": 0.17996560000005957,
"tests/func/test_dataset.py::test_dvcx": 0.22482930000001033,
"tests/func/test_dataset.py::test_dvcx_dataset_pipeline": 0.43405649999999696,
"tests/func/test_dataset.py::test_dvcx_dump": 0.1858166000000665,
"tests/func/test_dataset.py::test_datachain": 0.22482930000001033,
"tests/func/test_dataset.py::test_datachain_dataset_pipeline": 0.43405649999999696,
"tests/func/test_dataset.py::test_datachain_dump": 0.1858166000000665,
"tests/func/test_dataset.py::test_invalidation": 0.16291839999996682,
"tests/func/test_dataset.py::test_parametrized": 0.19277699999997822,
"tests/func/test_dataset.py::test_pipeline_when_not_in_sync": 0.2063928999999689,
Expand Down Expand Up @@ -1627,7 +1627,7 @@
"tests/unit/command/test_data_sync.py::test_pull": 0.17549940000003517,
"tests/unit/command/test_data_sync.py::test_push": 0.16576090000000931,
"tests/unit/command/test_dataset.py::test_add[spec0-lock0-Adding ds (url:/path @ main)\\n]": 0.22054290000005494,
"tests/unit/command/test_dataset.py::test_add[spec1-lock1-Adding mydataset (dvcx://dataset @ v1)\\n]": 0.22979979999990974,
"tests/unit/command/test_dataset.py::test_add[spec1-lock1-Adding mydataset (dc://dataset @ v1)\\n]": 0.22979979999990974,
"tests/unit/command/test_dataset.py::test_add[spec2-lock2-Adding mydataset (s3://bucket/path)\\n]": 0.20264819999988504,
"tests/unit/command/test_dataset.py::test_add_already_exists": 0.2456899000001158,
"tests/unit/command/test_dataset.py::test_update[spec0-old_lock0-new_lock0-expected_outputs0-missing]": 0.2354315999998562,
Expand Down
12 changes: 6 additions & 6 deletions dvc/api/dataset.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Literal, TypedDict, Union


class DVCXDataset(TypedDict):
type: Literal["dvcx"]
class DatachainDataset(TypedDict):
type: Literal["dc"]
name: str
version: int

Expand All @@ -20,7 +20,7 @@ class URLDataset(TypedDict):
path: str


def get(name: str) -> Union[DVCXDataset, DVCDataset, URLDataset]:
def get(name: str) -> Union[DatachainDataset, DVCDataset, URLDataset]:
from difflib import get_close_matches

from dvc.fs import get_cloud_fs
Expand All @@ -46,9 +46,9 @@ def get(name: str) -> Union[DVCXDataset, DVCDataset, URLDataset]:
path=dataset.lock.path,
sha=dataset.lock.rev_lock,
)
if dataset.type == "dvcx":
return DVCXDataset(
type="dvcx", name=dataset.name_version[0], version=dataset.lock.version
if dataset.type == "dc":
return DatachainDataset(
type="dc", name=dataset.name_version[0], version=dataset.lock.version
)
if dataset.type == "url":
fs_cls, _, path = get_cloud_fs(repo.config, url=dataset.lock.url)
Expand Down
12 changes: 6 additions & 6 deletions dvc/commands/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def display(cls, name: str, dataset: "Dataset", action: str = "Adding"):

url = dataset.spec.url
ver: str = ""
if dataset.type == "dvcx":
if dataset.type == "dc":
ver = f"v{dataset.lock.version}"
if dataset.type == "dvc":
if dataset.lock.path:
Expand All @@ -55,7 +55,7 @@ def run(self):
raise DvcException("--path can't be used without --dvc")

d = vars(self.args)
for key in ["dvc", "dvcx", "url"]:
for key in ["dvc", "dc", "url"]:
if url := d.pop(key, None):
d.update({"type": key, "url": url})
break
Expand Down Expand Up @@ -88,8 +88,8 @@ def display(self, name: str, dataset: "Dataset", new: "Dataset"):
assert new.lock

v: Optional[tuple[str, str]] = None
if dataset.type == "dvcx":
assert new.type == "dvcx"
if dataset.type == "dc":
assert new.type == "dc"
if new.lock.version < dataset.lock.version:
action = "Downgrading"

Expand Down Expand Up @@ -168,7 +168,7 @@ def add_parser(subparsers, parent_parser):

url_exclusive_group = ds_add_parser.add_mutually_exclusive_group(required=True)
url_exclusive_group.add_argument(
"--dvcx", metavar="name", help="Name of the dvcx dataset to track"
"--dc", metavar="name", help="Name of the DataChain dataset to track"
)
url_exclusive_group.add_argument(
"--dvc",
Expand Down Expand Up @@ -219,7 +219,7 @@ def add_parser(subparsers, parent_parser):
"--rev",
"--version",
nargs="?",
help="DVCX dataset version or Git revision (e.g. SHA, branch, tag)",
help="DataChain dataset version or Git revision (e.g. SHA, branch, tag)",
metavar="<version>",
)
ds_update_parser.set_defaults(func=CmdDatasetUpdate)
43 changes: 23 additions & 20 deletions dvc/repo/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from dvc_data.hashfile.meta import Meta

if TYPE_CHECKING:
from dvcx.dataset import DatasetRecord, DatasetVersion # type: ignore[import]
from datachain.dataset import DatasetRecord, DatasetVersion # type: ignore[import]
from typing_extensions import Self

from dvc.repo import Repo
Expand All @@ -29,10 +29,10 @@ def _get_dataset_record(name: str) -> "DatasetRecord":
from dvc.exceptions import DvcException

try:
from dvcx.catalog import get_catalog # type: ignore[import]
from datachain.catalog import get_catalog # type: ignore[import]

except ImportError as exc:
raise DvcException("dvcx is not installed") from exc
raise DvcException("datachain is not installed") from exc

catalog = get_catalog()
return catalog.get_remote_dataset(name)
Expand Down Expand Up @@ -86,7 +86,7 @@ def from_dict(cls, d: dict[str, Any]) -> "Self":
class DatasetSpec(SerDe):
name: str
url: str
type: Literal["dvc", "dvcx", "url"]
type: Literal["dvc", "dc", "url"]


@frozen(kw_only=True)
Expand All @@ -108,7 +108,7 @@ class DVCDatasetLock(DVCDatasetSpec):


@frozen(kw_only=True)
class DVCXDatasetLock(DatasetSpec):
class DatachainDatasetLock(DatasetSpec):
version: int
created_at: datetime = field(converter=to_datetime)

Expand Down Expand Up @@ -160,13 +160,13 @@ def update(self, repo, rev: Optional[str] = None, **kwargs) -> "Self":


@frozen(kw_only=True)
class DVCXDataset:
class DatachainDataset:
manifest_path: str
spec: "DatasetSpec"
lock: "Optional[DVCXDatasetLock]" = field(default=None)
lock: "Optional[DatachainDatasetLock]" = field(default=None)
_invalidated: bool = field(default=False, eq=False, repr=False)

type: ClassVar[Literal["dvcx"]] = "dvcx"
type: ClassVar[Literal["dc"]] = "dc"

@property
def pinned(self) -> bool:
Expand All @@ -193,7 +193,7 @@ def update(
name, _version = self.name_version
version = version if version is not None else _version
version_info = _get_dataset_info(name, record=record, version=version)
lock = DVCXDatasetLock(
lock = DatachainDatasetLock(
**self.spec.to_dict(),
version=version_info.version,
created_at=version_info.created_at,
Expand Down Expand Up @@ -226,9 +226,9 @@ def update(self, repo, **kwargs):
return evolve(self, lock=lock)


Lock = Union[DVCDatasetLock, DVCXDatasetLock, URLDatasetLock]
Lock = Union[DVCDatasetLock, DatachainDatasetLock, URLDatasetLock]
Spec = Union[DatasetSpec, DVCDatasetSpec]
Dataset = Union[DVCDataset, DVCXDataset, URLDataset]
Dataset = Union[DVCDataset, DatachainDataset, URLDataset]


class DatasetNotFoundError(DvcException, KeyError):
Expand Down Expand Up @@ -307,13 +307,13 @@ def _spec_from_info(spec: dict[str, Any]) -> Spec:
raise ValueError("type should be present in spec")
if typ == "dvc":
return DVCDatasetSpec.from_dict(spec)
if typ in {"dvcx", "url"}:
if typ in {"dc", "url"}:
return DatasetSpec.from_dict(spec)
raise ValueError(f"unknown dataset type: {spec.get('type', '')}")

@staticmethod
def _lock_from_info(lock: Optional[dict[str, Any]]) -> Optional[Lock]:
kl = {"dvc": DVCDatasetLock, "dvcx": DVCXDatasetLock, "url": URLDatasetLock}
kl = {"dvc": DVCDatasetLock, "dc": DatachainDatasetLock, "url": URLDatasetLock}
if lock and (cls := kl.get(lock.get("type", ""))): # type: ignore[assignment]
return cls.from_dict(lock) # type: ignore[attr-defined]
return None
Expand Down Expand Up @@ -356,9 +356,9 @@ def _build_dataset(
lock=lock,
invalidated=_invalidated,
)
if spec.type == "dvcx":
assert lock is None or isinstance(lock, DVCXDatasetLock)
return DVCXDataset(
if spec.type == "dc":
assert lock is None or isinstance(lock, DatachainDatasetLock)
return DatachainDataset(
manifest_path=manifest_path,
spec=spec,
lock=lock,
Expand All @@ -374,7 +374,7 @@ def add(
manifest_path: StrPath = "dvc.yaml",
**kwargs: Any,
) -> Dataset:
assert type in {"dvc", "dvcx", "url"}
assert type in {"dvc", "dc", "url"}
kwargs.update({"name": name, "url": url, "type": type})
dataset = self._build_dataset(os.path.abspath(manifest_path), kwargs)
dataset = dataset.update(self.repo)
Expand All @@ -389,13 +389,16 @@ def update(self, name, **kwargs) -> tuple[Dataset, Dataset]:

if dataset.type == "url" and (version or kwargs.get("rev")):
raise ValueError("cannot update version/revision for a url")
if dataset.type == "dvcx" and version is not None:
if dataset.type == "dc" and version is not None:
if not isinstance(version, int):
raise TypeError(
f"dvcx version has to be an integer, got {type(version).__name__!r}"
"DataChain dataset version has to be an integer, "
f"got {type(version).__name__!r}"
)
if version < 1:
raise ValueError(f"dvcx version should be >=1, got {version}")
raise ValueError(
f"DataChain dataset version should be >=1, got {version}"
)

new = dataset.update(self.repo, **kwargs)

Expand Down
6 changes: 3 additions & 3 deletions tests/func/parsing/test_top_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def test_artifacts(tmp_dir, dvc):
def test_datasets(tmp_dir, dvc):
template = {
"datasets": [
{"name": "${ds1.name}", "url": "${ds1.url}", "type": "dvcx"},
{"name": "${ds1.name}", "url": "${ds1.url}", "type": "dc"},
{
"name": "${ds2.name}",
"url": "${ds2.url}",
Expand All @@ -138,7 +138,7 @@ def test_datasets(tmp_dir, dvc):

(tmp_dir / "params.yaml").dump(
{
"ds1": {"name": "dogs", "url": "dvcx://dogs"},
"ds1": {"name": "dogs", "url": "dc://dogs"},
"ds2": {
"name": "example-get-started",
"url": "git@github.com:iterative/example-get-started.git",
Expand All @@ -153,7 +153,7 @@ def test_datasets(tmp_dir, dvc):

resolver = DataResolver(dvc, tmp_dir, template)
assert resolver.resolve_datasets() == [
{"name": "dogs", "url": "dvcx://dogs", "type": "dvcx"},
{"name": "dogs", "url": "dc://dogs", "type": "dc"},
{
"name": "example-get-started",
"url": "git@github.com:iterative/example-get-started.git",
Expand Down
Loading

0 comments on commit 2431ec6

Please sign in to comment.