Commit
deps: bump dvc-data to 0.31.3
efiop committed Jan 15, 2023
1 parent 309ae4d commit a92ace0
Showing 13 changed files with 66 additions and 34 deletions.
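Apart from the dependency bump itself, the changes below are typing adjustments for the new dvc-data release: values that are now Optional or typed as a broader base class get narrowed with typing.cast, an explicit variable annotation, an assert, or a targeted # type: ignore. A minimal self-contained sketch of the recurring cast pattern (class and function names here are illustrative stand-ins, not dvc's real API):

from typing import Optional, cast

class HashFile: ...

class Tree(HashFile):
    def filter(self, prefix: str) -> Optional["Tree"]:
        return self  # stub body, for illustration only

def collect_dir_cache(obj: Optional[HashFile], prefix: str) -> Optional[Tree]:
    # The declared type is the broader Optional[HashFile], but this code
    # path is only reached for directory outputs, where obj is known to be
    # a Tree. cast() narrows the type for the checker and does nothing at
    # runtime, so it relies on that invariant actually holding.
    tree = cast(Tree, obj)
    return tree.filter(prefix)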
16 changes: 11 additions & 5 deletions dvc/dependency/param.py
@@ -2,7 +2,7 @@
 import os
 import typing
 from collections import defaultdict
-from typing import Dict
+from typing import Dict, cast

 import dpath.util
 from voluptuous import Any
@@ -41,7 +41,10 @@ def __init__(self, stage, path, params=None, repo=None):
         self.params = list(params) if params else []
         hash_info = HashInfo()
         if isinstance(params, dict):
-            hash_info = HashInfo(self.PARAM_PARAMS, params)
+            hash_info = HashInfo(
+                self.PARAM_PARAMS,
+                params,  # type: ignore[arg-type]
+            )
         repo = repo or stage.repo
         path = path or os.path.join(repo.root_dir, self.DEFAULT_PARAMS_FILE)
         super().__init__(stage, path, repo=repo)
@@ -64,7 +67,10 @@ def fill_values(self, values=None):
         for param in self.params:
             if param in values:
                 info[param] = values[param]
-        self.hash_info = HashInfo(self.PARAM_PARAMS, info)
+        self.hash_info = HashInfo(
+            self.PARAM_PARAMS,
+            info,  # type: ignore[arg-type]
+        )

     def read_params(
         self, flatten: bool = True, **kwargs: typing.Any
@@ -105,7 +111,7 @@ def workspace_status(self):
         from funcy import ldistinct

         status: Dict[str, Any] = defaultdict(dict)
-        info = self.hash_info.value if self.hash_info else {}
+        info = cast(dict, self.hash_info.value) if self.hash_info else {}
         actual = self.read_params()

         # NOTE: we want to preserve the order of params as specified in the
@@ -159,7 +165,7 @@ def get_hash(self):
                 )
             )

-        return HashInfo(self.PARAM_PARAMS, info)
+        return HashInfo(self.PARAM_PARAMS, info)  # type: ignore[arg-type]

     def save(self):
         if not self.exists:
1 change: 1 addition & 0 deletions dvc/dependency/repo.py
@@ -184,6 +184,7 @@ def iter_objs():
                 tree = Tree.load(odb, hash_info)
                 yield from (odb.get(hi.value) for _, _, hi in tree)
             else:
+                assert hash_info.value
                 yield odb.get(hash_info.value)

         checked_urls = set()
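The added assert doubles as a type guard: after `assert hash_info.value`, mypy narrows the Optional attribute for the following call, and at runtime the assert fails fast on an object whose hash was never computed. A hedged sketch of the idea (the function name is hypothetical):

from typing import Optional

def object_key(value: Optional[str]) -> str:
    # mypy narrows value from Optional[str] to str after the assert;
    # at runtime it also catches a missing hash value early.
    assert value
    return value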
17 changes: 12 additions & 5 deletions dvc/output.py
@@ -1,7 +1,7 @@
 import logging
 import os
 from collections import defaultdict
-from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Type
+from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Type, cast
 from urllib.parse import urlparse

 from funcy import collecting, project
@@ -732,7 +732,8 @@ def _commit_granular_dir(self, filter_info) -> Optional["HashFile"]:
             self.hash_name,
             ignore=self.dvcignore,
         )
-        save_obj = save_obj.filter(prefix)
+        save_obj = cast(Tree, save_obj)
+        save_obj = cast(Tree, save_obj.filter(prefix))
         checkout_obj = save_obj.get_obj(self.odb, prefix)
         otransfer(
             staging,
@@ -746,7 +747,7 @@ def dumpd(self, **kwargs):  # noqa: C901
     def dumpd(self, **kwargs):  # noqa: C901
         meta = self.meta.to_dict()
         meta.pop("isdir", None)
-        ret = {**self.hash_info.to_dict(), **meta}
+        ret: Dict[str, Any] = {**self.hash_info.to_dict(), **meta}

         if self.is_in_repo:
             path = self.fs.path.as_posix(
@@ -792,11 +793,14 @@ def dumpd(self, **kwargs):  # noqa: C901
             and self.hash_info.isdir
             and (kwargs.get("with_files") or self.files is not None)
         ):
+            obj: Optional["HashFile"]
             if self.obj:
                 obj = self.obj
             else:
                 obj = self.get_obj()
-            ret[self.PARAM_FILES] = obj.as_list(with_meta=True)
+            if obj:
+                obj = cast(Tree, obj)
+                ret[self.PARAM_FILES] = obj.as_list(with_meta=True)

         return ret

@@ -844,6 +848,7 @@ def get_obj(
         fs_path = self.fs.path
         if filter_info and filter_info != self.fs_path:
             prefix = fs_path.relparts(filter_info, self.fs_path)
+            obj = cast(Tree, obj)
             obj = obj.get_obj(self.odb, prefix)

         return obj
@@ -1032,8 +1037,9 @@ def _collect_used_dir_cache(
             prefix = self.fs.path.parts(
                 self.fs.path.relpath(filter_info, self.fs_path)
             )
+            obj = cast(Tree, obj)
             return obj.filter(prefix)
-        return obj
+        return cast(Tree, obj)

     def get_used_objs(  # noqa: C901
         self, **kwargs
@@ -1071,6 +1077,7 @@ def get_used_objs(  # noqa: C901
             logger.warning(msg)
             return {}

+        obj: Optional["HashFile"]
         if self.is_dir_checksum:
             obj = self._collect_used_dir_cache(**kwargs)
         else:
10 changes: 7 additions & 3 deletions dvc/repo/__init__.py
@@ -11,6 +11,7 @@
     Optional,
     Tuple,
     Union,
+    cast,
 )

 from dvc.exceptions import FileMissingError
@@ -22,14 +23,15 @@
 from dvc.utils.objects import cached_property

 if TYPE_CHECKING:
-    from dvc.fs import FileSystem
+    from dvc.fs import FileSystem, GitFileSystem
     from dvc.fs.data import DataFileSystem
     from dvc.fs.dvc import DVCFileSystem
     from dvc.lock import LockBase
     from dvc.machine import MachineManager
     from dvc.scm import Base, Git, NoSCM
     from dvc.stage import Stage
     from dvc.types import DictStrAny
+    from dvc_data.hashfile.state import StateBase

     from .experiments import Experiments
     from .index import Index
@@ -215,13 +217,14 @@ def __init__(

         self.lock: "LockBase"
         self.odb: ODBManager
+        self.state: "StateBase"
         if isinstance(self.fs, GitFileSystem) or not self.dvc_dir:
             self.lock = LockNoop()
             self.state = StateNoop()
             self.odb = ODBManager(self)
             self.tmp_dir = None
         else:
-            self.fs.makedirs(self.tmp_dir, exist_ok=True)
+            self.fs.makedirs(cast(str, self.tmp_dir), exist_ok=True)

             if isinstance(self.fs, LocalFileSystem):
                 self.lock = make_lock(
@@ -322,7 +325,7 @@ def get_rev(self):

             with map_scm_exception():
                 return self.scm.get_rev()
-        return self.fs.rev
+        return cast("GitFileSystem", self.fs).rev

     @cached_property
     def experiments(self) -> "Experiments":
@@ -541,6 +544,7 @@ def open_by_relpath(self, path, remote=None, mode="r", encoding=None):
         from dvc.fs.data import DataFileSystem
         from dvc.fs.dvc import DVCFileSystem

+        fs: Union["FileSystem", DataFileSystem, DVCFileSystem]
         if os.path.isabs(path):
             fs = DataFileSystem(index=self.index.data["local"])
             fs_path = path
2 changes: 1 addition & 1 deletion dvc/repo/data.py
@@ -214,7 +214,7 @@ def _diff_head_to_index(
     repo: "Repo", head: str = "HEAD", **kwargs: Any
 ) -> Dict[str, List[str]]:
     # we need to store objects from index and the HEAD to diff later
-    objs: Dict[str, Dict[str, Tuple["HashFile", "HashInfo"]]]
+    objs: Dict[str, Dict[str, Tuple[Optional["HashFile"], "HashInfo"]]]
     objs = defaultdict(dict)

     staged_diff = defaultdict(list)
8 changes: 6 additions & 2 deletions dvc/repo/diff.py
@@ -11,6 +11,7 @@
     Optional,
     Set,
     Tuple,
+    cast,
 )

 from dvc.exceptions import PathMissingError
@@ -21,7 +22,8 @@
     from dvc.fs import DVCFileSystem, FileSystem
     from dvc.output import Output
     from dvc.repo import Repo
-    from dvc_objects.obj import Object
+    from dvc_data.hashfile.obj import HashFile
+    from dvc_data.hashfile.tree import Tree

 logger = logging.getLogger(__name__)

@@ -164,6 +166,7 @@ def _to_path(output: "Output") -> str:
         for target in targets
     )

+    obj: Optional["HashFile"]
     if on_working_fs:
         _, _, obj = build(
             repo.odb.repo,
@@ -194,6 +197,7 @@ def _to_path(output: "Output") -> str:
             )
         )
     ):
+        obj = cast("Tree", obj)
         yield from _dir_output_paths(
             output.fs, output.fs_path, obj, targets
         )
@@ -202,7 +206,7 @@ def _to_path(output: "Output") -> str:
 def _dir_output_paths(
     fs: "FileSystem",
     fs_path: str,
-    obj: "Object",
+    obj: "Tree",
     targets: Optional[Iterable[str]] = None,
 ) -> Iterator[Tuple[str, str]]:
     base = os.path.join(*fs.path.relparts(fs_path))
6 changes: 3 additions & 3 deletions dvc/repo/fetch.py
@@ -12,8 +12,8 @@
     from dvc.data_cloud import Remote
     from dvc.repo import Repo
     from dvc.types import TargetType
+    from dvc_data.hashfile.db import HashFileDB
     from dvc_data.hashfile.transfer import TransferResult
-    from dvc_objects.db import ObjectDB

 logger = logging.getLogger(__name__)

@@ -31,7 +31,7 @@ def fetch(  # noqa: C901
     all_commits=False,
     run_cache=False,
     revs=None,
-    odb: Optional["ObjectDB"] = None,
+    odb: Optional["HashFileDB"] = None,
 ) -> int:
     """Download data items from a cloud and imported repositories
@@ -133,7 +133,7 @@ def _fetch(
     targets: "TargetType",
     remote: Optional[str] = None,
     jobs: Optional[int] = None,
-    odb: Optional["ObjectDB"] = None,
+    odb: Optional["HashFileDB"] = None,
     **kwargs,
 ) -> "TransferResult":
     from dvc_data.hashfile.transfer import TransferResult
8 changes: 7 additions & 1 deletion dvc/repo/get.py
@@ -1,10 +1,15 @@
 import logging
 import os
+from typing import TYPE_CHECKING, Union

 from dvc.exceptions import DvcException
 from dvc.utils import resolve_output
 from dvc.utils.fs import remove

+if TYPE_CHECKING:
+    from dvc.fs.dvc import DVCFileSystem
+
+
 logger = logging.getLogger(__name__)


@@ -50,9 +55,10 @@ def get(url, path, out=None, rev=None, jobs=None):
     with external_repo(
         url=url, rev=rev, cache_dir=tmp_dir, cache_types=cache_types
     ) as repo:
+        from dvc.fs.data import DataFileSystem
+
+        fs: Union[DataFileSystem, "DVCFileSystem"]
         if os.path.isabs(path):
-            from dvc.fs.data import DataFileSystem
-
             fs = DataFileSystem(index=repo.index.data["local"])
             fs_path = fs.from_os_path(path)
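The fs: Union[...] annotations added here and in dvc/output.py, dvc/repo/diff.py, and dvc/repo/__init__.py pre-declare a variable's type before branching assignments. Without the annotation, mypy pins the variable to the type of its first assignment and flags the other branch as incompatible. A rough sketch under that assumption (the classes are stand-ins, not dvc's):

from typing import Union

class DataFileSystem: ...

class DVCFileSystem: ...

def pick_fs(absolute: bool) -> Union[DataFileSystem, DVCFileSystem]:
    # Declaring fs up front tells mypy the variable may legitimately hold
    # either filesystem type, so both branch assignments type-check.
    fs: Union[DataFileSystem, DVCFileSystem]
    if absolute:
        fs = DataFileSystem()
    else:
        fs = DVCFileSystem()
    return fs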
8 changes: 5 additions & 3 deletions dvc/repo/imports.py
@@ -1,7 +1,7 @@
 import logging
 import os
 from tempfile import TemporaryDirectory
-from typing import TYPE_CHECKING, List, Set, Tuple, Union
+from typing import TYPE_CHECKING, List, Set, Tuple, Union, cast

 if TYPE_CHECKING:
     from dvc.dependency.base import Dependency
@@ -77,7 +77,7 @@ def unpartial_imports(index: Union["Index", "IndexView"]) -> int:
             out.hash_info = dep.get_obj().hash_info
             out.meta = dep.get_meta()
         else:
-            out.hash_info = entry.hash_info
+            out.hash_info = cast("HashInfo", entry.hash_info)
             out.meta = entry.meta
         out.stage.dump()
         updated += out.meta.nfiles if out.meta.nfiles is not None else 1
@@ -123,7 +123,9 @@ def save_imports(
     downloaded.update(
         entry.hash_info
         for _, entry in data_view.iteritems()
-        if not entry.meta.isdir and entry.hash_info is not None
+        if entry.meta is not None
+        and not entry.meta.isdir
+        and entry.hash_info is not None
     )

     if unpartial:
12 changes: 6 additions & 6 deletions dvc/repo/worktree.py
@@ -25,7 +25,7 @@
 def _meta_checksum(fs: "FileSystem", meta: "Meta") -> Any:
     if not meta or meta.isdir:
         return meta
-    return getattr(meta, fs.PARAM_CHECKSUM)
+    return getattr(meta, cast(str, fs.PARAM_CHECKSUM))


 def worktree_view(
@@ -161,14 +161,14 @@ def _update_out_meta(
         entry.hash_info = old_tree.hash_info
         entry.meta = out.meta
     for subkey, entry in index.iteritems(key):
-        if entry.meta.isdir:
+        if entry.meta is not None and entry.meta.isdir:
             continue
         fs_path = repo.fs.path.join(repo.root_dir, *subkey)
         meta, hash_info = old_tree.get(
             repo.fs.path.relparts(fs_path, out.fs_path)
-        )
+        ) or (None, None)
         entry.hash_info = hash_info
-        if meta.version_id is not None:
+        if meta is not None and meta.version_id is not None:
             # preserve existing version IDs for unchanged files in
             # this dir (entry will have the latest remote version
             # ID after checkout)
@@ -236,8 +236,8 @@ def outs_filter(out: "Output") -> bool:

 def _fetch_out_changes(
     out: "Output",
-    local_index: "DataIndex",
-    remote_index: "DataIndex",
+    local_index: Union["DataIndex", "DataIndexView"],
+    remote_index: Union["DataIndex", "DataIndexView"],
     remote: "Remote",
 ):
     from dvc_data.index import DataIndex, checkout
7 changes: 4 additions & 3 deletions dvc/utils/serialize/_common.py
@@ -11,6 +11,7 @@
     Protocol,
     TextIO,
     Union,
+    cast,
 )

 from funcy import reraise
@@ -76,7 +77,7 @@ def _load_data(
 ):
     open_fn = fs.open if fs else open
     encoding = "utf-8"
-    with open_fn(path, encoding=encoding) as fd:
+    with open_fn(path, encoding=encoding) as fd:  # type: ignore[operator]
         with reraise(UnicodeDecodeError, EncodingError(path, encoding)):
             return parser(fd.read(), path)

@@ -89,7 +90,7 @@ def _dump_data(
     **dumper_args,
 ):
     open_fn = fs.open if fs else open
-    with open_fn(path, "w+", encoding="utf-8") as fd:
+    with open_fn(path, "w+", encoding="utf-8") as fd:  # type: ignore[operator]
         dumper(data, fd, **dumper_args)


@@ -101,7 +102,7 @@ def _modify_data(
     fs: Optional["FileSystem"] = None,
 ):
     exists_fn = fs.exists if fs else os.path.exists
-    file_exists = exists_fn(path)
+    file_exists = exists_fn(cast(str, path))
     data = _load_data(path, parser=parser, fs=fs) if file_exists else {}
     yield data
     _dump_data(path, data, dumper=dumper, fs=fs)
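The # type: ignore[operator] comments in _load_data and _dump_data silence mypy where open_fn is either fs.open or the builtin open: when the two alternatives' signatures cannot be reconciled, mypy reports that the resulting union is not callable. A rough, self-contained illustration of the same shape (names here are assumptions, not dvc's API):

from typing import Callable, Optional, TextIO

def read_text(path: str, opener: Optional[Callable[..., TextIO]] = None) -> str:
    # open_fn is the supplied opener or the builtin open(); if their
    # signatures were incompatible, calling the union would need the same
    # kind of targeted ignore used in the diff above.
    open_fn = opener if opener else open
    with open_fn(path, encoding="utf-8") as fd:
        return fd.read()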