Skip to content

Commit

Permalink
deps/outs: get rid of remotes in favor of trees (iterative#4208)
Browse files Browse the repository at this point in the history
Outputs and dependencies don't really care about the remotes, as they
don't do any pushing or pulling. What they really need are the trees,
so let's use them directly.

Part of iterative#4050
  • Loading branch information
efiop authored Jul 15, 2020
1 parent 58f6534 commit bbb1c59
Show file tree
Hide file tree
Showing 10 changed files with 45 additions and 49 deletions.
6 changes: 3 additions & 3 deletions dvc/dependency/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from dvc.dependency.s3 import S3Dependency
from dvc.dependency.ssh import SSHDependency
from dvc.output.base import BaseOutput
from dvc.remote import get_remote
from dvc.remote import get_cloud_tree
from dvc.scheme import Schemes

from .repo import RepoDependency
Expand Down Expand Up @@ -54,8 +54,8 @@
def _get(stage, p, info):
parsed = urlparse(p) if p else None
if parsed and parsed.scheme == "remote":
remote = get_remote(stage.repo, name=parsed.netloc)
return DEP_MAP[remote.scheme](stage, p, info, remote=remote)
tree = get_cloud_tree(stage.repo, name=parsed.netloc)
return DEP_MAP[tree.scheme](stage, p, info, tree=tree)

if info and info.get(RepoDependency.PARAM_REPO):
repo = info.pop(RepoDependency.PARAM_REPO)
Expand Down
2 changes: 1 addition & 1 deletion dvc/dependency/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(self, def_repo, stage, *args, **kwargs):
self.def_repo = def_repo
super().__init__(stage, *args, **kwargs)

def _parse_path(self, remote, path):
def _parse_path(self, tree, path):
return None

@property
Expand Down
12 changes: 6 additions & 6 deletions dvc/output/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from dvc.output.local import LocalOutput
from dvc.output.s3 import S3Output
from dvc.output.ssh import SSHOutput
from dvc.remote import get_remote
from dvc.remote import get_cloud_tree
from dvc.remote.hdfs import HDFSRemoteTree
from dvc.remote.local import LocalRemoteTree
from dvc.remote.s3 import S3RemoteTree
Expand Down Expand Up @@ -66,13 +66,13 @@ def _get(
parsed = urlparse(p)

if parsed.scheme == "remote":
remote = get_remote(stage.repo, name=parsed.netloc)
return OUTS_MAP[remote.scheme](
tree = get_cloud_tree(stage.repo, name=parsed.netloc)
return OUTS_MAP[tree.scheme](
stage,
p,
info,
cache=cache,
remote=remote,
tree=tree,
metric=metric,
plot=plot,
persist=persist,
Expand All @@ -85,7 +85,7 @@ def _get(
p,
info,
cache=cache,
remote=None,
tree=None,
metric=metric,
plot=plot,
persist=persist,
Expand All @@ -95,7 +95,7 @@ def _get(
p,
info,
cache=cache,
remote=None,
tree=None,
metric=metric,
plot=plot,
persist=persist,
Expand Down
52 changes: 25 additions & 27 deletions dvc/output/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
DvcException,
RemoteCacheRequiredError,
)
from dvc.remote.base import BaseRemoteTree, Remote
from dvc.remote.base import BaseRemoteTree

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -47,7 +47,6 @@ def __init__(self, path):
class BaseOutput:
IS_DEPENDENCY = False

REMOTE_CLS = Remote
TREE_CLS = BaseRemoteTree

PARAM_PATH = "path"
Expand Down Expand Up @@ -85,7 +84,7 @@ def __init__(
stage,
path,
info=None,
remote=None,
tree=None,
cache=True,
metric=False,
plot=False,
Expand All @@ -106,24 +105,23 @@ def __init__(
self.repo = stage.repo if stage else None
self.def_path = path
self.info = info
if remote:
self.remote = remote
if tree:
self.tree = tree
else:
tree = self.TREE_CLS(self.repo, {})
self.remote = self.REMOTE_CLS(tree)
self.tree = self.TREE_CLS(self.repo, {})
self.use_cache = False if self.IS_DEPENDENCY else cache
self.metric = False if self.IS_DEPENDENCY else metric
self.plot = False if self.IS_DEPENDENCY else plot
self.persist = persist

self.path_info = self._parse_path(remote, path)
self.path_info = self._parse_path(tree, path)
if self.use_cache and self.cache is None:
raise RemoteCacheRequiredError(self.path_info)

def _parse_path(self, remote, path):
if remote:
def _parse_path(self, tree, path):
if tree:
parsed = urlparse(path)
return remote.path_info / parsed.path.lstrip("/")
return tree.path_info / parsed.path.lstrip("/")
return self.TREE_CLS.PATH_CLS(path)

def __repr__(self):
Expand Down Expand Up @@ -167,29 +165,29 @@ def cache_path(self):

@property
def checksum_type(self):
return self.remote.tree.PARAM_CHECKSUM
return self.tree.PARAM_CHECKSUM

@property
def checksum(self):
return self.info.get(self.remote.tree.PARAM_CHECKSUM)
return self.info.get(self.tree.PARAM_CHECKSUM)

@checksum.setter
def checksum(self, checksum):
self.info[self.remote.tree.PARAM_CHECKSUM] = checksum
self.info[self.tree.PARAM_CHECKSUM] = checksum

def get_checksum(self):
return self.remote.get_hash(self.path_info)
return self.tree.get_hash(self.path_info)

@property
def is_dir_checksum(self):
return self.remote.is_dir_hash(self.checksum)
return self.tree.is_dir_hash(self.checksum)

@property
def exists(self):
return self.remote.tree.exists(self.path_info)
return self.tree.exists(self.path_info)

def save_info(self):
return self.remote.save_info(self.path_info)
return self.tree.save_info(self.path_info)

def changed_checksum(self):
return self.checksum != self.get_checksum()
Expand Down Expand Up @@ -222,13 +220,13 @@ def changed(self):

@property
def is_empty(self):
return self.remote.tree.is_empty(self.path_info)
return self.tree.is_empty(self.path_info)

def isdir(self):
return self.remote.tree.isdir(self.path_info)
return self.tree.isdir(self.path_info)

def isfile(self):
return self.remote.tree.isfile(self.path_info)
return self.tree.isfile(self.path_info)

# pylint: disable=no-member

Expand Down Expand Up @@ -316,7 +314,7 @@ def verify_metric(self):
raise DvcException(f"verify metric is not supported for {self.scheme}")

def download(self, to):
self.remote.tree.download(self.path_info, to.path_info)
self.tree.download(self.path_info, to.path_info)

def checkout(
self,
Expand All @@ -342,7 +340,7 @@ def checkout(
)

def remove(self, ignore_remove=False):
self.remote.tree.remove(self.path_info)
self.tree.remove(self.path_info)
if self.scheme != "local":
return

Expand All @@ -354,7 +352,7 @@ def move(self, out):
if self.scheme == "local" and self.use_scm_ignore:
self.repo.scm.ignore_remove(self.fspath)

self.remote.tree.move(self.path_info, out.path_info)
self.tree.move(self.path_info, out.path_info)
self.def_path = out.def_path
self.path_info = out.path_info
self.save()
Expand All @@ -373,7 +371,7 @@ def get_files_number(self, filter_info=None):

def unprotect(self):
if self.exists:
self.remote.tree.unprotect(self.path_info)
self.tree.unprotect(self.path_info)

def get_dir_cache(self, **kwargs):
if not self.is_dir_checksum:
Expand Down Expand Up @@ -433,8 +431,8 @@ def collect_used_dir_cache(
filter_path = str(filter_info) if filter_info else None
is_win = os.name == "nt"
for entry in self.dir_cache:
checksum = entry[self.remote.tree.PARAM_CHECKSUM]
entry_relpath = entry[self.remote.tree.PARAM_RELPATH]
checksum = entry[self.tree.PARAM_CHECKSUM]
entry_relpath = entry[self.tree.PARAM_RELPATH]
if is_win:
entry_relpath = entry_relpath.replace("/", os.sep)
entry_path = os.path.join(path, entry_relpath)
Expand Down
7 changes: 3 additions & 4 deletions dvc/output/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@
from dvc.exceptions import DvcException
from dvc.istextfile import istextfile
from dvc.output.base import BaseOutput
from dvc.remote.local import LocalRemote, LocalRemoteTree
from dvc.remote.local import LocalRemoteTree
from dvc.utils import relpath
from dvc.utils.fs import path_isin

logger = logging.getLogger(__name__)


class LocalOutput(BaseOutput):
REMOTE_CLS = LocalRemote
TREE_CLS = LocalRemoteTree
sep = os.sep

Expand All @@ -23,10 +22,10 @@ def __init__(self, stage, path, *args, **kwargs):

super().__init__(stage, path, *args, **kwargs)

def _parse_path(self, remote, path):
def _parse_path(self, tree, path):
parsed = urlparse(path)
if parsed.scheme == "remote":
p = remote.path_info / parsed.path.lstrip("/")
p = tree.path_info / parsed.path.lstrip("/")
else:
# NOTE: we can path either from command line or .dvc file,
# so we should expect both posix and windows style paths.
Expand Down
3 changes: 1 addition & 2 deletions dvc/output/ssh.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from dvc.output.base import BaseOutput
from dvc.remote.ssh import SSHRemote, SSHRemoteTree
from dvc.remote.ssh import SSHRemoteTree


class SSHOutput(BaseOutput):
REMOTE_CLS = SSHRemote
TREE_CLS = SSHRemoteTree
6 changes: 3 additions & 3 deletions dvc/repo/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ def _get_granular_checksum(self, path, out, remote=None):
raise FileNotFoundError
dir_cache = out.get_dir_cache(remote=remote)
for entry in dir_cache:
entry_relpath = entry[out.remote.tree.PARAM_RELPATH]
entry_relpath = entry[out.tree.PARAM_RELPATH]
if os.name == "nt":
entry_relpath = entry_relpath.replace("/", os.sep)
if path == out.path_info / entry_relpath:
return entry[out.remote.tree.PARAM_CHECKSUM]
return entry[out.tree.PARAM_CHECKSUM]
raise FileNotFoundError

def open(
Expand Down Expand Up @@ -156,7 +156,7 @@ def _add_dir(self, top, trie, out, download_callback=None, **kwargs):
download_callback(downloaded)

for entry in dir_cache:
entry_relpath = entry[out.remote.tree.PARAM_RELPATH]
entry_relpath = entry[out.tree.PARAM_RELPATH]
if os.name == "nt":
entry_relpath = entry_relpath.replace("/", os.sep)
path_info = out.path_info / entry_relpath
Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def test_repotree_walk_fetch(tmp_dir, dvc, scm, local_remote):

assert os.path.exists(out.cache_path)
for entry in out.dir_cache:
hash_ = entry[out.remote.tree.PARAM_CHECKSUM]
hash_ = entry[out.tree.PARAM_CHECKSUM]
assert os.path.exists(dvc.cache.local.hash_to_path_info(hash_))


Expand Down
2 changes: 1 addition & 1 deletion tests/unit/dependency/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ def _get_dependency(self):

def test_save_missing(self):
d = self._get_dependency()
with mock.patch.object(d.remote.tree, "exists", return_value=False):
with mock.patch.object(d.tree, "exists", return_value=False):
with self.assertRaises(d.DoesNotExistError):
d.save()
2 changes: 1 addition & 1 deletion tests/unit/output/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def _get_output(self):

def test_save_missing(self):
o = self._get_output()
with patch.object(o.remote.tree, "exists", return_value=False):
with patch.object(o.tree, "exists", return_value=False):
with self.assertRaises(o.DoesNotExistError):
o.save()

Expand Down

0 comments on commit bbb1c59

Please sign in to comment.