Skip to content

Commit

Permalink
Merge pull request iterative#1990 from efiop/small-fixes
Browse files Browse the repository at this point in the history
dvc: refactoring: unify path/remote logic and naming
  • Loading branch information
efiop authored May 12, 2019
2 parents 4b2dff9 + b4143b1 commit a0c0890
Show file tree
Hide file tree
Showing 42 changed files with 233 additions and 192 deletions.
10 changes: 5 additions & 5 deletions dvc/data_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from dvc.remote import Remote
from dvc.remote.s3 import RemoteS3
from dvc.remote.gs import RemoteGS
from dvc.remote.azure import RemoteAzure
from dvc.remote.azure import RemoteAZURE
from dvc.remote.oss import RemoteOSS
from dvc.remote.ssh import RemoteSSH
from dvc.remote.hdfs import RemoteHDFS
Expand All @@ -34,7 +34,7 @@ class DataCloud(object):
CLOUD_MAP = {
"aws": RemoteS3,
"gcp": RemoteGS,
"azure": RemoteAzure,
"azure": RemoteAZURE,
"oss": RemoteOSS,
"ssh": RemoteSSH,
"hdfs": RemoteHDFS,
Expand Down Expand Up @@ -124,7 +124,7 @@ def push(self, targets, jobs=None, remote=None, show_checksums=False):
Args:
targets (list): list of targets to push to the cloud.
jobs (int): number of jobs that can be running simultaneously.
remote (dvc.remote.base.RemoteBase): optional remote to push to.
remote (dvc.remote.base.RemoteBASE): optional remote to push to.
By default remote from core.remote config option is used.
show_checksums (bool): show checksums instead of file names in
information messages.
Expand All @@ -142,7 +142,7 @@ def pull(self, targets, jobs=None, remote=None, show_checksums=False):
Args:
targets (list): list of targets to pull from the cloud.
jobs (int): number of jobs that can be running simultaneously.
remote (dvc.remote.base.RemoteBase): optional remote to pull from.
remote (dvc.remote.base.RemoteBASE): optional remote to pull from.
By default remote from core.remote config option is used.
show_checksums (bool): show checksums instead of file names in
information messages.
Expand All @@ -160,7 +160,7 @@ def status(self, targets, jobs=None, remote=None, show_checksums=False):
Args:
targets (list): list of targets to check status for.
jobs (int): number of jobs that can be running simultaneously.
remote (dvc.remote.base.RemoteBase): optional remote to compare
remote (dvc.remote.base.RemoteBASE): optional remote to compare
targets to. By default remote from core.remote config option
is used.
show_checksums (bool): show checksums instead of file names in
Expand Down
18 changes: 11 additions & 7 deletions dvc/dependency/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import schema

from dvc.scheme import Schemes

import dvc.output as output
from dvc.output.base import OutputBase
from dvc.dependency.s3 import DependencyS3
Expand All @@ -10,26 +12,28 @@
from dvc.dependency.hdfs import DependencyHDFS
from dvc.dependency.ssh import DependencySSH
from dvc.dependency.http import DependencyHTTP
from dvc.dependency.https import DependencyHTTPS

from dvc.remote import Remote

DEPS = [
DependencyGS,
DependencyHDFS,
DependencyHTTP,
DependencyHTTPS,
DependencyS3,
DependencySSH,
# NOTE: DependencyLOCAL is the default choice
]

DEP_MAP = {
"local": DependencyLOCAL,
"ssh": DependencySSH,
"s3": DependencyS3,
"gs": DependencyGS,
"hdfs": DependencyHDFS,
"http": DependencyHTTP,
"https": DependencyHTTP,
Schemes.LOCAL: DependencyLOCAL,
Schemes.SSH: DependencySSH,
Schemes.S3: DependencyS3,
Schemes.GS: DependencyGS,
Schemes.HDFS: DependencyHDFS,
Schemes.HTTP: DependencyHTTP,
Schemes.HTTPS: DependencyHTTPS,
}


Expand Down
4 changes: 2 additions & 2 deletions dvc/dependency/http.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import unicode_literals

from dvc.path.http import HTTPPathInfo
from dvc.path import Path
from dvc.utils.compat import urlparse, urljoin
from dvc.output.base import OutputBase
from dvc.remote.http import RemoteHTTP
Expand All @@ -17,4 +17,4 @@ def __init__(self, stage, path, info=None, remote=None):
if path.startswith("remote"):
path = urljoin(self.remote.cache_dir, urlparse(path).path)

self.path_info = HTTPPathInfo(url=self.url, path=path)
self.path_info = Path(self.scheme, url=self.url, path=path)
8 changes: 8 additions & 0 deletions dvc/dependency/https.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from __future__ import unicode_literals

from dvc.remote.https import RemoteHTTPS
from .http import DependencyHTTP


class DependencyHTTPS(DependencyHTTP):
    """HTTP dependency variant whose remote class is ``RemoteHTTPS``.

    Everything else is inherited unchanged from ``DependencyHTTP``.
    """

    REMOTE = RemoteHTTPS
2 changes: 1 addition & 1 deletion dvc/output/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import schema

from dvc.path import Schemes
from dvc.scheme import Schemes
from dvc.utils.compat import urlparse, str

from dvc.output.base import OutputBase
Expand Down
4 changes: 2 additions & 2 deletions dvc/output/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from dvc.exceptions import DvcException
from dvc.utils.compat import str
from dvc.remote.base import RemoteBase
from dvc.remote.base import RemoteBASE


logger = logging.getLogger(__name__)
Expand All @@ -35,7 +35,7 @@ def __init__(self, path):
class OutputBase(object):
IS_DEPENDENCY = False

REMOTE = RemoteBase
REMOTE = RemoteBASE

PARAM_PATH = "path"
PARAM_CACHE = "cache"
Expand Down
4 changes: 2 additions & 2 deletions dvc/output/hdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import posixpath

from dvc.path.hdfs import HDFSPathInfo
from dvc.path.hdfs import PathHDFS
from dvc.utils.compat import urlparse
from dvc.output.base import OutputBase
from dvc.remote.hdfs import RemoteHDFS
Expand Down Expand Up @@ -35,4 +35,4 @@ def __init__(
if remote:
path = posixpath.join(remote.url, urlparse(path).path.lstrip("/"))
user = remote.user if remote else self.group("user")
self.path_info = HDFSPathInfo(user=user, url=self.url, path=path)
self.path_info = PathHDFS(user=user, url=self.url, path=path)
4 changes: 2 additions & 2 deletions dvc/output/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
import logging

from dvc.path.local import LocalPathInfo
from dvc.path.local import PathLOCAL
from dvc.utils.compat import urlparse
from dvc.istextfile import istextfile
from dvc.exceptions import DvcException
Expand Down Expand Up @@ -50,7 +50,7 @@ def __init__(
p = os.path.join(stage.wdir, p)
p = os.path.abspath(os.path.normpath(p))

self.path_info = LocalPathInfo(url=self.url, path=p)
self.path_info = PathLOCAL(url=self.url, path=p)

def __str__(self):
return self.rel_path
Expand Down
4 changes: 2 additions & 2 deletions dvc/output/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import posixpath

from dvc.path.utils import PathInfo
from dvc.path import Path
from dvc.remote.s3 import RemoteS3
from dvc.utils.compat import urlparse
from dvc.output.base import OutputBase
Expand Down Expand Up @@ -37,6 +37,6 @@ def __init__(
if remote:
path = posixpath.join(remote.prefix, path)

self.path_info = PathInfo(
self.path_info = Path(
self.scheme, bucket=bucket, path=path, url=self.url
)
4 changes: 2 additions & 2 deletions dvc/output/ssh.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import getpass
import posixpath

from dvc.path.ssh import SSHPathInfo
from dvc.path.ssh import PathSSH
from dvc.utils.compat import urlparse
from dvc.output.base import OutputBase
from dvc.remote.ssh import RemoteSSH
Expand Down Expand Up @@ -49,6 +49,6 @@ def __init__(
else:
path = parsed.path

self.path_info = SSHPathInfo(
self.path_info = PathSSH(
host=host, user=user, port=port, url=self.url, path=path
)
63 changes: 29 additions & 34 deletions dvc/path/__init__.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,29 @@
from dvc.utils.compat import urlunsplit


class Schemes:
SSH = "ssh"
HDFS = "hdfs"
S3 = "s3"
AZURE = "azure"
HTTP = "http"
GS = "gs"
LOCAL = "local"
OSS = "oss"


class BasePathInfo(object):
scheme = None

def __init__(self, url=None, path=None):
self.url = url
self.path = path

def __str__(self):
return self.url


class DefaultCloudPathInfo(BasePathInfo):
def __init__(self, bucket, url=None, path=None):
super(DefaultCloudPathInfo, self).__init__(url, path)
self.bucket = bucket

def __str__(self):
if not self.url:
return urlunsplit((self.scheme, self.bucket, self.path, "", ""))
return self.url
from dvc.scheme import Schemes

from dvc.path.azure import PathAZURE
from dvc.path.gs import PathGS
from dvc.path.hdfs import PathHDFS
from dvc.path.http import PathHTTP
from dvc.path.https import PathHTTPS
from dvc.path.local import PathLOCAL
from dvc.path.oss import PathOSS
from dvc.path.s3 import PathS3
from dvc.path.ssh import PathSSH


# Registry of path classes keyed by scheme constant. The Path() factory
# below picks the class to instantiate by looking the scheme up here.
PATH_MAP = {
Schemes.SSH: PathSSH,
Schemes.HDFS: PathHDFS,
Schemes.S3: PathS3,
Schemes.AZURE: PathAZURE,
Schemes.HTTP: PathHTTP,
Schemes.HTTPS: PathHTTPS,
Schemes.GS: PathGS,
Schemes.LOCAL: PathLOCAL,
Schemes.OSS: PathOSS,
}


def Path(scheme, *args, **kwargs):
    """Build the path object registered for ``scheme``.

    The class is looked up in ``PATH_MAP`` and instantiated with the
    remaining positional and keyword arguments, which are forwarded as-is.

    Args:
        scheme: key into ``PATH_MAP`` selecting the path class.
        *args: positional arguments for the selected class.
        **kwargs: keyword arguments for the selected class.

    Returns:
        A new instance of the class mapped to ``scheme``.

    Raises:
        KeyError: if ``scheme`` has no entry in ``PATH_MAP``.
    """
    return PATH_MAP[scheme](*args, **kwargs)
5 changes: 3 additions & 2 deletions dvc/path/azure.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dvc.path import Schemes, DefaultCloudPathInfo
from dvc.scheme import Schemes
from .base import PathCloudBASE


class AzurePathInfo(DefaultCloudPathInfo):
class PathAZURE(PathCloudBASE):
    """Bucket-addressed path tagged with the ``Schemes.AZURE`` scheme."""

    scheme = Schemes.AZURE
23 changes: 23 additions & 0 deletions dvc/path/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from dvc.utils.compat import urlunsplit


class PathBASE(object):
    """Base description of a location, held as a URL and/or a path.

    Attributes:
        scheme: class-level scheme identifier; ``None`` here, overridden
            by subclasses.
        url: full URL of the location, or ``None`` if not given.
        path: path of the location, or ``None`` if not given.
    """

    scheme = None

    def __init__(self, url=None, path=None):
        self.url = url
        self.path = path

    def __str__(self):
        """Return the URL, falling back to the path, then to ``""``.

        ``__str__`` must return a string; returning an unset (``None``)
        url would make ``str(obj)`` raise ``TypeError``.
        """
        if self.url is not None:
            return self.url
        if self.path is not None:
            return self.path
        return ""


class PathCloudBASE(PathBASE):
    """Path for storage addressed by a bucket plus a path inside it.

    Attributes:
        bucket: name of the bucket; used as the network-location part
            when a URL has to be assembled.
    """

    def __init__(self, bucket, url=None, path=None):
        super(PathCloudBASE, self).__init__(url, path)
        self.bucket = bucket

    def __str__(self):
        """Return the explicit URL, or one assembled from the parts."""
        if self.url:
            return self.url
        # No explicit URL: assemble it from scheme, bucket and path. An
        # unset path is treated as empty so a bucket-only location still
        # renders instead of failing on ``None``.
        return urlunsplit(
            (self.scheme, self.bucket, self.path or "", "", "")
        )
5 changes: 3 additions & 2 deletions dvc/path/gs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dvc.path import Schemes, DefaultCloudPathInfo
from dvc.scheme import Schemes
from .base import PathCloudBASE


class GSPathInfo(DefaultCloudPathInfo):
class PathGS(PathCloudBASE):
    """Bucket-addressed path tagged with the ``Schemes.GS`` scheme."""

    scheme = Schemes.GS
7 changes: 4 additions & 3 deletions dvc/path/hdfs.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from dvc.utils.compat import urlunsplit
from dvc.path import BasePathInfo, Schemes
from dvc.scheme import Schemes
from .base import PathBASE


class HDFSPathInfo(BasePathInfo):
class PathHDFS(PathBASE):
scheme = Schemes.HDFS

def __init__(self, user, url=None, path=None):
super(HDFSPathInfo, self).__init__(url, path)
super(PathHDFS, self).__init__(url, path)
self.user = user

def __str__(self):
Expand Down
15 changes: 6 additions & 9 deletions dvc/path/http.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
from dvc.path import BasePathInfo, Schemes
from dvc.utils.compat import urlparse, urlunsplit
from dvc.scheme import Schemes
from dvc.utils.compat import urlunsplit

from .base import PathBASE

class HTTPPathInfo(BasePathInfo):
@property
def scheme(self):
if self.path:
return urlparse(self.path).scheme
else:
return Schemes.HTTP

class PathHTTP(PathBASE):
scheme = Schemes.HTTP

def __str__(self):
if not self.url:
Expand Down
6 changes: 6 additions & 0 deletions dvc/path/https.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from dvc.scheme import Schemes
from .http import PathHTTP


class PathHTTPS(PathHTTP):
    """``PathHTTP`` specialization tagged with ``Schemes.HTTPS``."""

    scheme = Schemes.HTTPS
5 changes: 3 additions & 2 deletions dvc/path/local.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import os

from dvc.path import BasePathInfo, Schemes
from dvc.scheme import Schemes
from .base import PathBASE


class LocalPathInfo(BasePathInfo):
class PathLOCAL(PathBASE):
scheme = Schemes.LOCAL

def __str__(self):
Expand Down
5 changes: 3 additions & 2 deletions dvc/path/oss.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dvc.path import Schemes, DefaultCloudPathInfo
from dvc.scheme import Schemes
from .base import PathCloudBASE


class OSSPathInfo(DefaultCloudPathInfo):
class PathOSS(PathCloudBASE):
    """Bucket-addressed path tagged with the ``Schemes.OSS`` scheme."""

    scheme = Schemes.OSS
5 changes: 3 additions & 2 deletions dvc/path/s3.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dvc.path import Schemes, DefaultCloudPathInfo
from dvc.scheme import Schemes
from .base import PathCloudBASE


class S3PathInfo(DefaultCloudPathInfo):
class PathS3(PathCloudBASE):
    """Bucket-addressed path tagged with the ``Schemes.S3`` scheme."""

    scheme = Schemes.S3
Loading

0 comments on commit a0c0890

Please sign in to comment.