Skip to content

Commit

Permalink
import-url: allow queries in URL (iterative#3432)
Browse files Browse the repository at this point in the history
* import-url: allow queries in URL

Fixes iterative#3424

* Revert "import-url: allow queries in URL"

This reverts commit 48495d0.

* path_info: add HTTPURLInfo base

* path_info: HTTPURLInfo complete

* remote: http(s): support params, queries, fragments

Fixes iterative#3424
Closes iterative#3432

* tests: path_info: HTTPURLInfo

* lint

* tests: path_info: HTTPURLInfo

Slightly more thorough testing
  • Loading branch information
casperdcl authored Mar 17, 2020
1 parent 137373c commit f19d800
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 1 deletion.
76 changes: 76 additions & 0 deletions dvc/path_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,3 +241,79 @@ class CloudURLInfo(URLInfo):
@property
def path(self):
    """Return ``self._spath`` with every leading ``/`` removed."""
    without_leading_slashes = self._spath.lstrip("/")
    return without_leading_slashes


class HTTPURLInfo(URLInfo):
    """URL info that keeps the params, query and fragment of a URL.

    The three trailing components are split off with ``urlparse`` and
    stored as the ``params``, ``query`` and ``fragment`` attributes; the
    remainder of the URL is handed to the parent class.
    """

    def __init__(self, url):
        """Parse *url*, passing the parent only the stripped URL."""
        parsed = urlparse(url)
        # The parent initializer receives the URL without the trailing
        # components; they are kept on this instance instead.
        bare = parsed._replace(params=None, query=None, fragment=None)
        super().__init__(bare.geturl())
        self.params = parsed.params
        self.query = parsed.query
        self.fragment = parsed.fragment

    @staticmethod
    def _compose_url(scheme, netloc, path, params, query, fragment):
        """Join URL components; falsy trailing components are omitted."""
        tail = [
            (marker + value) if value else ""
            for marker, value in ((";", params), ("?", query), ("#", fragment))
        ]
        return "{}://{}{}{}{}{}".format(scheme, netloc, path, *tail)

    @classmethod
    def from_parts(
        cls,
        scheme=None,
        host=None,
        user=None,
        port=None,
        path="",
        netloc=None,
        params=None,
        query=None,
        fragment=None,
    ):
        """Build an instance from individual URL components.

        Exactly one of *host* and *netloc* must be truthy. When *netloc*
        is given, a URL string is assembled and parsed through the normal
        constructor; otherwise the instance is created without running
        ``__init__`` and populated via ``fill_parts``.
        """
        assert bool(host) ^ bool(netloc)

        if netloc is None:
            # Bypass __init__: there is no URL string to parse here.
            obj = cls.__new__(cls)
            obj.fill_parts(scheme, host, user, port, path)
            obj.params = params
            obj.query = query
            obj.fragment = fragment
            return obj

        return cls(
            cls._compose_url(scheme, netloc, path, params, query, fragment)
        )

    @property
    def _extra_parts(self):
        """The ``(params, query, fragment)`` tuple."""
        return (self.params, self.query, self.fragment)

    @property
    def parts(self):
        """Base parts, then path parts, then the extra parts."""
        leading = self._base_parts + self._path.parts
        return leading + self._extra_parts

    @cached_property
    def url(self):
        """The full URL string, including params, query and fragment."""
        return self._compose_url(
            self.scheme,
            self.netloc,
            self._spath,
            self.params,
            self.query,
            self.fragment,
        )

    def __eq__(self, other):
        """Compare by class, base parts, path and extra parts.

        A ``str`` or ``bytes`` operand is first parsed with this class.
        """
        if isinstance(other, (str, bytes)):
            other = self.__class__(other)
        if not (self.__class__ == other.__class__):
            return False
        return (
            self._base_parts == other._base_parts
            and self._path == other._path
            and self._extra_parts == other._extra_parts
        )

    def __hash__(self):
        """Hash of ``parts``, which includes the extra parts."""
        return hash(self.parts)
2 changes: 2 additions & 0 deletions dvc/remote/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from funcy import cached_property, memoize, wrap_prop, wrap_with

from dvc.path_info import HTTPURLInfo
import dvc.prompt as prompt
from dvc.config import ConfigError
from dvc.exceptions import DvcException, HTTPError
Expand All @@ -25,6 +26,7 @@ def ask_password(host, user):

class RemoteHTTP(RemoteBASE):
scheme = Schemes.HTTP
path_cls = HTTPURLInfo
SESSION_RETRIES = 5
SESSION_BACKOFF_FACTOR = 0.1
REQUEST_TIMEOUT = 10
Expand Down
13 changes: 12 additions & 1 deletion tests/unit/test_path_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from dvc.path_info import CloudURLInfo
from dvc.path_info import HTTPURLInfo
from dvc.path_info import PathInfo
from dvc.path_info import URLInfo

Expand Down Expand Up @@ -44,13 +45,23 @@ def test_url_info_parents(cls):
]


# A single parametrize decorator: stacking two on the same argname "cls"
# is rejected by pytest, and this list is a superset of the older one.
@pytest.mark.parametrize("cls", [URLInfo, CloudURLInfo, HTTPURLInfo])
def test_url_info_deepcopy(cls):
    """A deep copy of a URL info object compares equal to the original."""
    # NOTE(review): the user@host part was mangled to "[email protected]" by
    # e-mail obfuscation in the scraped page; confirm against the repository.
    u1 = cls("ssh://user@test.com:/test1/test2/test3")
    u2 = copy.deepcopy(u1)
    assert u1 == u2


def test_https_url_info_str():
    """HTTPURLInfo round-trips a URL carrying params, query and fragment."""
    # NOTE(review): the user@host part was mangled to "[email protected]" by
    # e-mail obfuscation in the scraped page; confirm against the repository.
    url = "https://user@test.com/test1;p=par?q=quer#frag"
    u = HTTPURLInfo(url)
    assert u.url == url
    assert str(u) == u.url
    # Each trailing component is exposed separately, without its delimiter.
    assert u.params == "p=par"
    assert u.query == "q=quer"
    assert u.fragment == "frag"


@pytest.mark.parametrize(
"path, as_posix, osname",
[
Expand Down

0 comments on commit f19d800

Please sign in to comment.