diff --git a/dvc/remote/hdfs.py b/dvc/remote/hdfs.py index 728dbffffa..6d2e7cfb66 100644 --- a/dvc/remote/hdfs.py +++ b/dvc/remote/hdfs.py @@ -165,6 +165,8 @@ def list_cache_paths(self, prefix=None, progress_callback=None): dirs = deque([root]) with self.hdfs(self.path_info) as hdfs: + if prefix and not hdfs.exists(root): + return while dirs: try: entries = hdfs.ls(dirs.pop(), detail=True) diff --git a/dvc/remote/local.py b/dvc/remote/local.py index ffa387f625..d4e613353e 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -98,6 +98,8 @@ def list_cache_paths(self, prefix=None, progress_callback=None): assert self.path_info is not None if prefix: path_info = self.path_info / prefix[:2] + if not self.exists(path_info): + return else: path_info = self.path_info if progress_callback: diff --git a/dvc/remote/ssh/__init__.py b/dvc/remote/ssh/__init__.py index a6d98ead35..eb0482ad6c 100644 --- a/dvc/remote/ssh/__init__.py +++ b/dvc/remote/ssh/__init__.py @@ -265,6 +265,8 @@ def list_cache_paths(self, prefix=None, progress_callback=None): else: root = self.path_info.path with self.ssh(self.path_info) as ssh: + if prefix and not ssh.exists(root): + return # If we simply return an iterator then with above closes instantly if progress_callback: for path in ssh.walk_files(root): diff --git a/scripts/ci/before_install.sh b/scripts/ci/before_install.sh index c7b39dcbfa..25138cd033 100644 --- a/scripts/ci/before_install.sh +++ b/scripts/ci/before_install.sh @@ -15,7 +15,7 @@ fi echo >env.sh -if [[ "$TRAVIS_BUILD_STAGE_NAME" == "Test" ]]; then +if [[ "$TRAVIS_BUILD_STAGE_NAME" == "test" ]]; then if [[ "$TRAVIS_OS_NAME" != "windows" ]]; then # NOTE: ssh keys for ssh test to be able to ssh to the localhost ssh-keygen -t rsa -N "" -f mykey diff --git a/scripts/ci/install_azurite.sh b/scripts/ci/install_azurite.sh index d02223d8a0..48e263684d 100755 --- a/scripts/ci/install_azurite.sh +++ b/scripts/ci/install_azurite.sh @@ -3,13 +3,6 @@ set -e set -x -# install docker -export DEBIAN_FRONTEND=noninteractive -curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - -sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -sudo apt-get update -sudo apt-get install -y docker-ce - # run azurite sudo docker run -d --restart always -e executable=blob -p 10000:10000 \ --tmpfs /opt/azurite/folder \ diff --git a/setup.py b/setup.py index 27d598b18d..462796dce0 100644 --- a/setup.py +++ b/setup.py @@ -92,12 +92,7 @@ def run(self): azure = ["azure-storage-blob==2.1.0"] oss = ["oss2==2.6.1"] ssh = ["paramiko>=2.5.0"] -hdfs = [ - # pyarrow-0.16.0 import fails on 3.7 (works on 3.6 though) - # due to: https://issues.apache.org/jira/browse/ARROW-7852 - "pyarrow==0.15.1; python_version < '3.8'", - "pyarrow==0.16.0; python_version == '3.8'", -] +hdfs = ["pyarrow>=0.17.0"] # gssapi should not be included in all_remotes, because it doesn't have wheels # for linux and mac, so it will fail to compile if user doesn't have all the # requirements, including kerberos itself. Once all the wheels are available, diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index 76af434d47..d5e5077384 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -1070,7 +1070,7 @@ def write(self, bucket, key, body): self.assertEqual(p.returncode, 0) -@flaky(max_runs=3, min_passes=1) +@flaky(max_runs=5, min_passes=1) class TestReproExternalSSH(SSH, TestReproExternalBase): _dir = None cache_type = "copy" @@ -1095,23 +1095,33 @@ def cmd(self, i, o): def write(self, bucket, key, body): path = posixpath.join(self._dir, key) - ssh = paramiko.SSHClient() - ssh.load_system_host_keys() - ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - ssh.connect("127.0.0.1") - - sftp = ssh.open_sftp() + ssh = None + sftp = None try: - sftp.stat(path) - sftp.remove(path) - except OSError: - pass - - stdin, stdout, stderr = ssh.exec_command(f"mkdir -p $(dirname {path})") - self.assertEqual(stdout.channel.recv_exit_status(), 0) - - with sftp.open(path, "w+") as fobj: - fobj.write(body) + ssh = paramiko.SSHClient() + ssh.load_system_host_keys() + ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + ssh.connect("127.0.0.1") + + sftp = ssh.open_sftp() + try: + sftp.stat(path) + sftp.remove(path) + except OSError: + pass + + stdin, stdout, stderr = ssh.exec_command( + f"mkdir -p $(dirname {path})" + ) + self.assertEqual(stdout.channel.recv_exit_status(), 0) + + with sftp.open(path, "w+") as fobj: + fobj.write(body) + finally: + if sftp: + sftp.close() + if ssh: + ssh.close() class TestReproExternalLOCAL(Local, TestReproExternalBase): diff --git a/tests/unit/remote/test_azure.py b/tests/unit/remote/test_azure.py index fa537fcacb..a623379b7b 100644 --- a/tests/unit/remote/test_azure.py +++ b/tests/unit/remote/test_azure.py @@ -42,5 +42,7 @@ def test_get_file_checksum(tmp_dir): to_info = remote.path_cls(Azure.get_url()) remote.upload(PathInfo("foo"), to_info) assert remote.exists(to_info) - # make sure the checksum is a hex number - int(remote.get_file_checksum(to_info), 16) + checksum = remote.get_file_checksum(to_info) + assert checksum + assert isinstance(checksum, str) + assert checksum.strip("'").strip('"') == checksum