Skip to content

Commit

Permalink
test: fix hdfs/azurite/oss tests (iterative#3864)
Browse files Browse the repository at this point in the history
* travis: fix typo

* tests: azure: fix expected etag format

* test: ssh: increase the number of tries

* travis: don't install docker

* dvc: update pyarrow

* tests: ssh: don't leak connections

* remote: don't fail when cache/remote doesn't have prefix subdir
  • Loading branch information
efiop authored May 23, 2020
1 parent 5ab948f commit b7ad2dc
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 33 deletions.
2 changes: 2 additions & 0 deletions dvc/remote/hdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ def list_cache_paths(self, prefix=None, progress_callback=None):
dirs = deque([root])

with self.hdfs(self.path_info) as hdfs:
if prefix and not hdfs.exists(root):
return
while dirs:
try:
entries = hdfs.ls(dirs.pop(), detail=True)
Expand Down
2 changes: 2 additions & 0 deletions dvc/remote/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ def list_cache_paths(self, prefix=None, progress_callback=None):
assert self.path_info is not None
if prefix:
path_info = self.path_info / prefix[:2]
if not self.exists(path_info):
return
else:
path_info = self.path_info
if progress_callback:
Expand Down
2 changes: 2 additions & 0 deletions dvc/remote/ssh/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,8 @@ def list_cache_paths(self, prefix=None, progress_callback=None):
else:
root = self.path_info.path
with self.ssh(self.path_info) as ssh:
if prefix and not ssh.exists(root):
return
# If we simply returned an iterator here, the `with` block above would exit immediately
if progress_callback:
for path in ssh.walk_files(root):
Expand Down
2 changes: 1 addition & 1 deletion scripts/ci/before_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ fi

echo >env.sh

if [[ "$TRAVIS_BUILD_STAGE_NAME" == "Test" ]]; then
if [[ "$TRAVIS_BUILD_STAGE_NAME" == "test" ]]; then
if [[ "$TRAVIS_OS_NAME" != "windows" ]]; then
# NOTE: generate ssh keys so that the ssh tests can ssh into localhost
ssh-keygen -t rsa -N "" -f mykey
Expand Down
7 changes: 0 additions & 7 deletions scripts/ci/install_azurite.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,6 @@
set -e
set -x

# install docker
export DEBIAN_FRONTEND=noninteractive
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
sudo apt-get update
sudo apt-get install -y docker-ce

# run azurite
sudo docker run -d --restart always -e executable=blob -p 10000:10000 \
--tmpfs /opt/azurite/folder \
Expand Down
7 changes: 1 addition & 6 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,7 @@ def run(self):
azure = ["azure-storage-blob==2.1.0"]
oss = ["oss2==2.6.1"]
ssh = ["paramiko>=2.5.0"]
hdfs = [
# pyarrow-0.16.0 import fails on 3.7 (works on 3.6 though)
# due to: https://issues.apache.org/jira/browse/ARROW-7852
"pyarrow==0.15.1; python_version < '3.8'",
"pyarrow==0.16.0; python_version == '3.8'",
]
hdfs = ["pyarrow>=0.17.0"]
# gssapi should not be included in all_remotes, because it doesn't have wheels
# for linux and mac, so it will fail to compile if user doesn't have all the
# requirements, including kerberos itself. Once all the wheels are available,
Expand Down
44 changes: 27 additions & 17 deletions tests/func/test_repro.py
Original file line number Diff line number Diff line change
Expand Up @@ -1070,7 +1070,7 @@ def write(self, bucket, key, body):
self.assertEqual(p.returncode, 0)


@flaky(max_runs=3, min_passes=1)
@flaky(max_runs=5, min_passes=1)
class TestReproExternalSSH(SSH, TestReproExternalBase):
_dir = None
cache_type = "copy"
Expand All @@ -1095,23 +1095,33 @@ def cmd(self, i, o):
def write(self, bucket, key, body):
path = posixpath.join(self._dir, key)

ssh = paramiko.SSHClient()
ssh.load_system_host_keys()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect("127.0.0.1")

sftp = ssh.open_sftp()
ssh = None
sftp = None
try:
sftp.stat(path)
sftp.remove(path)
except OSError:
pass

stdin, stdout, stderr = ssh.exec_command(f"mkdir -p $(dirname {path})")
self.assertEqual(stdout.channel.recv_exit_status(), 0)

with sftp.open(path, "w+") as fobj:
fobj.write(body)
ssh = paramiko.SSHClient()
ssh.load_system_host_keys()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect("127.0.0.1")

sftp = ssh.open_sftp()
try:
sftp.stat(path)
sftp.remove(path)
except OSError:
pass

stdin, stdout, stderr = ssh.exec_command(
f"mkdir -p $(dirname {path})"
)
self.assertEqual(stdout.channel.recv_exit_status(), 0)

with sftp.open(path, "w+") as fobj:
fobj.write(body)
finally:
if sftp:
sftp.close()
if ssh:
ssh.close()


class TestReproExternalLOCAL(Local, TestReproExternalBase):
Expand Down
6 changes: 4 additions & 2 deletions tests/unit/remote/test_azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,7 @@ def test_get_file_checksum(tmp_dir):
to_info = remote.path_cls(Azure.get_url())
remote.upload(PathInfo("foo"), to_info)
assert remote.exists(to_info)
# make sure the checksum is a hex number
int(remote.get_file_checksum(to_info), 16)
checksum = remote.get_file_checksum(to_info)
assert checksum
assert isinstance(checksum, str)
assert checksum.strip("'").strip('"') == checksum

0 comments on commit b7ad2dc

Please sign in to comment.