hdfs: implement makedirs separately (iterative#4128)
Clouds that have a non-noop `makedirs` should implement it in their own
method. HDFS somehow slipped under the radar and had `mkdir` called
explicitly in its `copy` and `upload` methods.
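
As a rough illustration of the pattern the message describes, here is a
minimal, hypothetical sketch: a base remote whose `makedirs` is a no-op,
overridden by an HDFS remote that actually creates directories. The class
names and the fake client are illustrative only, not DVC's real API.

    # Hypothetical sketch of the "non-noop makedirs" pattern; names are
    # illustrative, not DVC's actual class hierarchy.
    class FakeHDFSClient:
        def mkdir(self, path):
            # Stand-in for a real HDFS call that also creates parents.
            print(f"mkdir -p {path}")


    class BaseRemote:
        def makedirs(self, path):
            """No-op by default: flat object stores have no directories."""


    class HDFSRemote(BaseRemote):
        def makedirs(self, path):
            # HDFS has real directories, so it overrides the base no-op;
            # copy/upload should call self.makedirs() rather than calling
            # mkdir on the client directly, which is what this commit fixes.
            FakeHDFSClient().mkdir(path)


    HDFSRemote().makedirs("/data/raw")  # prints: mkdir -p /data/raw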
efiop authored Jun 29, 2020
1 parent 83a2afc commit 7b97d72
Showing 1 changed file with 5 additions and 4 deletions.
dvc/remote/hdfs.py (9 changes: 5 additions & 4 deletions)
@@ -1,7 +1,6 @@
 import io
 import logging
 import os
-import posixpath
 import re
 import subprocess
 from collections import deque
@@ -112,10 +111,13 @@ def remove(self, path_info):
             with self.hdfs(path_info) as hdfs:
                 hdfs.rm(path_info.path)
 
+    def makedirs(self, path_info):
+        with self.hdfs(path_info) as hdfs:
+            # NOTE: hdfs.mkdir creates parents by default
+            hdfs.mkdir(path_info.path)
+
     def copy(self, from_info, to_info, **_kwargs):
-        dname = posixpath.dirname(to_info.path)
         with self.hdfs(to_info) as hdfs:
-            hdfs.mkdir(dname)
             # NOTE: this is how `hadoop fs -cp` works too: it copies through
             # your local machine.
             with hdfs.open(from_info.path, "rb") as from_fobj:
@@ -169,7 +171,6 @@ def get_file_hash(self, path_info):
 
     def _upload(self, from_file, to_info, **_kwargs):
         with self.hdfs(to_info) as hdfs:
-            hdfs.mkdir(posixpath.dirname(to_info.path))
             tmp_file = tmp_fname(to_info.path)
             with open(from_file, "rb") as fobj:
                 hdfs.upload(tmp_file, fobj)
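
The NOTE in `makedirs` is what makes a single call sufficient: the
pyarrow-backed HDFS client that DVC used at the time creates missing
parents on `mkdir`, much like `hdfs dfs -mkdir -p`. A quick usage sketch,
assuming pyarrow's legacy `hdfs` API and a reachable cluster; the host
name and port below are placeholders:

    # Sketch only: uses pyarrow's legacy hdfs API (deprecated in later
    # releases); "namenode" and 8020 are placeholder connection details.
    import pyarrow

    fs = pyarrow.hdfs.connect("namenode", 8020)
    fs.mkdir("/a/b/c")        # intermediate dirs /a and /a/b are created too
    print(fs.exists("/a/b"))  # True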
