Skip to content

Commit

Permalink
add safe extract for archivers (pantsbuild#5248)
Browse files Browse the repository at this point in the history
### Problem
When Archivers (TGZ, ZIP etc) used to extract an archive, if there are multiple versions of Pants running on the same machine, Pants might try to override the destination when the destination is being used, as seenion pantsbuild#4763 .  This causes occasional issues that are hard to reproduce, especially for subsystems download runtime supports. 

### Solution
Add a safe mode for Archive, which extract archived content into a temporary directory then use atomic rename to move the temporary directory to the final destination.  If final destination exists before the rename, the errors will be ignored.
  • Loading branch information
UnrememberMe authored and Stu Hood committed Jan 2, 2018
1 parent 258eca6 commit 0aa45f8
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def unpack_package(self, supportdir, version, filename):
supportdir=supportdir, version=version, name=filename)
logger.debug('Tarball for %s(%s): %s', supportdir, version, tarball_filepath)
work_dir = os.path.dirname(tarball_filepath)
TGZ.extract(tarball_filepath, work_dir)
TGZ.extract(tarball_filepath, work_dir, concurrency_safe=True)
return work_dir

@memoized_method
Expand Down
47 changes: 29 additions & 18 deletions src/python/pants/fs/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from collections import OrderedDict
from zipfile import ZIP_DEFLATED

from pants.util.contextutil import open_tar, open_zip
from pants.util.dirutil import safe_walk
from pants.util.contextutil import open_tar, open_zip, temporary_dir
from pants.util.dirutil import safe_concurrent_rename, safe_walk
from pants.util.meta import AbstractClass
from pants.util.strutil import ensure_text

Expand All @@ -22,8 +22,30 @@
class Archiver(AbstractClass):

@classmethod
def extract(cls, path, outdir):
"""Extracts an archive's contents to the specified outdir."""
def extract(cls, path, outdir, filter_func=None, concurrency_safe=False):
"""Extracts an archive's contents to the specified outdir with an optional filter.
:API: public
:param string path: path to the zipfile to extract from
:param string outdir: directory to extract files into
:param function filter_func: optional filter with the filename as the parameter. Returns True
if the file should be extracted. Note that filter_func is ignored for non-zip archives.
:param bool concurrency_safe: True to use concurrency safe method. Concurrency safe extraction
will be performed on a temporary directory and the extacted directory will then be renamed
atomically to the outdir. As a side effect, concurrency safe extraction will not allow
overlay of extracted contents onto an existing outdir.
"""
if concurrency_safe:
with temporary_dir() as temp_dir:
cls._extract(path, temp_dir)
safe_concurrent_rename(temp_dir, outdir)
else:
# Leave the existing default behavior unchanged and allows overlay of contents.
cls._extract(path, outdir, filter_func=filter_func)

@classmethod
def _extract(cls, path, outdir):
raise NotImplementedError()

@abstractmethod
Expand All @@ -41,10 +63,7 @@ class TarArchiver(Archiver):
"""

@classmethod
def extract(cls, path, outdir):
"""
:API: public
"""
def _extract(cls, path, outdir, **kwargs):
with open_tar(path, errorlevel=1) as tar:
tar.extractall(outdir)

Expand Down Expand Up @@ -75,16 +94,8 @@ class ZipArchiver(Archiver):
"""

@classmethod
def extract(cls, path, outdir, filter_func=None):
"""Extract from a zip file, with an optional filter
:API: public
:param string path: path to the zipfile to extract from
:param string outdir: directory to extract files into
:param function filter_func: optional filter with the filename as the parameter. Returns True if
the file should be extracted.
"""
def _extract(cls, path, outdir, filter_func=None, **kwargs):
"""Extract from a zip file, with an optional filter."""
with open_zip(path) as archive_file:
for name in archive_file.namelist():
# While we're at it, we also perform this safety test.
Expand Down
5 changes: 3 additions & 2 deletions tests/python/pants_test/fs/test_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def _listtree(self, root, empty_dirs):
return listing

def round_trip(self, archiver, expected_ext, empty_dirs):
def test_round_trip(prefix=None):
def test_round_trip(prefix=None, concurrency_safe=False):
with temporary_dir() as fromdir:
safe_mkdir(os.path.join(fromdir, 'a/b/c'))
touch(os.path.join(fromdir, 'a/b/d/e.txt'))
Expand All @@ -40,7 +40,7 @@ def test_round_trip(prefix=None):
archive, expected_ext))

with temporary_dir() as todir:
archiver.extract(archive, todir)
archiver.extract(archive, todir, concurrency_safe=concurrency_safe)
fromlisting = self._listtree(fromdir, empty_dirs)
if prefix:
fromlisting = set(os.path.join(prefix, x) for x in fromlisting)
Expand All @@ -50,6 +50,7 @@ def test_round_trip(prefix=None):

test_round_trip()
test_round_trip(prefix='jake')
test_round_trip(concurrency_safe=True)

def test_tar(self):
self.round_trip(archiver('tar'), expected_ext='tar', empty_dirs=True)
Expand Down

0 comments on commit 0aa45f8

Please sign in to comment.