Skip to content

Commit

Permalink
Make open_zip print realpath when raising BadZipfile. (pantsbuild#4186)
Browse files Browse the repository at this point in the history
open_zip is used to unzip jars, but is usually called
on files under .pants.d, which increasingly are symlinks.

In the motivating example, a jar was corrupted and could
not be opened by detect_duplicates task. The Exception was:

```
Exception message: Bad Zipfile
/buildroot/.pants.d/<snip>/syms/org/some/corrupted.jar: \
    File is not a zip file
```

The corrupted file is really at the realpath, and
neither cleaning nor deleting the symlink will help.
The updated output hopefully makes that more clear.

This commit also adds a None check for the passed file_path.
It turns out that zipfile does not filter falsey values but instead
bubbles up an AttributeError that is tough to traceback:
```
        # Determine file size
>       fpin.seek(0, 2)
E       AttributeError: 'NoneType' object has no attribute 'seek'
```
a custom subclass of BadZipFile to return in that instance.

None of the behavior here is new, this commit just provides improved feedback.
Passing None to zipfile.Zipfile has always halted execution, as did non-zip files.
  • Loading branch information
mateor authored Jan 26, 2017
1 parent d95447f commit 35ec8d7
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 6 deletions.
25 changes: 20 additions & 5 deletions src/python/pants/util/contextutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
from pants.util.tarutil import TarFile


class InvalidZipPath(ValueError):
"""Indicates a bad zip file path."""


@contextmanager
def environment_as(**kwargs):
"""Update the environment to the supplied values, for example:
Expand Down Expand Up @@ -165,16 +169,27 @@ def pushd(directory):

@contextmanager
def open_zip(path_or_file, *args, **kwargs):
"""
A with-context for zip files. Passes through positional and kwargs to zipfile.ZipFile.
"""A with-context for zip files.
:API: public
Passes through *args and **kwargs to zipfile.ZipFile.
:API: public
:param path_or_file: Full path to zip file.
:param args: Any extra args accepted by `zipfile.ZipFile`.
:param kwargs: Any extra keyword args accepted by `zipfile.ZipFile`.
:raises: `InvalidZipPath` if path_or_file is invalid.
:raises: `zipfile.BadZipfile` if zipfile.ZipFile cannot open a zip at path_or_file.
:returns: `class 'contextlib.GeneratorContextManager`.
"""
if not path_or_file:
raise InvalidZipPath('Invalid zip location: {}'.format(path_or_file))
allowZip64 = kwargs.pop('allowZip64', True)
try:
allowZip64 = kwargs.pop('allowZip64', True)
zf = zipfile.ZipFile(path_or_file, *args, allowZip64=allowZip64, **kwargs)
except zipfile.BadZipfile as bze:
raise zipfile.BadZipfile("Bad Zipfile {0}: {1}".format(path_or_file, bze))
# Use the realpath in order to follow symlinks back to the problem source file.
raise zipfile.BadZipfile("Bad Zipfile {0}: {1}".format(os.path.realpath(path_or_file), bze))
try:
yield zf
finally:
Expand Down
19 changes: 18 additions & 1 deletion tests/python/pants_test/util/test_contextutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@
import subprocess
import sys
import unittest
import zipfile

import mock

from pants.util.contextutil import (HardSystemExit, Timer, environment_as, exception_logging,
from pants.util.contextutil import (HardSystemExit, InvalidZipPath, Timer, environment_as, exception_logging,
hard_exit_handler, maybe_profiled, open_zip, pushd, stdio_as,
temporary_dir, temporary_file)

Expand Down Expand Up @@ -160,6 +161,22 @@ def test_open_zipFalse(self):
with open_zip(os.path.join(tempdir, 'test'), 'w', allowZip64=False) as zf:
self.assertFalse(zf._allowZip64)

def test_open_zip_raises_exception_on_falsey_paths(self):
falsey = (None, '', False)
for invalid in falsey:
with self.assertRaises(InvalidZipPath):
open_zip(invalid).gen.next()

def test_open_zip_returns_realpath_on_badzipfile(self):
# In case of file corruption, deleting a Pants-constructed symlink would not resolve the error.
with temporary_file() as not_zip:
with temporary_dir() as tempdir:
file_symlink = os.path.join(tempdir, 'foo')
os.symlink(not_zip.name, file_symlink)
self.assertEquals(os.path.realpath(file_symlink), os.path.realpath(not_zip.name))
with self.assertRaisesRegexp(zipfile.BadZipfile, r'{}'.format(not_zip.name)):
open_zip(file_symlink).gen.next()

def test_stdio_as(self):
old_stdout, old_stderr, old_stdin = sys.stdout, sys.stderr, sys.stdin

Expand Down

0 comments on commit 35ec8d7

Please sign in to comment.