
add tests for checksum mismatch
kalefranz committed May 13, 2019
1 parent 73308b6 commit 8690b55
Showing 5 changed files with 147 additions and 86 deletions.
54 changes: 33 additions & 21 deletions conda/core/package_cache_data.py
@@ -471,13 +471,27 @@ def make_actions_for_record(pref_or_spec):
# (1) already extracted, and
# (2) matches the md5
# If one exists, no actions are needed.
if pref_or_spec.get('md5'):
sha256 = pref_or_spec.get("sha256")
size = pref_or_spec.get("size")
md5 = pref_or_spec.get("md5")

def pcrec_matches(pcrec):
matches = True
if sha256 is not None and pcrec.sha256 is not None:
matches = sha256 == pcrec.sha256
if size is not None and pcrec.size is not None:
matches = size == pcrec.size
if matches and md5 is not None and pcrec is not None:
matches = md5 == pcrec.md5
return matches

if md5:
extracted_pcrec = next((
pcrec for pcrec in concat(PackageCacheData(pkgs_dir).query(pref_or_spec)
for pkgs_dir in context.pkgs_dirs)
if pcrec.is_extracted
), None)
if extracted_pcrec:
if extracted_pcrec and pcrec_matches(extracted_pcrec):
return None, None

# there is no extracted dist that can work, so now we look for tarballs that
@@ -491,14 +505,16 @@ def make_actions_for_record(pref_or_spec):
) if pcrec.is_fetched),
None
)
if pcrec_from_writable_cache:
if pcrec_from_writable_cache and pcrec_matches(pcrec_from_writable_cache):
# extract in place
extract_axn = ExtractPackageAction(
source_full_path=pcrec_from_writable_cache.package_tarball_full_path,
target_pkgs_dir=dirname(pcrec_from_writable_cache.package_tarball_full_path),
target_extracted_dirname=basename(pcrec_from_writable_cache.extracted_package_dir),
record_or_spec=pcrec_from_writable_cache,
md5sum=pcrec_from_writable_cache.md5,
sha256=pcrec_from_writable_cache.sha256 or sha256,
size=pcrec_from_writable_cache.size or size,
md5=pcrec_from_writable_cache.md5 or md5,
)
return None, extract_axn

Expand All @@ -509,55 +525,51 @@ def make_actions_for_record(pref_or_spec):
), None)

first_writable_cache = PackageCacheData.first_writable()
if pcrec_from_read_only_cache:
if pcrec_from_read_only_cache and pcrec_matches(pcrec_from_read_only_cache):
# we found a tarball, but it's in a read-only package cache
# we need to link the tarball into the first writable package cache,
# and then extract
try:
expected_size_in_bytes = pref_or_spec.size
except AttributeError:
expected_size_in_bytes = None
cache_axn = CacheUrlAction(
url=path_to_url(pcrec_from_read_only_cache.package_tarball_full_path),
target_pkgs_dir=first_writable_cache.pkgs_dir,
target_package_basename=pcrec_from_read_only_cache.fn,
md5sum=pref_or_spec.get('md5'),
sha256sum=pref_or_spec.get('sha256'),
expected_size_in_bytes=expected_size_in_bytes,
sha256=pcrec_from_read_only_cache.get("sha256") or sha256,
size=pcrec_from_read_only_cache.get("size") or size,
md5=pcrec_from_read_only_cache.get("md5") or md5,
)
trgt_extracted_dirname = strip_pkg_extension(pcrec_from_read_only_cache.fn)[0]
extract_axn = ExtractPackageAction(
source_full_path=cache_axn.target_full_path,
target_pkgs_dir=first_writable_cache.pkgs_dir,
target_extracted_dirname=trgt_extracted_dirname,
record_or_spec=pcrec_from_read_only_cache,
md5sum=pcrec_from_read_only_cache.md5,
sha256=pcrec_from_read_only_cache.get("sha256") or sha256,
size=pcrec_from_read_only_cache.get("size") or size,
md5=pcrec_from_read_only_cache.get("md5") or md5,
)
return cache_axn, extract_axn

# if we got here, we couldn't find a matching package in the caches
# we'll have to download one; fetch and extract
url = pref_or_spec.get('url')
assert url
try:
expected_size_in_bytes = pref_or_spec.size
except AttributeError:
expected_size_in_bytes = None

cache_axn = CacheUrlAction(
url=url,
target_pkgs_dir=first_writable_cache.pkgs_dir,
target_package_basename=pref_or_spec.fn,
md5sum=pref_or_spec.get('md5'),
sha256sum=pref_or_spec.get('sha256'),
expected_size_in_bytes=expected_size_in_bytes,
sha256=sha256,
size=size,
md5=md5,
)
extract_axn = ExtractPackageAction(
source_full_path=cache_axn.target_full_path,
target_pkgs_dir=first_writable_cache.pkgs_dir,
target_extracted_dirname=strip_pkg_extension(pref_or_spec.fn)[0],
record_or_spec=pref_or_spec,
md5sum=pref_or_spec.get('md5'),
sha256=sha256,
size=size,
md5=md5,
)
return cache_axn, extract_axn
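
For reference, a simplified standalone variant (not conda's code) of the precedence the new pcrec_matches helper establishes: compare whichever of sha256, size, and md5 both the spec and the cached record actually carry, and accept the cached record only if nothing disagrees. SimpleNamespace is a stand-in for a real PackageCacheRecord.

```python
# Simplified stand-in for the pcrec_matches idea above; not conda code.
from types import SimpleNamespace

def spec_matches_cached(spec, pcrec):
    # Compare only the fields present on both sides; any disagreement
    # disqualifies the cached record.
    for field in ("sha256", "size", "md5"):
        expected = spec.get(field)
        actual = getattr(pcrec, field, None)
        if expected is not None and actual is not None and expected != actual:
            return False
    return True

cached = SimpleNamespace(sha256=None, size=1234, md5="abc123")
assert spec_matches_cached({"md5": "abc123", "size": 1234}, cached)
assert not spec_matches_cached({"md5": "deadbeef"}, cached)
assert spec_matches_cached({"sha256": "f" * 64}, cached)  # no cached sha256 to compare against
```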

134 changes: 78 additions & 56 deletions conda/core/path_actions.py
@@ -33,7 +33,7 @@
make_menu, mkdir_p, write_as_json_to_file)
from ..gateways.disk.delete import rm_rf
from ..gateways.disk.permissions import make_writable
from ..gateways.disk.read import (compute_md5sum, compute_sha256sum, islink, lexists,
from ..gateways.disk.read import (compute_md5sum, compute_sha256sum, isfile, islink, lexists,
read_index_json)
from ..gateways.disk.update import backoff_rename, touch
from ..history import History
@@ -1066,13 +1066,13 @@ def target_full_path(self):
class CacheUrlAction(PathAction):

def __init__(self, url, target_pkgs_dir, target_package_basename,
md5sum=None, sha256sum=None, expected_size_in_bytes=None):
sha256=None, size=None, md5=None):
self.url = url
self.target_pkgs_dir = target_pkgs_dir
self.target_package_basename = target_package_basename
self.md5sum = md5sum
self.sha256sum = sha256sum
self.expected_size_in_bytes = expected_size_in_bytes
self.sha256 = sha256
self.size = size
self.md5 = md5
self.hold_path = self.target_full_path + CONDA_TEMP_EXTENSION

def verify(self):
@@ -1097,57 +1097,74 @@ def execute(self, progress_update_callback=None):
else:
backoff_rename(self.target_full_path, self.hold_path, force=True)

if self.url.startswith('file:/'):
source_path = url_to_path(self.url)
if dirname(source_path) in context.pkgs_dirs:
# if url points to another package cache, link to the writable cache
create_hard_link_or_copy(source_path, self.target_full_path)
source_package_cache = PackageCacheData(dirname(source_path))

# the package is already in a cache, so it came from a remote url somewhere;
# make sure that remote url is the most recent url in the
# writable cache urls.txt
origin_url = source_package_cache._urls_data.get_url(self.target_package_basename)
if origin_url and has_platform(origin_url, context.known_subdirs):
target_package_cache._urls_data.add_url(origin_url)
else:
# so our tarball source isn't a package cache, but that doesn't mean it's not
# in another package cache somewhere
# let's try to find the actual, remote source url by matching md5sums, and then
# record that url as the remote source url in urls.txt
# we do the search part of this operation before the create_link so that we
# don't md5sum-match the file created by 'create_link'
# there is no point in looking for the tarball in the cache that we are writing
# this file into because we have already removed the previous file if there was
# any. This also makes sure that we ignore the md5sum of a possible extracted
# directory that might exist in this cache because we are going to overwrite it
# anyway when we extract the tarball.
source_md5sum = compute_md5sum(source_path)
exclude_caches = self.target_pkgs_dir,
pc_entry = PackageCacheData.tarball_file_in_cache(
source_path, source_md5sum, exclude_caches=exclude_caches
)
source_path = url_to_path(self.url)
if self.url.startswith('file:/') and not isfile(source_path):
self._execute_local(source_path, target_package_cache, progress_update_callback)
else:
self._execute_channel(target_package_cache, progress_update_callback)

if pc_entry:
origin_url = target_package_cache._urls_data.get_url(
pc_entry.extracted_package_dir
)
else:
origin_url = None
def _execute_local(self, source_path, target_package_cache, progress_update_callback=None):
from .package_cache_data import PackageCacheData
if dirname(source_path) in context.pkgs_dirs:
# if url points to another package cache, link to the writable cache
create_hard_link_or_copy(source_path, self.target_full_path)
source_package_cache = PackageCacheData(dirname(source_path))

# the package is already in a cache, so it came from a remote url somewhere;
# make sure that remote url is the most recent url in the
# writable cache urls.txt
origin_url = source_package_cache._urls_data.get_url(self.target_package_basename)
if origin_url and has_platform(origin_url, context.known_subdirs):
target_package_cache._urls_data.add_url(origin_url)
else:
# so our tarball source isn't a package cache, but that doesn't mean it's not
# in another package cache somewhere
# let's try to find the actual, remote source url by matching md5sums, and then
# record that url as the remote source url in urls.txt
# we do the search part of this operation before the create_link so that we
# don't md5sum-match the file created by 'create_link'
# there is no point in looking for the tarball in the cache that we are writing
# this file into because we have already removed the previous file if there was
# any. This also makes sure that we ignore the md5sum of a possible extracted
# directory that might exist in this cache because we are going to overwrite it
# anyway when we extract the tarball.
source_md5sum = compute_md5sum(source_path)
exclude_caches = self.target_pkgs_dir,
pc_entry = PackageCacheData.tarball_file_in_cache(
source_path, source_md5sum, exclude_caches=exclude_caches
)

# copy the tarball to the writable cache
create_link(source_path, self.target_full_path, link_type=LinkType.copy,
force=context.force)
if pc_entry:
origin_url = target_package_cache._urls_data.get_url(
pc_entry.extracted_package_dir
)
else:
origin_url = None

if origin_url and has_platform(origin_url, context.known_subdirs):
target_package_cache._urls_data.add_url(origin_url)
else:
target_package_cache._urls_data.add_url(self.url)
# copy the tarball to the writable cache
create_link(source_path, self.target_full_path, link_type=LinkType.copy,
force=context.force)

else:
download(self.url, self.target_full_path, md5=self.md5sum, sha256=self.sha256sum,
progress_update_callback=progress_update_callback)
target_package_cache._urls_data.add_url(self.url)
if origin_url and has_platform(origin_url, context.known_subdirs):
target_package_cache._urls_data.add_url(origin_url)
else:
target_package_cache._urls_data.add_url(self.url)

def _execute_channel(self, target_package_cache, progress_update_callback=None):
kwargs = {}
if self.size is not None:
kwargs["size"] = self.size
if self.sha256:
kwargs["sha256"] = self.sha256
elif self.md5:
kwargs["md5"] = self.md5
download(
self.url,
self.target_full_path,
progress_update_callback=progress_update_callback,
**kwargs
)
target_package_cache._urls_data.add_url(self.url)

def reverse(self):
if lexists(self.hold_path):
@@ -1168,13 +1185,15 @@ def __str__(self):
class ExtractPackageAction(PathAction):

def __init__(self, source_full_path, target_pkgs_dir, target_extracted_dirname,
record_or_spec, md5sum):
record_or_spec, sha256, size, md5):
self.source_full_path = source_full_path
self.target_pkgs_dir = target_pkgs_dir
self.target_extracted_dirname = target_extracted_dirname
self.hold_path = self.target_full_path + CONDA_TEMP_EXTENSION
self.record_or_spec = record_or_spec
self.md5sum = md5sum
self.sha256 = sha256
self.size = size
self.md5 = md5

def verify(self):
self._verified = True
@@ -1198,10 +1217,13 @@ def execute(self, progress_update_callback=None):
assert url
channel = Channel(url) if has_platform(url, context.known_subdirs) else Channel(None)
fn = basename(url)
md5 = self.md5sum or compute_md5sum(self.source_full_path)
sha256 = self.sha256 or compute_sha256sum(self.source_full_path)
size = getsize(self.source_full_path)
if self.size is not None:
assert size == self.size, (size, self.size)
md5 = self.md5 or compute_md5sum(self.source_full_path)
repodata_record = PackageRecord.from_objects(
raw_index_json, url=url, channel=channel, fn=fn, md5=md5, size=size,
raw_index_json, url=url, channel=channel, fn=fn, sha256=sha256, size=size, md5=md5,
)
else:
repodata_record = PackageRecord.from_objects(self.record_or_spec, raw_index_json)
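
A minimal sketch of the fallback the extract step above relies on: any checksum the record or spec did not carry is computed from the tarball itself before the repodata record is written, and a supplied size is verified against the file on disk. fill_checksums is an illustrative helper, not part of conda.

```python
# Illustrative helper, not conda's API: compute whatever checksums were not
# supplied, and verify the size when one was given.
import hashlib
import os

def fill_checksums(tarball_path, sha256=None, size=None, md5=None):
    def file_digest(algo):
        h = hashlib.new(algo)
        with open(tarball_path, "rb") as fh:
            for chunk in iter(lambda: fh.read(1 << 20), b""):
                h.update(chunk)
        return h.hexdigest()

    actual_size = os.path.getsize(tarball_path)
    if size is not None and actual_size != size:
        raise ValueError("size mismatch: expected %s, got %s" % (size, actual_size))
    return sha256 or file_digest("sha256"), actual_size, md5 or file_digest("md5")
```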
22 changes: 13 additions & 9 deletions conda/gateways/connection/download.py
@@ -18,8 +18,8 @@
from ...base.context import context
from ...common.compat import text_type
from ...common.io import time_recorder
from ...exceptions import (BasicClobberError, CondaDependencyError, CondaHTTPError,
ChecksumMismatchError, maybe_raise)
from ...exceptions import (BasicClobberError, ChecksumMismatchError, CondaDependencyError,
CondaHTTPError, maybe_raise)

log = getLogger(__name__)

@@ -101,13 +101,17 @@ def download(
log.debug("%s, trying again" % e)
raise

if checksum_builder:
actual_checksum = checksum_builder.hexdigest()
if actual_checksum != checksum:
log.debug("%s sums mismatch for download: %s (%s != %s)", checksum_type, url,
actual_checksum, checksum)
raise ChecksumMismatchError(url, target_full_path, checksum_type, checksum,
actual_checksum)
if md5:
actual_md5 = md5_builder.hexdigest()
if actual_md5 != md5:
log.debug("md5 sums mismatch for download: %s (%s != %s)", url, actual_md5, md5)
raise ChecksumMismatchError(url, target_full_path, "md5", md5, actual_md5)
if sha256:
actual_sha256 = sha256_builder.hexdigest()
if actual_sha256 != sha256:
log.debug("sha256 sums mismatch for download: %s (%s != %s)",
url, actual_sha256, sha256)
raise ChecksumMismatchError(url, target_full_path, "sha256", sha256, actual_sha256)
if size is not None:
actual_size = size_builder
if actual_size != size:
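
For reference, a self-contained sketch of the verify-while-streaming pattern that download() applies: hash the response as it is written to disk, then raise as soon as a checksum or size disagrees. requests and the local ChecksumMismatchError class are assumptions for the sketch, not conda's implementation.

```python
# Standalone sketch of streaming download with checksum/size verification.
import hashlib
import requests

class ChecksumMismatchError(Exception):
    """Local stand-in for conda.exceptions.ChecksumMismatchError."""

def fetch_and_verify(url, target_full_path, sha256=None, md5=None, size=None):
    sha256_builder = hashlib.sha256() if sha256 else None
    md5_builder = hashlib.md5() if md5 else None
    size_builder = 0
    with requests.get(url, stream=True, timeout=60) as resp, \
            open(target_full_path, "wb") as fh:
        resp.raise_for_status()
        for chunk in resp.iter_content(chunk_size=1 << 14):
            fh.write(chunk)
            size_builder += len(chunk)
            if sha256_builder:
                sha256_builder.update(chunk)
            if md5_builder:
                md5_builder.update(chunk)
    if md5 and md5_builder.hexdigest() != md5:
        raise ChecksumMismatchError("md5 mismatch for %s" % url)
    if sha256 and sha256_builder.hexdigest() != sha256:
        raise ChecksumMismatchError("sha256 mismatch for %s" % url)
    if size is not None and size_builder != size:
        raise ChecksumMismatchError("size mismatch for %s" % url)
```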
1 change: 1 addition & 0 deletions conda/models/records.py
@@ -245,6 +245,7 @@ class PackageRecord(DictSafeMixin, Entity):

md5 = StringField(default=None, required=False, nullable=True, default_in_dump=False)
url = StringField(default=None, required=False, nullable=True, default_in_dump=False)
sha256 = StringField(default=None, required=False, nullable=True, default_in_dump=False)

@property
def schannel(self):
22 changes: 22 additions & 0 deletions tests/core/test_package_cache_data.py
@@ -3,12 +3,14 @@

import pytest

from conda import CondaMultiError
from conda.base.constants import PACKAGE_CACHE_MAGIC_FILE
from conda.base.context import conda_tests_ctxt_mgmt_def_pol
from conda.common.io import env_vars
from conda.core.index import get_index
from conda.core.package_cache_data import PackageCacheData, ProgressiveFetchExtract
from conda.core.path_actions import CacheUrlAction
from conda.exceptions import ChecksumMismatchError
from conda.exports import url_path
from conda.gateways.disk.create import copy
from conda.gateways.disk.permissions import make_read_only
@@ -217,6 +219,26 @@ def test_conda_pkg_in_pkg_cache_doesnt_overwrite_tar_bz2():
assert repodata_record["fn"] == zlib_tar_bz2_fn


def test_bad_sha256_enforcement():
with make_temp_package_cache() as pkgs_dir:
zlib_conda_prec_bad = PackageRecord.from_objects(zlib_conda_prec, sha256="0" * 10)
assert zlib_conda_prec_bad.sha256 == "0" * 10
pfe = ProgressiveFetchExtract((zlib_conda_prec_bad,))
pfe.prepare()
assert len(pfe.cache_actions) == 1
assert len(pfe.extract_actions) == 1
cache_action = pfe.cache_actions[0]
extact_action = pfe.extract_actions[0]
assert basename(cache_action.target_full_path) == zlib_conda_fn
assert cache_action.target_full_path == extact_action.source_full_path
assert basename(extact_action.target_full_path) == zlib_base_fn
with pytest.raises(CondaMultiError) as exc:
pfe.execute()
assert len(exc.value.errors) == 1
assert isinstance(exc.value.errors[0], ChecksumMismatchError)
assert "expected sha256: 0000000000" in repr(exc.value.errors[0])
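
The new test leans on a common pytest pattern: the failing fetch raises one aggregate error, and the test unwraps it to assert the specific underlying cause. A generic sketch of that pattern, with MultiError and do_fetch as illustrative stand-ins rather than conda APIs:

```python
import pytest

class MultiError(Exception):
    """Stand-in for an aggregate error such as CondaMultiError."""
    def __init__(self, errors):
        super().__init__(errors)
        self.errors = errors

def do_fetch():
    raise MultiError([ValueError("sha256 mismatch")])

def test_wrapped_error_is_surfaced():
    with pytest.raises(MultiError) as exc:
        do_fetch()
    (err,) = exc.value.errors          # exactly one underlying failure
    assert isinstance(err, ValueError)
    assert "sha256 mismatch" in str(err)
```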


def test_tar_bz2_in_cache_not_extracted():
"""
Test that if a .tar.bz2 exists in the package cache (not extracted), and the complementary
