prune more by default
msarahan committed May 28, 2019
1 parent efbab2a · commit d2d954d
Showing 6 changed files with 105 additions and 43 deletions.
2 changes: 1 addition & 1 deletion conda/core/prefix_data.py
@@ -61,7 +61,7 @@ def __init__(self, prefix_path, pip_interop_enabled=None):
if pip_interop_enabled is not None
else context.pip_interop_enabled)

@time_recorder
@time_recorder(module_name=__name__)
def load(self):
self.__prefix_records = {}
_conda_meta_dir = join(self.prefix_path, 'conda-meta')
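
The hunk above switches time_recorder from the bare decorator form to an explicit factory call that passes module_name. A minimal sketch of that decorator-factory pattern, assuming nothing about conda's actual implementation (time_recorder_sketch and its print output are invented for illustration):

import functools
import time

def time_recorder_sketch(module_name=None):
    # Hypothetical stand-in for conda.common.io.time_recorder, illustration only.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            start = time.time()
            try:
                return func(*args, **kwargs)
            finally:
                label = "%s.%s" % (module_name or "unknown", func.__name__)
                print("%s took %.3f s" % (label, time.time() - start))
        return wrapper
    return decorator

@time_recorder_sketch(module_name=__name__)  # factory form, as in the new decorator call
def load():
    return {}
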
85 changes: 76 additions & 9 deletions conda/core/solve.py
Expand Up @@ -3,6 +3,8 @@
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import absolute_import, division, print_function, unicode_literals

from collections import deque
import copy
from genericpath import exists
from logging import DEBUG, getLogger
from os.path import join
@@ -257,18 +259,14 @@ def solve_final_state(self, update_modifier=NULL, deps_modifier=NULL, prune=NULL

@time_recorder(module_name=__name__)
def _collect_all_metadata(self, ssc):
if ssc.prune: # or update_modifier == UpdateModifier.UPDATE_ALL # pending conda/constructor#138 # NOQA
# Users are struggling with the prune functionality in --update-all, due to
# https://github.com/conda/constructor/issues/138. Until that issue is resolved,
# and for the foreseeable future, it's best to be more conservative with --update-all.

if ssc.specs_from_history_map:
# Start with empty specs map for UPDATE_ALL because we're optimizing the update
# only for specs the user has requested; it's ok to remove dependencies.

# However, because of https://github.com/conda/constructor/issues/138, we need
# to hard-code keeping conda, conda-build, and anaconda, if they're already in
# the environment.
for pkg_name in ('anaconda', 'conda', 'conda-build'):
for pkg_name in ('anaconda', 'conda', 'conda-build', 'python'):
if ssc.prefix_data.get(pkg_name, None):
ssc.specs_from_history_map[pkg_name] = MatchSpec(pkg_name)
else:
@@ -279,6 +277,12 @@ def _collect_all_metadata(self, ssc):
# add in historically-requested specs
ssc.specs_map.update(ssc.specs_from_history_map)

# add in aggressively updated packages
ssc.specs_map.update(
(prec.name, MatchSpec(prec.name)) for prec in ssc.prefix_data.iter_records()
if MatchSpec(prec.name) in context.aggressive_update_packages
)

prepared_specs = set(concatv(
self.specs_to_remove,
self.specs_to_add,
@@ -383,6 +387,63 @@ def _add_specs(self, ssc):
# TLDR: when working with MatchSpec objects,
# - to minimize the version change, set MatchSpec(name=name, target=prec.dist_str())
# - to freeze the package, set all the components of MatchSpec individually

# the only things we should consider freezing are things that don't conflict with the new
# specs being added.
explicit_spec_package_pool = {}
if ssc.update_modifier == UpdateModifier.FREEZE_INSTALLED:
specs_seen = set()
specs_pool = deque(copy.copy(self.specs_to_add))
while specs_pool:
spec = specs_pool.popleft()
freezes = explicit_spec_package_pool.get(spec.name, set())
matches = set(ssc.r.find_matches(spec))
explicit_spec_package_pool[spec.name] = freezes | matches
specs_seen.add(spec)
for prec in matches:
for dep in prec.get('depends', []):
dep = MatchSpec(dep)
if dep not in specs_seen and dep not in specs_pool:
specs_pool.append(dep)

def pkg_in_shared_pool(prec, shared_pool, known_bad_specs, known_good_specs):
"""Determine if a package or its recursive deps conflict with the shared pool"""
if prec.name in shared_pool:
if prec not in shared_pool[prec.name]:
return False

specs_pool = deque(MatchSpec(_) for _ in prec.depends)
while specs_pool:
spec = specs_pool.popleft()
matches = ssc.r.find_matches(spec)
# match the immediate dependencies
if spec.name in shared_pool:
overlap = shared_pool[spec.name] & set(matches)
if overlap:
# restrict the space further, potentially
shared_pool[spec.name] = overlap
known_good_specs.add(spec)
else:
known_bad_specs.add(spec)
return False
else:
known_good_specs.add(spec)

# recurse into the next layer of deps
for prec in matches:
for dep in prec.get('depends', []):
dep = MatchSpec(dep)
if not any(dep in col for col in (known_bad_specs, known_good_specs)):
if dep.name in shared_pool:
if (spec.name in shared_pool and not
(shared_pool[spec.name] & set(ssc.r.find_matches(dep)))):
continue
specs_pool.append(dep)
return True

known_bad_specs = set()
known_good_specs = set()

for pkg_name, spec in iteritems(ssc.specs_map):
matches_for_spec = tuple(prec for prec in ssc.solution_precs if spec.match(prec))
if matches_for_spec:
@@ -398,11 +459,17 @@ def _add_specs(self, ssc):
""") % (pkg_name, spec,
dashlist((text_type(s) for s in matches_for_spec), indent=4)))
target_prec = matches_for_spec[0]

if ssc.update_modifier == UpdateModifier.FREEZE_INSTALLED:
new_spec = MatchSpec(target_prec)
if pkg_name in context.aggressive_update_packages:
ssc.specs_map[pkg_name] = pkg_name
elif pkg_in_shared_pool(target_prec, explicit_spec_package_pool,
known_bad_specs, known_good_specs):
ssc.specs_map[pkg_name] = target_prec.to_match_spec()
elif pkg_name in ssc.specs_from_history_map:
ssc.specs_map[pkg_name] = ssc.specs_from_history_map[pkg_name]
else:
new_spec = MatchSpec(spec, target=target_prec.dist_str())
ssc.specs_map[pkg_name] = new_spec
ssc.specs_map[pkg_name] = MatchSpec(spec, target=target_prec.dist_str())
log.debug("specs_map with targets: %s", ssc.specs_map)

# If we're in UPDATE_ALL mode, we need to drop all the constraints attached to specs,
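
To illustrate the reasoning behind the FREEZE_INSTALLED changes above: the solver first collects, with a breadth-first walk over specs_to_add and their dependencies, a pool of records that could satisfy the explicit request, and pkg_in_shared_pool then only allows an installed package to be frozen if it (and, recursively, its dependencies) overlaps that pool. Below is a simplified, self-contained sketch of the same idea against a toy index; the record format, TOY_INDEX, find_matches, build_pool, and can_freeze are all invented for illustration and are not conda's API.

from collections import deque

# Toy repodata: package name -> candidate records, each with a version and dependency specs.
TOY_INDEX = {
    "python": [{"name": "python", "version": "2.7", "depends": []},
               {"name": "python", "version": "3.7", "depends": []}],
    "flask":  [{"name": "flask", "version": "1.0", "depends": ["python 3.7"]}],
}

def find_matches(spec):
    # Match a "name" or "name version" spec string against the toy index.
    parts = spec.split()
    name = parts[0]
    version = parts[1] if len(parts) > 1 else None
    return [rec for rec in TOY_INDEX.get(name, [])
            if version is None or rec["version"] == version]

def build_pool(requested_specs):
    # Breadth-first walk over the requested specs and their dependencies,
    # collecting every record that could take part in satisfying the request.
    pool = {}
    seen = set()
    queue = deque(requested_specs)
    while queue:
        spec = queue.popleft()
        if spec in seen:
            continue
        seen.add(spec)
        matches = find_matches(spec)
        name = spec.split()[0]
        pool.setdefault(name, set()).update(
            (rec["name"], rec["version"]) for rec in matches)
        for rec in matches:
            queue.extend(dep for dep in rec["depends"] if dep not in seen)
    return pool

def can_freeze(installed_record, pool):
    # Freeze an installed record only if it does not conflict with the pool
    # built from the explicit request (a much-simplified pkg_in_shared_pool).
    name = installed_record["name"]
    return name not in pool or (name, installed_record["version"]) in pool[name]

pool = build_pool(["flask"])
installed_py27 = {"name": "python", "version": "2.7", "depends": []}
print(can_freeze(installed_py27, pool))  # False: flask needs python 3.7, so python 2.7 is not frozen
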
6 changes: 3 additions & 3 deletions conda/resolve.py
@@ -505,11 +505,11 @@ def filter_group(_specs):
reduced = nnew < nold
if reduced:
log.debug('%s: pruned from %d -> %d' % (name, nold, nnew))
if any(ms.optional for ms in _specs):
return reduced
elif nnew == 0:
if nnew == 0:
# Indicates that a conflict was found; we can exit early
return None
elif any(ms.optional for ms in _specs):
return reduced

# Perform the same filtering steps on any dependencies shared across
# *all* packages in the group. Even if just one of the packages does
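
The filter_group reordering above makes the empty-group check take priority over the optional-spec early return; previously a group containing optional specs could return early even after being pruned to zero candidates, hiding the conflict. A toy sketch of the two orderings (nnew, reduced, and has_optional stand in for the real local variables):

def filter_group_old(nnew, reduced, has_optional):
    # Old order: optional specs short-circuit before the empty-group check.
    if has_optional:
        return reduced
    elif nnew == 0:
        return None   # conflict
    return reduced

def filter_group_new(nnew, reduced, has_optional):
    # New order: a group pruned down to zero candidates always signals a conflict.
    if nnew == 0:
        return None   # conflict found; we can exit early
    elif has_optional:
        return reduced
    return reduced

# A group of optional specs that has been pruned down to zero candidates:
print(filter_group_old(nnew=0, reduced=True, has_optional=True))   # True -> conflict masked
print(filter_group_new(nnew=0, reduced=True, has_optional=True))   # None -> conflict surfaced
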
1 change: 1 addition & 0 deletions tests/core/test_solve.py
@@ -2310,6 +2310,7 @@ def test_freeze_deps_1():
unlink_order = (
'channel-2::six-1.7.3-py34_0',
'channel-2::python-3.4.5-0',
'channel-2::xz-5.2.3-0',
)
link_order = (
'channel-2::mkl-2017.0.3-0',
6 changes: 3 additions & 3 deletions tests/test_cli.py
@@ -150,10 +150,10 @@ def test_search_3(self):
with make_temp_env() as prefix:
stdout, stderr, _ = run_command(Commands.SEARCH, prefix, "*/linux-64::nose==1.3.7[build=py37_2]", "--info", use_exception_handler=True)
result = stdout.replace("Loading channels: ...working... done", "")
assert "file name : nose-1.3.7-py37_2.tar.bz2" in result
assert "file name : nose-1.3.7-py37_2.conda" in result
assert "name : nose" in result
assert "url : https://repo.anaconda.com/pkgs/main/linux-64/nose-1.3.7-py37_2.tar.bz2" in result
assert "md5 : ff390a1e44d77e54914ca1a2c9e75445" in result
assert "url : https://repo.anaconda.com/pkgs/main/linux-64/nose-1.3.7-py37_2.conda" in result
# assert "md5 : ff390a1e44d77e54914ca1a2c9e75445" in result

@pytest.mark.integration
def test_search_4(self):
48 changes: 21 additions & 27 deletions tests/test_create.py
@@ -983,7 +983,8 @@ def test_tarball_install_and_bad_metadata(self):
flask_fname = flask_data['fn']
tar_old_path = join(PackageCacheData.first_writable().pkgs_dir, flask_fname)

assert isfile(tar_old_path)
# if a .tar.bz2 is already in the file cache, it's fine. Accept it or the .conda file here.
assert isfile(tar_old_path) or isfile(tar_old_path.replace('.conda', '.tar.bz2'))

with pytest.raises(DryRunExit):
run_command(Commands.INSTALL, prefix, tar_old_path, "--dry-run")
@@ -1036,6 +1037,7 @@ def test_update_with_pinned_packages(self):
# regression test for #6914
with make_temp_env("-c", "https://repo.anaconda.com/pkgs/free", "python=2.7.12") as prefix:
assert package_is_installed(prefix, "readline=6.2")
# removing the history allows python to be updated too
open(join(prefix, 'conda-meta', 'history'), 'w').close()
PrefixData._cache_.clear()
run_command(Commands.UPDATE, prefix, "readline")
@@ -1122,13 +1124,8 @@ def test_install_prune_flag(self):
assert package_is_installed(prefix, 'python=3')
run_command(Commands.REMOVE, prefix, "flask")
assert not package_is_installed(prefix, 'flask')
assert package_is_installed(prefix, 'itsdangerous')
assert package_is_installed(prefix, 'python=3')

run_command(Commands.INSTALL, prefix, 'pytz', '--prune')

# this should get pruned when flask is removed
assert not package_is_installed(prefix, 'itsdangerous')
assert package_is_installed(prefix, 'pytz')
assert package_is_installed(prefix, 'python=3')

@pytest.mark.skipif(on_win, reason="readline is only a python dependency on unix")
@@ -1220,27 +1217,19 @@ def test_install_features(self):
assert package_is_installed(prefix, "numpy")
assert package_is_installed(prefix, "nomkl")
assert not package_is_installed(prefix, "mkl")
numpy_prec = PrefixData(prefix).get("numpy")
assert "nomkl" in numpy_prec.build

with make_temp_env("python=2", "numpy=1.13") as prefix:
assert package_is_installed(prefix, "numpy")
assert not package_is_installed(prefix, "nomkl")
assert package_is_installed(prefix, "mkl")
numpy_prec = PrefixData(prefix).get("numpy")
assert "nomkl" not in numpy_prec.build

run_command(Commands.INSTALL, prefix, "nomkl")
run_command(Commands.INSTALL, prefix, "nomkl", no_capture=True)
assert package_is_installed(prefix, "numpy")
assert package_is_installed(prefix, "nomkl")
assert package_is_installed(prefix, "mkl") # it's fine for mkl to still be here I guess
numpy_prec = PrefixData(prefix).get("numpy")
assert "nomkl" in numpy_prec.build

run_command(Commands.INSTALL, prefix, "nomkl", "--prune")
assert not package_is_installed(prefix, "mkl")
assert package_is_installed(prefix, "blas=1.0=openblas")
assert not package_is_installed(prefix, "mkl_fft")
assert not package_is_installed(prefix, "mkl_random")
assert not package_is_installed(prefix, "mkl") # pruned as an indirect dep

def test_clone_offline_simple(self):
with make_temp_env("bzip2") as prefix:
@@ -1690,9 +1679,11 @@ def test_conda_pip_interop_dependency_satisfied_by_pip(self):
assert not error

output, _, _ = run_command(Commands.INSTALL, prefix, 'flask', '--dry-run', '--json',
use_exception_handler=True)
use_exception_handler=True)
json_obj = json.loads(output)
print(json_obj)
# itsdangerous shouldn't be in this list, because it's already present and satisfied
# by the pip package
assert any(rec["name"] == "flask" for rec in json_obj["actions"]["LINK"])
assert not any(rec["name"] == "itsdangerous" for rec in json_obj["actions"]["LINK"])

@@ -1848,7 +1839,8 @@ def test_conda_pip_interop_pip_clobbers_conda(self):
assert not stderr
assert "All requested packages already installed." in stdout

stdout, stderr, _ = run_command(Commands.INSTALL, prefix, "six")
stdout, stderr, _ = run_command(Commands.INSTALL, prefix, "six", "--repodata-fn",
"repodata.json", no_capture=True)
assert not stderr
assert package_is_installed(prefix, "six>=1.11")
output, err, _ = run_command(Commands.RUN, prefix, "python", "-m", "pip", "freeze")
@@ -1982,7 +1974,7 @@ def test_conda_pip_interop_compatible_release_operator(self):
# Regression test for #7776
# important to start the env with six 1.9. That version forces an upgrade later in the test
with make_temp_env("-c", "https://repo.anaconda.com/pkgs/free", "pip=10", "six=1.9", "appdirs",
use_restricted_unicode=on_win) as prefix:
use_restricted_unicode=on_win, no_capture=True) as prefix:
run_command(Commands.CONFIG, prefix, "--set", "pip_interop_enabled", "true")
assert package_is_installed(prefix, "python")
assert package_is_installed(prefix, "six=1.9")
@@ -2014,14 +2006,14 @@ def test_conda_pip_interop_compatible_release_operator(self):

with pytest.raises(DryRunExit):
run_command(Commands.INSTALL, prefix, "-c", "https://repo.anaconda.com/pkgs/free",
"agate=1.6", "--dry-run")
"agate=1.6", "--dry-run", no_capture=True)

def test_install_freezes_env_by_default(self):
"""We pass --no-update-deps/--freeze-installed by default, effectively. This helps speed things
up by not considering changes to existing stuff unless the solve ends up unsatisfiable."""

# create an initial env
with make_temp_env("python=2", use_restricted_unicode=on_win) as prefix:
with make_temp_env("python=2", use_restricted_unicode=on_win, no_capture=True) as prefix:
assert package_is_installed(prefix, "python=2.7.*")
stdout, stderr, _ = run_command(Commands.LIST, prefix, '--json')
pkgs = json.loads(stdout)
@@ -2030,20 +2022,22 @@ def test_install_freezes_env_by_default(self):
if entry['name'] in DEFAULT_AGGRESSIVE_UPDATE_PACKAGES:
specs.append(entry['name'])
else:
specs.append(entry['channel'] + '/' + entry['platform'] + '::' +
entry['name'] + '=' + entry['version'] + '=' + entry['build_string'])
# python is the only explicit dep. It's the only thing that enters in here.
if entry['name'] == 'python':
specs.append(entry['channel'] + '/' + entry['platform'] + '::' +
entry['name'] + '=' + entry['version'] + '=' + entry['build_string'])

specs.append('imagesize')
specs = {MatchSpec(s) for s in specs}
import conda.core.solve
r = conda.core.solve.Resolve(get_index())
reduced_index = r.get_reduced_index([MatchSpec('imagesize')])

# now add requests to that env. The call to get_reduced_index should include our exact specs
# now add imagesize to that env. The call to get_reduced_index should include our exact specs
# for the existing env. doing so will greatly reduce the search space for the initial solve
with patch.object(conda.core.solve.Resolve, 'get_reduced_index',
return_value=reduced_index) as mock_method:
run_command(Commands.INSTALL, prefix, "imagesize")
run_command(Commands.INSTALL, prefix, "imagesize", no_capture=True)
# TODO: this should match the specs above. It does, at least as far as I can tell from text
# comparison. Unfortunately, it doesn't evaluate as a match, even though the text all matches.
# I suspect some strange equality of objects issue.
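
The docstring of test_install_freezes_env_by_default above describes the default behavior under test: installed packages are effectively frozen unless the solve becomes unsatisfiable, while explicitly requested packages and the aggressive-update set stay loose. A rough sketch of how a `conda list --json` payload could be turned into that kind of spec list, mirroring the loop in the test; the AGGRESSIVE_UPDATE set, the specs_from_listing helper, and the listing data are made up for illustration and are not conda's API:

import json

# Hypothetical stand-in for DEFAULT_AGGRESSIVE_UPDATE_PACKAGES; the real set lives in conda's config.
AGGRESSIVE_UPDATE = {"ca-certificates", "certifi", "openssl"}

def specs_from_listing(list_json, explicit_names):
    # Aggressive-update packages stay name-only, explicitly requested packages are
    # pinned to exact channel/version/build, and everything else is left out so the
    # solver can freeze it implicitly.
    specs = []
    for entry in json.loads(list_json):
        if entry["name"] in AGGRESSIVE_UPDATE:
            specs.append(entry["name"])
        elif entry["name"] in explicit_names:
            specs.append("%s/%s::%s=%s=%s" % (entry["channel"], entry["platform"],
                                              entry["name"], entry["version"],
                                              entry["build_string"]))
    return specs

# Made-up listing data, shaped like `conda list --json` output.
listing = json.dumps([
    {"name": "python", "version": "2.7.15", "build_string": "h33da82c_4",
     "channel": "defaults", "platform": "linux-64"},
    {"name": "ca-certificates", "version": "2019.5.15", "build_string": "0",
     "channel": "defaults", "platform": "linux-64"},
    {"name": "readline", "version": "7.0", "build_string": "h7b6447c_5",
     "channel": "defaults", "platform": "linux-64"},
])
print(specs_from_listing(listing, explicit_names={"python"}))
# ['defaults/linux-64::python=2.7.15=h33da82c_4', 'ca-certificates']
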
