Skip to content

Commit

Permalink
Breadth first search for conflicts (conda#9006)
Browse files Browse the repository at this point in the history
Breadth first search for conflicts
  • Loading branch information
msarahan authored Aug 1, 2019
2 parents 83b0f41 + 7df669d commit 5c66306
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 97 deletions.
78 changes: 26 additions & 52 deletions conda/models/prefix_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,58 +409,32 @@ def __init__(self, records, specs=()):
consolidated_graph[node.name] = cg
self.graph_by_name = consolidated_graph

def depth_first_search_by_name(self, root_spec, spec_name, allowed_specs):
"""Return paths from root_spec to spec_name"""
if root_spec.name == spec_name:
return [[root_spec]]
visited = set()

def build_dependency_chain(node, spc, chains=None):
visited.add(node)
if not chains:
chains = [[]]
chain = chains[-1]
if node == spc:
chain.append(spc)
return [chain]
else:
chain.append(node)
children = sorted(self.graph_by_name.get(node, set()),
key=lambda x: list(self.graph_by_name.keys()).index(x))
for child in children:
if child not in visited:
new_chain = [[c for c in chain]]
chains.extend(build_dependency_chain(child, spc, new_chain))
return chains

chains = build_dependency_chain(root_spec.name, spec_name)

final_chains = []
for chain in sorted(chains, key=len):
if chain[0] == root_spec.name and chain[-1] == spec_name:
# remap to matchspecs
# specs_by_name has two keys: parent, then name of spec
matchspecs_for_chain = [[]]
for idx, name in enumerate(chain[1:]):
matchspecs_to_merge = []
matchspecs = self.specs_by_name[chain[idx]][name]
for ms in matchspecs:
if any(ms.match(rec) for rec in allowed_specs.get(ms.name, [])):
matchspecs_to_merge.append(ms)
try:
merged = MatchSpec.merge(matchspecs_to_merge)
if merged:
for ms_chain in matchspecs_for_chain:
ms_chain.append(merged[0])
except ValueError:
matchspecs_for_chain = [_[:] for _ in
matchspecs_for_chain * len(matchspecs_to_merge)]
for idx, ms in enumerate(matchspecs_to_merge):
matchspecs_for_chain[idx].append(ms)
for ms_chain in matchspecs_for_chain:
final_chains.append(tuple([root_spec] + ms_chain))
break
return set(final_chains)
def breadth_first_search_by_name(self, root_spec, target_spec):
    """Return the shortest path of specs from ``root_spec`` to ``target_spec``.

    Performs a breadth-first search over the dependency information stored in
    ``self.specs_by_name`` (a mapping of package name -> {parent name: deps}).
    Nodes on a path are compared by equality, so the search stops as soon as a
    spec equal to ``target_spec`` is dequeued.

    Returns:
        A list of specs forming the path (``[root_spec]`` when the root
        already equals the target), or ``None`` when no path exists.
    """
    from collections import deque  # O(1) popleft; list.pop(0) is O(n)

    queue = deque([[root_spec]])
    # NOTE: a list, not a set — specs are not guaranteed hashable here,
    # and equality-based membership is what the search semantics need.
    visited = []
    while queue:
        path = queue.popleft()
        node = path[-1]
        if node in visited:
            continue
        visited.append(node)
        if node == target_spec:
            return path
        specs = self.specs_by_name.get(node.name)
        if specs is None:
            # node is not a key in the graph; nothing to expand
            continue
        children = []
        for deps in specs.values():
            children.extend(deps)
        for adj in children:
            # A same-named spec pinned to a different version can never
            # lead to target_spec, so don't enqueue it.
            if adj.name == target_spec.name and adj.version != target_spec.version:
                continue
            queue.append(path + [adj])
    # implicit None: target_spec is unreachable from root_spec


# if __name__ == "__main__":
Expand Down
6 changes: 6 additions & 0 deletions conda/models/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,12 @@ def to_match_spec(self):
build=self.build,
)

def to_simple_match_spec(self):
    """Build a MatchSpec constraining only this record's name and version.

    Unlike ``to_match_spec`` above, the build string is deliberately left
    out, producing a looser spec.
    """
    fields = {"name": self.name, "version": self.version}
    return MatchSpec(**fields)

@property
def namekey(self):
return "global:" + self.name
Expand Down
132 changes: 97 additions & 35 deletions conda/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from ._vendor.auxlib.collection import frozendict
from ._vendor.auxlib.decorators import memoize, memoizemethod
from ._vendor.toolz import concat, groupby
from ._vendor.tqdm import tqdm
from .base.constants import ChannelPriority, MAX_CHANNEL_PRIORITY, SatSolverChoice
from .base.context import context
from .common.compat import iteritems, iterkeys, itervalues, odict, on_win, text_type
Expand All @@ -22,7 +23,6 @@
from .models.channel import Channel, MultiChannel
from .models.enums import NoarchType, PackageType
from .models.match_spec import MatchSpec
from .models.prefix_graph import GeneralGraph
from .models.records import PackageRecord
from .models.version import VersionOrder

Expand Down Expand Up @@ -287,7 +287,6 @@ def _classify_bad_deps(self, bad_deps, specs_to_add, history_specs, strict_chann
'cuda': set(), }
specs_to_add = set(MatchSpec(_) for _ in specs_to_add or [])
history_specs = set(MatchSpec(_) for _ in history_specs or [])

for chain in bad_deps:
# sometimes chains come in as strings
if chain[-1].name == 'python' and len(chain) > 1 and \
Expand Down Expand Up @@ -315,7 +314,7 @@ def _classify_bad_deps(self, bad_deps, specs_to_add, history_specs, strict_chann
if not match:
classes['direct'].add((tuple(chain), str(MatchSpec(chain[0], target=None))))
else:
if len(chain) > 1 or not any(len(c) > 1 and c[0] == chain[0] for c in bad_deps):
if len(chain) > 1 or any(len(c) >= 1 and c[0] == chain[0] for c in bad_deps):
classes['direct'].add((tuple(chain),
str(MatchSpec(chain[0], target=None))))

Expand All @@ -333,10 +332,57 @@ def find_matches_with_strict(self, ms, strict_channel_priority):
return tuple(f for f in matches if f.channel.name == sole_source_channel_name)

def find_conflicts(self, specs, specs_to_add=None, history_specs=None):
    """Analyze unsatisfiable specs and raise UnsatisfiableError with details.

    Always raises; the conflict map built here becomes the error payload.
    """
    if not context.json:
        # user-facing progress note: conflict analysis can be very slow
        print("\nFound conflicts! Looking for incompatible packages.\n"
              "This can take several minutes. Press CTRL-C to abort.")
    conflict_map = self.build_conflict_map(specs, specs_to_add, history_specs)
    is_strict = context.channel_priority == ChannelPriority.STRICT
    raise UnsatisfiableError(conflict_map, strict=is_strict)

def group_and_merge_specs(self, bad_deps_for_spec):
    """Collapse dependency chains whose terminal specs can be unified.

    Chains are bucketed by ``chain[-1].name and len(chain)``.  For a bucket
    holding more than one chain, the terminal specs are merged with
    ``MatchSpec.union`` and grafted onto the first chain's prefix; if the
    union is impossible (ValueError), every chain in the bucket is kept
    unchanged.  Singleton buckets pass through as-is.
    """
    merged_chains = []
    buckets = groupby(lambda chain: chain[-1].name and len(chain),
                      bad_deps_for_spec)
    for bucket in buckets.values():
        if len(bucket) <= 1:
            merged_chains.extend(bucket)
            continue
        try:
            union_spec = MatchSpec.union(chain[-1] for chain in bucket)[0]
            combined = bucket[0][0:-1]
            combined.append(union_spec)
            merged_chains.append(combined)
        except ValueError:
            # terminal specs cannot be unified; keep each chain separately
            merged_chains.extend(bucket)
    return merged_chains

def breadth_first_search_by_spec(self, root_spec, target_spec, allowed_specs):
    """Return the shortest chain of specs from ``root_spec`` to ``target_spec``.

    Breadth-first search over ``allowed_specs`` (a mapping of package name ->
    iterable of records, each carrying a ``depends`` list of dep strings,
    which are converted to MatchSpecs as edges are expanded).

    Returns:
        A list of specs forming the path (``[root_spec]`` when the root
        already equals the target), or ``None`` when no path exists.
    """
    from collections import deque  # O(1) popleft; list.pop(0) is O(n)

    queue = deque([[root_spec]])
    # NOTE: a list, not a set — specs are not guaranteed hashable here
    visited = []
    while queue:
        path = queue.popleft()
        node = path[-1]
        if node in visited:
            continue
        visited.append(node)
        if node == target_spec:
            return path
        # single lookup (was a redundant `in allowed_specs.keys()` + .get)
        records = allowed_specs.get(node.name)
        if records is None:
            continue
        children = []
        for rec in records:
            children.extend(MatchSpec(dep) for dep in rec.depends)
        for adj in children:
            # A same-named spec pinned to a different version can never
            # lead to target_spec, so don't enqueue it.
            if adj.name == target_spec.name and adj.version != target_spec.version:
                continue
            queue.append(path + [adj])
    # implicit None: target_spec is unreachable from root_spec

def build_conflict_map(self, specs, specs_to_add=None, history_specs=None):
"""Perform a deeper analysis on conflicting specifications, by attempting
to find the common dependencies that might be the cause of conflicts.
Expand Down Expand Up @@ -384,38 +430,53 @@ def build_conflict_map(self, specs, specs_to_add=None, history_specs=None):
bad_deps = []
dep_collections = tuple(set(sdep.keys()) for sdep in sdeps.values())
deps = set.union(*dep_collections) if dep_collections else []
# for each possible package being considered, look at how pools interact
for dep in deps:
sdeps_with_dep = {}
for k, v in sdeps.items():
if dep in v:
sdeps_with_dep[k] = v
if len(sdeps_with_dep) <= 1:
continue
# if all of the pools overlap, we're good. Next dep.
if bool(set.intersection(*[v[dep] for v in sdeps_with_dep.values()])):
continue
# start out filtering nothing. invalid_chains will tweak this dict to filter more
# as it goes
records = set.union(*tuple(rec for records in sdeps_with_dep.values()
for rec in records.values()))
# determine the invalid chains for each specific spec. Each of these chains
# should start with `spec` and end with the first encountered conflict. A
# conflict is something that is either not available at all, or is present in
# more than one pool, but those pools do not all overlap.

records_for_graph = groupby(lambda r: r.name,
(r for r in records if isinstance(r, PackageRecord)))
# records_per_name is a completely arbitrary number here. It is meant to gather more
# than just one record, to explore the space of dependencies a bit. Doing all of them
# can be an enormous problem, though. This is hopefully a good compromise.
records_per_name = 7
g = GeneralGraph([_ for v in records_for_graph.values() for _ in v[:records_per_name]])
spec_order = sorted(sdeps_with_dep.keys(),
key=lambda x: list(g.graph_by_name.keys()).index(x.name))
for spec in spec_order:
# the DFS approach works well when things are actually in the graph
bad_deps.extend(g.depth_first_search_by_name(spec, dep, sdeps[spec]))

with tqdm(total=len(deps), desc="Finding conflicts",
leave=False, disable=context.json) as t:
for dep in deps:
t.set_description("Examining {}".format(dep))
t.update()
sdeps_with_dep = {}
for k, v in sdeps.items():
if dep in v:
sdeps_with_dep[k] = v
if len(sdeps_with_dep) <= 1:
continue
# if all of the pools overlap, we're good. Next dep.
if bool(set.intersection(*[v[dep] for v in sdeps_with_dep.values()])):
continue
spec_order = sdeps_with_dep.keys()
for spec in tqdm(spec_order, desc="Comparing specs that have this dependency",
leave=False, disable=context.json):
allowed_specs = sdeps[spec]
dep_vers = []
for key, val in allowed_specs.items():
if key != [_.name for _ in spec_order]:
dep_vers.extend([v.depends for v in val])
dep_ms = {MatchSpec(p) for pkgs in dep_vers for p in pkgs if dep in p}
dep_ms.update(msspec for msspec in sdeps.keys() if msspec.name == dep)
bad_deps_for_spec = []
# # sort specs from least specific to most specific. Only continue
# # to examine a dep if a conflict hasn't been found for its name
# dep_ms = sorted(list(dep_ms), key=lambda x: (
# exactness_and_number_of_deps(self, x), x.dist_str()))
# conflicts_found = set()
with tqdm(total=len(dep_ms), desc="Finding conflict paths",
leave=False, disable=context.json) as t2:
for conflicting_spec in dep_ms:
t2.set_description("Finding shortest conflict path for {}"
.format(conflicting_spec))
t2.update()
if conflicting_spec.name == spec.name:
chain = [conflicting_spec] if \
conflicting_spec.version == spec.version else None
else:
chain = self.breadth_first_search_by_spec(
spec, conflicting_spec, allowed_specs)
if chain:
bad_deps_for_spec.append(chain)
if bad_deps_for_spec:
bad_deps.extend(self.group_and_merge_specs(bad_deps_for_spec))

if not bad_deps:
# no conflicting nor missing packages found, return the bad specs
Expand All @@ -427,6 +488,7 @@ def build_conflict_map(self, specs, specs_to_add=None, history_specs=None):
deps = groupby(lambda x: x.name, deps)

bad_deps.extend([[spec, MatchSpec.union(_)[0]] for _ in deps.values()])

bad_deps = self._classify_bad_deps(bad_deps, specs_to_add, history_specs,
strict_channel_priority)
return bad_deps
Expand Down
50 changes: 49 additions & 1 deletion tests/models/test_prefix_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from conda.exceptions import CyclicalDependencyError
from conda.models.match_spec import MatchSpec
import conda.models.prefix_graph
from conda.models.prefix_graph import PrefixGraph
from conda.models.prefix_graph import PrefixGraph, GeneralGraph
from conda.models.records import PackageRecord
import pytest
from tests.core.test_solve import get_solver_4, get_solver_5

Expand Down Expand Up @@ -920,3 +921,50 @@ def test_deep_cyclical_dependency():
'sqlite',
)
assert nodes == order


def test_general_graph_bfs_simple():
    """BFS over a small DAG: one hop, two hops, trivial, absent, and reversed."""
    def rec(name, deps=None):
        kwargs = dict(name=name, version="1", build="0", build_number=0)
        if deps is not None:
            kwargs["depends"] = deps
        return PackageRecord(**kwargs)

    graph = GeneralGraph([
        rec("a", ["b", "c", "d"]),
        rec("b", ["e"]),
        rec("c"),
        rec("d", ["f", "g"]),
        rec("e"),
        rec("f"),
        rec("g"),
    ])

    # direct child of the root
    assert graph.breadth_first_search_by_name(MatchSpec("a"), MatchSpec("c")) == \
        [MatchSpec("a"), MatchSpec("c")]
    # two hops, routed through d
    assert graph.breadth_first_search_by_name(MatchSpec("a"), MatchSpec("f")) == \
        [MatchSpec("a"), MatchSpec("d"), MatchSpec("f")]
    # trivial path: root already is the target
    assert graph.breadth_first_search_by_name(MatchSpec("a"), MatchSpec("a")) == \
        [MatchSpec("a")]
    # target absent from the graph
    assert graph.breadth_first_search_by_name(MatchSpec("a"), MatchSpec("z")) is None
    # edges are directed: no path from a dependency back up to its parent
    assert graph.breadth_first_search_by_name(MatchSpec("d"), MatchSpec("a")) is None


def test_general_graph_bfs_version():
    """BFS honors version pins: each pinned `g` is reached via its own parent."""
    def rec(name, version, deps=None):
        kwargs = dict(name=name, version=version, build="0", build_number=0)
        if deps is not None:
            kwargs["depends"] = deps
        return PackageRecord(**kwargs)

    graph = GeneralGraph([
        rec("a", "1", ["b", "c", "d"]),
        rec("b", "1", ["e"]),
        rec("c", "1", ["g=1"]),
        rec("d", "1", ["f", "g=2"]),
        rec("e", "1"),
        rec("f", "1"),
        rec("g", "1"),
        rec("g", "2"),
    ])

    # g=1 is only reachable through c
    assert graph.breadth_first_search_by_name(MatchSpec("a"), MatchSpec("g=1")) == \
        [MatchSpec("a"), MatchSpec("c"), MatchSpec("g=1")]
    # g=2 is only reachable through d
    assert graph.breadth_first_search_by_name(MatchSpec("a"), MatchSpec("g=2")) == \
        [MatchSpec("a"), MatchSpec("d"), MatchSpec("g=2")]

Loading

0 comments on commit 5c66306

Please sign in to comment.