Skip to content

Commit

Permalink
Breadth first search for conflicts (conda#9006)
Browse files Browse the repository at this point in the history
Breadth first search for conflicts
  • Loading branch information
msarahan authored Aug 1, 2019
2 parents 83b0f41 + 7df669d commit 5c66306
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 97 deletions.
78 changes: 26 additions & 52 deletions conda/models/prefix_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,58 +409,32 @@ def __init__(self, records, specs=()):
consolidated_graph[node.name] = cg
self.graph_by_name = consolidated_graph

def depth_first_search_by_name(self, root_spec, spec_name, allowed_specs):
"""Return paths from root_spec to spec_name"""
if root_spec.name == spec_name:
return [[root_spec]]
visited = set()

def build_dependency_chain(node, spc, chains=None):
visited.add(node)
if not chains:
chains = [[]]
chain = chains[-1]
if node == spc:
chain.append(spc)
return [chain]
else:
chain.append(node)
children = sorted(self.graph_by_name.get(node, set()),
key=lambda x: list(self.graph_by_name.keys()).index(x))
for child in children:
if child not in visited:
new_chain = [[c for c in chain]]
chains.extend(build_dependency_chain(child, spc, new_chain))
return chains

chains = build_dependency_chain(root_spec.name, spec_name)

final_chains = []
for chain in sorted(chains, key=len):
if chain[0] == root_spec.name and chain[-1] == spec_name:
# remap to matchspecs
# specs_by_name has two keys: parent, then name of spec
matchspecs_for_chain = [[]]
for idx, name in enumerate(chain[1:]):
matchspecs_to_merge = []
matchspecs = self.specs_by_name[chain[idx]][name]
for ms in matchspecs:
if any(ms.match(rec) for rec in allowed_specs.get(ms.name, [])):
matchspecs_to_merge.append(ms)
try:
merged = MatchSpec.merge(matchspecs_to_merge)
if merged:
for ms_chain in matchspecs_for_chain:
ms_chain.append(merged[0])
except ValueError:
matchspecs_for_chain = [_[:] for _ in
matchspecs_for_chain * len(matchspecs_to_merge)]
for idx, ms in enumerate(matchspecs_to_merge):
matchspecs_for_chain[idx].append(ms)
for ms_chain in matchspecs_for_chain:
final_chains.append(tuple([root_spec] + ms_chain))
break
return set(final_chains)
def breadth_first_search_by_name(self, root_spec, target_spec):
    """Return the shortest path of specs from ``root_spec`` to ``target_spec``.

    Performs a breadth-first search over the dependency information stored in
    ``self.specs_by_name`` (a mapping of package name -> {parent name: deps}).
    Nodes on a path are compared by equality, so the search stops as soon as a
    spec equal to ``target_spec`` is dequeued.

    Returns:
        A list of specs forming the path (``[root_spec]`` when the root
        already equals the target), or ``None`` when no path exists.
    """
    from collections import deque  # O(1) popleft; list.pop(0) is O(n)

    queue = deque([[root_spec]])
    # NOTE: a list, not a set — specs are not guaranteed hashable here,
    # and equality-based membership is what the search semantics need.
    visited = []
    while queue:
        path = queue.popleft()
        node = path[-1]
        if node in visited:
            continue
        visited.append(node)
        if node == target_spec:
            return path
        specs = self.specs_by_name.get(node.name)
        if specs is None:
            # node is not a key in the graph; nothing to expand
            continue
        children = []
        for deps in specs.values():
            children.extend(deps)
        for adj in children:
            # A same-named spec pinned to a different version can never
            # lead to target_spec, so don't enqueue it.
            if adj.name == target_spec.name and adj.version != target_spec.version:
                continue
            queue.append(path + [adj])
    # implicit None: target_spec is unreachable from root_spec


# if __name__ == "__main__":
Expand Down
6 changes: 6 additions & 0 deletions conda/models/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,12 @@ def to_match_spec(self):
build=self.build,
)

def to_simple_match_spec(self):
    """Build a MatchSpec constraining only this record's name and version.

    Unlike ``to_match_spec`` above, the build string is deliberately left
    out, producing a looser spec.
    """
    fields = {"name": self.name, "version": self.version}
    return MatchSpec(**fields)

@property
def namekey(self):
return "global:" + self.name
Expand Down
132 changes: 97 additions & 35 deletions conda/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from ._vendor.auxlib.collection import frozendict
from ._vendor.auxlib.decorators import memoize, memoizemethod
from ._vendor.toolz import concat, groupby
from ._vendor.tqdm import tqdm
from .base.constants import ChannelPriority, MAX_CHANNEL_PRIORITY, SatSolverChoice
from .base.context import context
from .common.compat import iteritems, iterkeys, itervalues, odict, on_win, text_type
Expand All @@ -22,7 +23,6 @@
from .models.channel import Channel, MultiChannel
from .models.enums import NoarchType, PackageType
from .models.match_spec import MatchSpec
from .models.prefix_graph import GeneralGraph
from .models.records import PackageRecord
from .models.version import VersionOrder

Expand Down Expand Up @@ -287,7 +287,6 @@ def _classify_bad_deps(self, bad_deps, specs_to_add, history_specs, strict_chann
'cuda': set(), }
specs_to_add = set(MatchSpec(_) for _ in specs_to_add or [])
history_specs = set(MatchSpec(_) for _ in history_specs or [])

for chain in bad_deps:
# sometimes chains come in as strings
if chain[-1].name == 'python' and len(chain) > 1 and \
Expand Down Expand Up @@ -315,7 +314,7 @@ def _classify_bad_deps(self, bad_deps, specs_to_add, history_specs, strict_chann
if not match:
classes['direct'].add((tuple(chain), str(MatchSpec(chain[0], target=None))))
else:
if len(chain) > 1 or not any(len(c) > 1 and c[0] == chain[0] for c in bad_deps):
if len(chain) > 1 or any(len(c) >= 1 and c[0] == chain[0] for c in bad_deps):
classes['direct'].add((tuple(chain),
str(MatchSpec(chain[0], target=None))))

Expand All @@ -333,10 +332,57 @@ def find_matches_with_strict(self, ms, strict_channel_priority):
return tuple(f for f in matches if f.channel.name == sole_source_channel_name)

def find_conflicts(self, specs, specs_to_add=None, history_specs=None):
    """Analyze unsatisfiable specs and raise UnsatisfiableError with details.

    Always raises; the conflict map built here becomes the error payload.
    """
    if not context.json:
        # user-facing progress note: conflict analysis can be very slow
        print("\nFound conflicts! Looking for incompatible packages.\n"
              "This can take several minutes. Press CTRL-C to abort.")
    conflict_map = self.build_conflict_map(specs, specs_to_add, history_specs)
    is_strict = context.channel_priority == ChannelPriority.STRICT
    raise UnsatisfiableError(conflict_map, strict=is_strict)

def group_and_merge_specs(self, bad_deps_for_spec):
    """Collapse dependency chains whose terminal specs can be unified.

    Chains are bucketed by ``chain[-1].name and len(chain)``.  For a bucket
    holding more than one chain, the terminal specs are merged with
    ``MatchSpec.union`` and grafted onto the first chain's prefix; if the
    union is impossible (ValueError), every chain in the bucket is kept
    unchanged.  Singleton buckets pass through as-is.
    """
    merged_chains = []
    buckets = groupby(lambda chain: chain[-1].name and len(chain),
                      bad_deps_for_spec)
    for bucket in buckets.values():
        if len(bucket) <= 1:
            merged_chains.extend(bucket)
            continue
        try:
            union_spec = MatchSpec.union(chain[-1] for chain in bucket)[0]
            combined = bucket[0][0:-1]
            combined.append(union_spec)
            merged_chains.append(combined)
        except ValueError:
            # terminal specs cannot be unified; keep each chain separately
            merged_chains.extend(bucket)
    return merged_chains

def breadth_first_search_by_spec(self, root_spec, target_spec, allowed_specs):
    """Return the shortest chain of specs from ``root_spec`` to ``target_spec``.

    Breadth-first search over ``allowed_specs`` (a mapping of package name ->
    iterable of records, each carrying a ``depends`` list of dep strings,
    which are converted to MatchSpecs as edges are expanded).

    Returns:
        A list of specs forming the path (``[root_spec]`` when the root
        already equals the target), or ``None`` when no path exists.
    """
    from collections import deque  # O(1) popleft; list.pop(0) is O(n)

    queue = deque([[root_spec]])
    # NOTE: a list, not a set — specs are not guaranteed hashable here
    visited = []
    while queue:
        path = queue.popleft()
        node = path[-1]
        if node in visited:
            continue
        visited.append(node)
        if node == target_spec:
            return path
        # single lookup (was a redundant `in allowed_specs.keys()` + .get)
        records = allowed_specs.get(node.name)
        if records is None:
            continue
        children = []
        for rec in records:
            children.extend(MatchSpec(dep) for dep in rec.depends)
        for adj in children:
            # A same-named spec pinned to a different version can never
            # lead to target_spec, so don't enqueue it.
            if adj.name == target_spec.name and adj.version != target_spec.version:
                continue
            queue.append(path + [adj])
    # implicit None: target_spec is unreachable from root_spec

def build_conflict_map(self, specs, specs_to_add=None, history_specs=None):
"""Perform a deeper analysis on conflicting specifications, by attempting
to find the common dependencies that might be the cause of conflicts.
Expand Down Expand Up @@ -384,38 +430,53 @@ def build_conflict_map(self, specs, specs_to_add=None, history_specs=None):
bad_deps = []
dep_collections = tuple(set(sdep.keys()) for sdep in sdeps.values())
deps = set.union(*dep_collections) if dep_collections else []
# for each possible package being considered, look at how pools interact
for dep in deps:
sdeps_with_dep = {}
for k, v in sdeps.items():
if dep in v:
sdeps_with_dep[k] = v
if len(sdeps_with_dep) <= 1:
continue
# if all of the pools overlap, we're good. Next dep.
if bool(set.intersection(*[v[dep] for v in sdeps_with_dep.values()])):
continue
# start out filtering nothing. invalid_chains will tweak this dict to filter more
# as it goes
records = set.union(*tuple(rec for records in sdeps_with_dep.values()
for rec in records.values()))
# determine the invalid chains for each specific spec. Each of these chains
# should start with `spec` and end with the first encountered conflict. A
# conflict is something that is either not available at all, or is present in
# more than one pool, but those pools do not all overlap.

records_for_graph = groupby(lambda r: r.name,
(r for r in records if isinstance(r, PackageRecord)))
# records_per_name is a completely arbitrary number here. It is meant to gather more
# than just one record, to explore the space of dependencies a bit. Doing all of them
# can be an enormous problem, though. This is hopefully a good compromise.
records_per_name = 7
g = GeneralGraph([_ for v in records_for_graph.values() for _ in v[:records_per_name]])
spec_order = sorted(sdeps_with_dep.keys(),
key=lambda x: list(g.graph_by_name.keys()).index(x.name))
for spec in spec_order:
# the DFS approach works well when things are actually in the graph
bad_deps.extend(g.depth_first_search_by_name(spec, dep, sdeps[spec]))

with tqdm(total=len(deps), desc="Finding conflicts",
leave=False, disable=context.json) as t:
for dep in deps:
t.set_description("Examining {}".format(dep))
t.update()
sdeps_with_dep = {}
for k, v in sdeps.items():
if dep in v:
sdeps_with_dep[k] = v
if len(sdeps_with_dep) <= 1:
continue
# if all of the pools overlap, we're good. Next dep.
if bool(set.intersection(*[v[dep] for v in sdeps_with_dep.values()])):
continue
spec_order = sdeps_with_dep.keys()
for spec in tqdm(spec_order, desc="Comparing specs that have this dependency",
leave=False, disable=context.json):
allowed_specs = sdeps[spec]
dep_vers = []
for key, val in allowed_specs.items():
if key != [_.name for _ in spec_order]:
dep_vers.extend([v.depends for v in val])
dep_ms = {MatchSpec(p) for pkgs in dep_vers for p in pkgs if dep in p}
dep_ms.update(msspec for msspec in sdeps.keys() if msspec.name == dep)
bad_deps_for_spec = []
# # sort specs from least specific to most specific. Only continue
# # to examine a dep if a conflict hasn't been found for its name
# dep_ms = sorted(list(dep_ms), key=lambda x: (
# exactness_and_number_of_deps(self, x), x.dist_str()))
# conflicts_found = set()
with tqdm(total=len(dep_ms), desc="Finding conflict paths",
leave=False, disable=context.json) as t2:
for conflicting_spec in dep_ms:
t2.set_description("Finding shortest conflict path for {}"
.format(conflicting_spec))
t2.update()
if conflicting_spec.name == spec.name:
chain = [conflicting_spec] if \
conflicting_spec.version == spec.version else None
else:
chain = self.breadth_first_search_by_spec(
spec, conflicting_spec, allowed_specs)
if chain:
bad_deps_for_spec.append(chain)
if bad_deps_for_spec:
bad_deps.extend(self.group_and_merge_specs(bad_deps_for_spec))

if not bad_deps:
# no conflicting nor missing packages found, return the bad specs
Expand All @@ -427,6 +488,7 @@ def build_conflict_map(self, specs, specs_to_add=None, history_specs=None):
deps = groupby(lambda x: x.name, deps)

bad_deps.extend([[spec, MatchSpec.union(_)[0]] for _ in deps.values()])

bad_deps = self._classify_bad_deps(bad_deps, specs_to_add, history_specs,
strict_channel_priority)
return bad_deps
Expand Down
50 changes: 49 additions & 1 deletion tests/models/test_prefix_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from conda.exceptions import CyclicalDependencyError
from conda.models.match_spec import MatchSpec
import conda.models.prefix_graph
from conda.models.prefix_graph import PrefixGraph
from conda.models.prefix_graph import PrefixGraph, GeneralGraph
from conda.models.records import PackageRecord
import pytest
from tests.core.test_solve import get_solver_4, get_solver_5

Expand Down Expand Up @@ -920,3 +921,50 @@ def test_deep_cyclical_dependency():
'sqlite',
)
assert nodes == order


def test_general_graph_bfs_simple():
    """BFS over a small DAG: one hop, two hops, trivial, absent, and reversed."""
    def rec(name, deps=None):
        kwargs = dict(name=name, version="1", build="0", build_number=0)
        if deps is not None:
            kwargs["depends"] = deps
        return PackageRecord(**kwargs)

    graph = GeneralGraph([
        rec("a", ["b", "c", "d"]),
        rec("b", ["e"]),
        rec("c"),
        rec("d", ["f", "g"]),
        rec("e"),
        rec("f"),
        rec("g"),
    ])

    # direct child of the root
    assert graph.breadth_first_search_by_name(MatchSpec("a"), MatchSpec("c")) == \
        [MatchSpec("a"), MatchSpec("c")]
    # two hops, routed through d
    assert graph.breadth_first_search_by_name(MatchSpec("a"), MatchSpec("f")) == \
        [MatchSpec("a"), MatchSpec("d"), MatchSpec("f")]
    # trivial path: root already is the target
    assert graph.breadth_first_search_by_name(MatchSpec("a"), MatchSpec("a")) == \
        [MatchSpec("a")]
    # target absent from the graph
    assert graph.breadth_first_search_by_name(MatchSpec("a"), MatchSpec("z")) is None
    # edges are directed: no path from a dependency back up to its parent
    assert graph.breadth_first_search_by_name(MatchSpec("d"), MatchSpec("a")) is None


def test_general_graph_bfs_version():
    """BFS honors version pins: each pinned `g` is reached via its own parent."""
    def rec(name, version, deps=None):
        kwargs = dict(name=name, version=version, build="0", build_number=0)
        if deps is not None:
            kwargs["depends"] = deps
        return PackageRecord(**kwargs)

    graph = GeneralGraph([
        rec("a", "1", ["b", "c", "d"]),
        rec("b", "1", ["e"]),
        rec("c", "1", ["g=1"]),
        rec("d", "1", ["f", "g=2"]),
        rec("e", "1"),
        rec("f", "1"),
        rec("g", "1"),
        rec("g", "2"),
    ])

    # g=1 is only reachable through c
    assert graph.breadth_first_search_by_name(MatchSpec("a"), MatchSpec("g=1")) == \
        [MatchSpec("a"), MatchSpec("c"), MatchSpec("g=1")]
    # g=2 is only reachable through d
    assert graph.breadth_first_search_by_name(MatchSpec("a"), MatchSpec("g=2")) == \
        [MatchSpec("a"), MatchSpec("d"), MatchSpec("g=2")]

Loading

0 comments on commit 5c66306

Please sign in to comment.