From 058e19fed017425c77f36dfbfbf067109b0381ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Wed, 22 Jan 2020 21:30:12 +0000 Subject: [PATCH 1/8] pipeline: show: outs: eliminate extra nodes in DAG --- dvc/command/pipeline.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dvc/command/pipeline.py b/dvc/command/pipeline.py index cbec6abf31..ed806bc4a8 100644 --- a/dvc/command/pipeline.py +++ b/dvc/command/pipeline.py @@ -36,8 +36,8 @@ def __build_graph(self, target, commands, outs): from dvc.stage import Stage from dvc.repo.graph import get_pipeline - stage = Stage.load(self.repo, target) - G = get_pipeline(self.repo.pipelines, stage) + target_stage = Stage.load(self.repo, target) + G = get_pipeline(self.repo.pipelines, target_stage) nodes = [] for stage in G: @@ -52,7 +52,7 @@ def __build_graph(self, target, commands, outs): nodes.append(stage.relpath) edges = [] - for from_stage, to_stage in G.edges(): + for from_stage, to_stage in networkx.dfs_edges(G, target_stage): if commands: if to_stage.cmd is None: continue From a299fba15d3fd49b5955e1453a03d54695971771 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Thu, 23 Jan 2020 11:47:37 +0000 Subject: [PATCH 2/8] connect deps to outs --- dvc/command/pipeline.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dvc/command/pipeline.py b/dvc/command/pipeline.py index ed806bc4a8..36d53c36e1 100644 --- a/dvc/command/pipeline.py +++ b/dvc/command/pipeline.py @@ -52,15 +52,16 @@ def __build_graph(self, target, commands, outs): nodes.append(stage.relpath) edges = [] - for from_stage, to_stage in networkx.dfs_edges(G, target_stage): + for from_stage, to_stage in G.edges(): if commands: if to_stage.cmd is None: continue edges.append((from_stage.cmd, to_stage.cmd)) elif outs: - for from_out in from_stage.outs: + for from_dep in from_stage.deps: for to_out in to_stage.outs: - edges.append((str(from_out), str(to_out))) + if from_dep.path_info == to_out.path_info: + edges.append((str(from_dep), str(to_out))) else: edges.append((from_stage.relpath, to_stage.relpath)) From b635fdddbd346457f4738096c57de4e29719eba0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Thu, 23 Jan 2020 11:54:15 +0000 Subject: [PATCH 3/8] connect outs to deps --- dvc/command/pipeline.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dvc/command/pipeline.py b/dvc/command/pipeline.py index 36d53c36e1..8fc35f8641 100644 --- a/dvc/command/pipeline.py +++ b/dvc/command/pipeline.py @@ -52,16 +52,15 @@ def __build_graph(self, target, commands, outs): nodes.append(stage.relpath) edges = [] - for from_stage, to_stage in G.edges(): + for from_stage, to_stage in networkx.dfs_edges(G, target_stage): if commands: if to_stage.cmd is None: continue edges.append((from_stage.cmd, to_stage.cmd)) elif outs: for from_dep in from_stage.deps: - for to_out in to_stage.outs: - if from_dep.path_info == to_out.path_info: - edges.append((str(from_dep), str(to_out))) + for from_out in from_stage.outs: + edges.append((str(from_out), str(from_dep))) else: edges.append((from_stage.relpath, to_stage.relpath)) From d827fef8c4f8eccd062f1fb95b9bdbb28f29674c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Thu, 23 Jan 2020 12:39:07 +0000 Subject: [PATCH 4/8] add missing nodes --- dvc/command/pipeline.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dvc/command/pipeline.py b/dvc/command/pipeline.py index 8fc35f8641..e6a012a0e2 100644 --- a/dvc/command/pipeline.py +++ b/dvc/command/pipeline.py @@ -40,7 +40,7 @@ def __build_graph(self, target, commands, outs): G = get_pipeline(self.repo.pipelines, target_stage) nodes = [] - for stage in G: + for stage in networkx.preorder_nodes(G, target_stage): if commands: if stage.cmd is None: continue @@ -48,6 +48,8 @@ def __build_graph(self, target, commands, outs): elif outs: for out in stage.outs: nodes.append(str(out)) + for dep in stage.deps: + nodes.append(str(dep)) else: nodes.append(stage.relpath) From 233790ced5a1772a3780c03ba65bd06f8141c02e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Thu, 23 Jan 2020 17:54:00 +0000 Subject: [PATCH 5/8] add test and traverse nodes when showing outs --- dvc/command/pipeline.py | 39 ++++++++++++++++++++----------------- tests/func/test_pipeline.py | 24 +++++++++++++++++++++++ 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/dvc/command/pipeline.py b/dvc/command/pipeline.py index e6a012a0e2..4b2893adc4 100644 --- a/dvc/command/pipeline.py +++ b/dvc/command/pipeline.py @@ -39,34 +39,37 @@ def __build_graph(self, target, commands, outs): target_stage = Stage.load(self.repo, target) G = get_pipeline(self.repo.pipelines, target_stage) - nodes = [] - for stage in networkx.preorder_nodes(G, target_stage): + nodes = set() + for stage in networkx.dfs_preorder_nodes(G, target_stage): if commands: if stage.cmd is None: continue - nodes.append(stage.cmd) + nodes.add(stage.cmd) elif outs: for out in stage.outs: - nodes.append(str(out)) + nodes.add(str(out)) for dep in stage.deps: - nodes.append(str(dep)) + nodes.add(str(dep)) else: - nodes.append(stage.relpath) + nodes.add(stage.relpath) edges = [] - for from_stage, to_stage in networkx.dfs_edges(G, target_stage): - if commands: - if to_stage.cmd is None: - continue - edges.append((from_stage.cmd, to_stage.cmd)) - elif outs: - for from_dep in from_stage.deps: - for from_out in from_stage.outs: - edges.append((str(from_out), str(from_dep))) - else: - edges.append((from_stage.relpath, to_stage.relpath)) - return nodes, edges, networkx.is_tree(G) + if outs and not commands: + for stage in networkx.dfs_preorder_nodes(G, target_stage): + for dep in stage.deps: + for out in stage.outs: + edges.append((str(out), str(dep))) + else: + for from_stage, to_stage in networkx.dfs_edges(G, target_stage): + if commands: + if to_stage.cmd is None: + continue + edges.append((from_stage.cmd, to_stage.cmd)) + else: + edges.append((from_stage.relpath, to_stage.relpath)) + + return list(nodes), edges, networkx.is_tree(G) def _show_ascii(self, target, commands, outs): from dvc.dagascii import draw diff --git a/tests/func/test_pipeline.py b/tests/func/test_pipeline.py index d6420a4173..c88406e570 100644 --- a/tests/func/test_pipeline.py +++ b/tests/func/test_pipeline.py @@ -1,6 +1,7 @@ import logging from dvc.main import main +from dvc.command.pipeline import CmdPipelineShow from tests.basic_env import TestDvc from tests.func.test_repro import TestRepro from tests.func.test_repro import TestReproChangedDeepData @@ -98,6 +99,29 @@ def test_dot_commands(self): self.assertEqual(ret, 0) +def test_disconnected_stage(tmp_dir, dvc, caplog): + tmp_dir.dvc_gen({"base": "base"}) + + dvc.add("base") + dvc.run(deps=["base"], outs=["derived1"], cmd="echo derived1 > derived1") + dvc.run(deps=["base"], outs=["derived2"], cmd="echo derived2 > derived2") + final_stage = dvc.run( + deps=["derived1"], outs=["final"], cmd="echo final > final" + ) + + args = ["pipeline", "show", "--outs", "--dot", final_stage.path] + with tmp_dir.chdir(): + command = CmdPipelineShow(args) + # Need to test __build_graph directly + nodes, edges, is_tree = command._CmdPipelineShow__build_graph( + final_stage.path, commands=False, outs=True + ) + + assert set(nodes) == set(["final", "derived1", "base"]) + assert edges == [("final", "derived1"), ("derived1", "base")] + assert is_tree is True + + def test_print_locked_stages(tmp_dir, dvc, caplog): tmp_dir.dvc_gen({"foo": "foo content", "bar": "bar content"}) dvc.lock_stage("foo.dvc") From a7bad881591f284129582dd925ef935fb59e0200 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Tue, 28 Jan 2020 11:49:24 +0000 Subject: [PATCH 6/8] remove unnecessary chdir --- tests/func/test_pipeline.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/func/test_pipeline.py b/tests/func/test_pipeline.py index c88406e570..88096613b2 100644 --- a/tests/func/test_pipeline.py +++ b/tests/func/test_pipeline.py @@ -110,12 +110,11 @@ def test_disconnected_stage(tmp_dir, dvc, caplog): ) args = ["pipeline", "show", "--outs", "--dot", final_stage.path] - with tmp_dir.chdir(): - command = CmdPipelineShow(args) - # Need to test __build_graph directly - nodes, edges, is_tree = command._CmdPipelineShow__build_graph( - final_stage.path, commands=False, outs=True - ) + command = CmdPipelineShow(args) + # Need to test __build_graph directly + nodes, edges, is_tree = command._CmdPipelineShow__build_graph( + final_stage.path, commands=False, outs=True + ) assert set(nodes) == set(["final", "derived1", "base"]) assert edges == [("final", "derived1"), ("derived1", "base")] From 11ad0a980322ad1f7a939f392c44a88831d825ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Mon, 3 Feb 2020 18:41:55 +0000 Subject: [PATCH 7/8] remove __ --- dvc/command/pipeline.py | 14 ++++++-------- tests/func/test_pipeline.py | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/dvc/command/pipeline.py b/dvc/command/pipeline.py index 4b2893adc4..3c02d59528 100644 --- a/dvc/command/pipeline.py +++ b/dvc/command/pipeline.py @@ -31,7 +31,7 @@ def _show(self, target, commands, outs, locked): else: logger.info(stage.path_in_repo) - def __build_graph(self, target, commands, outs): + def _build_graph(self, target, commands, outs): import networkx from dvc.stage import Stage from dvc.repo.graph import get_pipeline @@ -74,7 +74,7 @@ def __build_graph(self, target, commands, outs): def _show_ascii(self, target, commands, outs): from dvc.dagascii import draw - nodes, edges, _ = self.__build_graph(target, commands, outs) + nodes, edges, _ = self._build_graph(target, commands, outs) if not nodes: return @@ -84,7 +84,7 @@ def _show_ascii(self, target, commands, outs): def _show_dependencies_tree(self, target, commands, outs): from treelib import Tree - nodes, edges, is_tree = self.__build_graph(target, commands, outs) + nodes, edges, is_tree = self._build_graph(target, commands, outs) if not nodes: return if not is_tree: @@ -105,12 +105,12 @@ def _show_dependencies_tree(self, target, commands, outs): observe_list.pop(0) tree.show() - def __write_dot(self, target, commands, outs): + def _write_dot(self, target, commands, outs): import io import networkx from networkx.drawing.nx_pydot import write_dot - _, edges, _ = self.__build_graph(target, commands, outs) + _, edges, _ = self._build_graph(target, commands, outs) edges = [edge[::-1] for edge in edges] simple_g = networkx.DiGraph() @@ -131,9 +131,7 @@ def run(self): target, self.args.commands, self.args.outs ) elif self.args.dot: - self.__write_dot( - target, self.args.commands, self.args.outs - ) + self._write_dot(target, self.args.commands, self.args.outs) elif self.args.tree: self._show_dependencies_tree( target, self.args.commands, self.args.outs diff --git a/tests/func/test_pipeline.py b/tests/func/test_pipeline.py index 88096613b2..b2cebe4dab 100644 --- a/tests/func/test_pipeline.py +++ b/tests/func/test_pipeline.py @@ -112,7 +112,7 @@ def test_disconnected_stage(tmp_dir, dvc, caplog): args = ["pipeline", "show", "--outs", "--dot", final_stage.path] command = CmdPipelineShow(args) # Need to test __build_graph directly - nodes, edges, is_tree = command._CmdPipelineShow__build_graph( + nodes, edges, is_tree = command._build_graph( final_stage.path, commands=False, outs=True ) From 758205a52a5929054b6406925bfb0266376326a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Mon, 3 Feb 2020 18:45:44 +0000 Subject: [PATCH 8/8] PR feedback --- dvc/command/pipeline.py | 2 +- tests/func/test_pipeline.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/dvc/command/pipeline.py b/dvc/command/pipeline.py index 3c02d59528..510e1feb1b 100644 --- a/dvc/command/pipeline.py +++ b/dvc/command/pipeline.py @@ -55,7 +55,7 @@ def _build_graph(self, target, commands, outs): edges = [] - if outs and not commands: + if outs: for stage in networkx.dfs_preorder_nodes(G, target_stage): for dep in stage.deps: for out in stage.outs: diff --git a/tests/func/test_pipeline.py b/tests/func/test_pipeline.py index b2cebe4dab..31ed66748e 100644 --- a/tests/func/test_pipeline.py +++ b/tests/func/test_pipeline.py @@ -99,7 +99,7 @@ def test_dot_commands(self): self.assertEqual(ret, 0) -def test_disconnected_stage(tmp_dir, dvc, caplog): +def test_disconnected_stage(tmp_dir, dvc): tmp_dir.dvc_gen({"base": "base"}) dvc.add("base") @@ -109,14 +109,13 @@ def test_disconnected_stage(tmp_dir, dvc, caplog): deps=["derived1"], outs=["final"], cmd="echo final > final" ) - args = ["pipeline", "show", "--outs", "--dot", final_stage.path] - command = CmdPipelineShow(args) + command = CmdPipelineShow([]) # Need to test __build_graph directly nodes, edges, is_tree = command._build_graph( final_stage.path, commands=False, outs=True ) - assert set(nodes) == set(["final", "derived1", "base"]) + assert set(nodes) == {"final", "derived1", "base"} assert edges == [("final", "derived1"), ("derived1", "base")] assert is_tree is True