Skip to content

Commit

Permalink
dvc: optimize all target specific commands to not build graph (iterat…
Browse files Browse the repository at this point in the history
  • Loading branch information
Suor authored Mar 15, 2020
1 parent 767433d commit e69b44d
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 5 deletions.
16 changes: 11 additions & 5 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,28 +179,34 @@ def collect(self, target, with_deps=False, recursive=False, graph=None):
import networkx as nx
from dvc.stage import Stage

G = graph or self.graph

if not target:
return list(G)
return list(graph) if graph else self.stages

target = os.path.abspath(target)

if recursive and os.path.isdir(target):
stages = nx.dfs_postorder_nodes(G)
stages = nx.dfs_postorder_nodes(graph or self.graph)
return [stage for stage in stages if path_isin(stage.path, target)]

stage = Stage.load(self, target)

# Optimization: do not collect the graph for a specific target
if not with_deps:
return [stage]

pipeline = get_pipeline(get_pipelines(G), stage)
pipeline = get_pipeline(get_pipelines(graph or self.graph), stage)
return list(nx.dfs_postorder_nodes(pipeline, stage))

def collect_granular(self, target, *args, **kwargs):
from dvc.stage import Stage

if not target:
return [(stage, None) for stage in self.stages]

# Optimization: do not collect the graph for a specific .dvc target
if Stage.is_valid_filename(target) and not kwargs.get("with_deps"):
return [(Stage.load(self, target), None)]

try:
(out,) = self.find_outs_by_path(target, strict=False)
filter_info = PathInfo(os.path.abspath(target))
Expand Down
16 changes: 16 additions & 0 deletions tests/unit/repo/test_repo.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

from funcy import raiser
import pytest

from dvc.repo import locked
Expand Down Expand Up @@ -66,3 +67,18 @@ def test_locked(mocker):
mocker.call.method(repo, args, kwargs),
mocker.call._reset(),
]


def test_collect_optimization(tmp_dir, dvc, mocker):
    """Check that collecting one stage file does not touch ``Repo.stages``.

    ``Repo.stages`` is swapped for a property that raises, so the test
    fails if either ``collect`` or ``collect_granular`` accesses it when
    given the path of a single stage file.
    """
    [stage] = tmp_dir.dvc_gen("foo", "foo text")

    # Forget the cached stages and graph, then make any attempt to
    # collect stages through ``Repo.stages`` raise.
    dvc._reset()
    failing_stages = property(raiser(Exception("Should not collect")))
    mocker.patch("dvc.repo.Repo.stages", failing_stages)

    # Neither call should reach the patched property for a specific target.
    dvc.collect(stage.path)
    dvc.collect_granular(stage.path)

0 comments on commit e69b44d

Please sign in to comment.