Skip to content

Commit

Permalink
gc: add --skip-failed
Browse files Browse the repository at this point in the history
  • Loading branch information
dberenbaum authored and efiop committed Dec 19, 2023
1 parent 7821e12 commit 827f9a7
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 14 deletions.
7 changes: 7 additions & 0 deletions dvc/commands/gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def run(self): # noqa: C901, PLR0912
num=self.args.num,
not_in_remote=self.args.not_in_remote,
dry=self.args.dry,
skip_failed=self.args.skip_failed,
)
return 0

Expand Down Expand Up @@ -188,6 +189,12 @@ def add_parser(subparsers, parent_parser):
help="Remote storage to collect garbage in",
metavar="<name>",
)
gc_parser.add_argument(
"--skip-failed",
action="store_true",
default=False,
help="Skip revisions that fail when collected.",
)
gc_parser.add_argument(
"-f",
"--force",
Expand Down
12 changes: 12 additions & 0 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,3 +357,15 @@ def __init__(

desc = f" @ {stage or version}" if (stage or version) else ""
super().__init__(f"Unable to find artifact '{name}{desc}'")


class RevCollectionError(DvcException):
    """Raised when a revision could not be collected.

    Args:
        rev (str): revision that failed (or "workspace").
    """

    def __init__(self, rev):
        # Keep the failing revision on the instance so callers can inspect it.
        self.rev = rev
        message = f"Failed to collect '{rev}'"
        super().__init__(message)
38 changes: 25 additions & 13 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
Union,
)

from dvc.exceptions import NotDvcRepoError, OutputNotFoundError
from dvc.exceptions import (
DvcException,
NotDvcRepoError,
OutputNotFoundError,
RevCollectionError,
)
from dvc.ignore import DvcIgnoreFilter
from dvc.log import logger
from dvc.utils.objects import cached_property
Expand Down Expand Up @@ -492,6 +497,7 @@ def used_objs( # noqa: PLR0913
revs=None,
num=1,
push: bool = False,
skip_failed: bool = False,
):
"""Get the stages related to the given target and collect
the `info` of its outputs.
Expand All @@ -510,7 +516,7 @@ def used_objs( # noqa: PLR0913
"""
used = defaultdict(set)

for _ in self.brancher(
for rev in self.brancher(
revs=revs,
all_branches=all_branches,
all_tags=all_tags,
Expand All @@ -519,17 +525,23 @@ def used_objs( # noqa: PLR0913
commit_date=commit_date,
num=num,
):
for odb, objs in self.index.used_objs(
targets,
remote=remote,
force=force,
jobs=jobs,
recursive=recursive,
with_deps=with_deps,
push=push,
).items():
used[odb].update(objs)

try:
for odb, objs in self.index.used_objs(
targets,
remote=remote,
force=force,
jobs=jobs,
recursive=recursive,
with_deps=with_deps,
push=push,
).items():
used[odb].update(objs)
except DvcException as exc:
rev = rev or "workspace"
if skip_failed:
logger.warning("Failed to collect '%s', skipping", rev)
else:
raise RevCollectionError(rev) from exc
if used_run_cache:
for odb, objs in self.stage_cache.get_used_objs(
used_run_cache, remote=remote, force=force, jobs=jobs
Expand Down
2 changes: 2 additions & 0 deletions dvc/repo/gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def gc( # noqa: PLR0913, C901
num: Optional[int] = None,
not_in_remote: bool = False,
dry: bool = False,
skip_failed: bool = False,
):
# require `workspace` to be true to come into effect.
# assume `workspace` to be enabled if any of `all_tags`, `all_commits`,
Expand Down Expand Up @@ -113,6 +114,7 @@ def gc( # noqa: PLR0913, C901
jobs=jobs,
revs=[rev] if rev else None,
num=num or 1,
skip_failed=skip_failed,
).items():
if odb not in odb_to_obj_ids:
odb_to_obj_ids[odb] = set()
Expand Down
12 changes: 11 additions & 1 deletion tests/func/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pytest

from dvc.cli import main
from dvc.exceptions import CollectCacheError, InvalidArgumentError
from dvc.exceptions import CollectCacheError, InvalidArgumentError, RevCollectionError
from dvc.fs import LocalFileSystem
from dvc.utils.fs import remove
from dvc_data.hashfile.db.local import LocalHashFileDB
Expand Down Expand Up @@ -439,3 +439,13 @@ def test_gc_logging(caplog, dvc, good_and_bad_cache):
assert "Removed 3 objects from repo cache." in caplog.text
assert "No unused 'local' cache to remove." in caplog.text
assert "No unused 'legacy' cache to remove." in caplog.text


def test_gc_skip_failed(tmp_dir, dvc):
    """Check gc raises RevCollectionError unless ``skip_failed`` is set."""
    # The first gc call below is expected to fail once this dvc.yaml exists.
    with open("dvc.yaml", mode="w") as broken_yaml:
        broken_yaml.write("\ninvalid")

    gc_kwargs = {"force": True, "workspace": True}

    # Without skip_failed the collection error must reach the caller.
    with pytest.raises(RevCollectionError):
        dvc.gc(**gc_kwargs)

    # With skip_failed the same call is expected to complete without raising.
    dvc.gc(**gc_kwargs, skip_failed=True)
2 changes: 2 additions & 0 deletions tests/unit/command/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def test_(dvc, scm, mocker):
"--projects",
"project1",
"project2",
"--skip-failed",
]
)
assert cli_args.func == CmdGC
Expand All @@ -51,6 +52,7 @@ def test_(dvc, scm, mocker):
num=None,
not_in_remote=False,
dry=True,
skip_failed=True,
)

cli_args = parse_args(["gc"])
Expand Down

0 comments on commit 827f9a7

Please sign in to comment.