From 21af7438db3fb318feb773e0767e4f5c7b1d4e35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Fri, 16 Jul 2021 15:36:04 +0200 Subject: [PATCH] plots/metrics/params/experiments show: stop throwing exceptions (#5984) * params/metrics/plots/exp show: introduce onerror * fixup * plots/metrics/params: removing errors * removing more errors * removing errors * some more removing * new plots ux * new ux for metrics * new ux for params * fixing tests * fixups * minor test fixes * minor test fixes * commands: plots/params: fix ui imports * working on error handlng decorator, stopped at show checkpoints * error_handler, further dvelopment * plots just before moving loading to collect * appending errors to results: initial passing * fixup * onerror: remove onerror fixture * fixups * plots/metrics/params/experiments: temporarily remove error warnings * plots: extract parsing out of class method * metrics/experiments: provide exception serializer for show-json * params/plots/metrics: signal errors * rebase fixups * fix TODOs * fixup * skshetry's review refactor --- dvc/command/experiments.py | 40 +++- dvc/command/live.py | 11 +- dvc/command/metrics.py | 12 +- dvc/command/plots.py | 42 +++-- dvc/compare.py | 3 +- dvc/dependency/param.py | 2 +- dvc/exceptions.py | 31 ---- dvc/repo/collect.py | 15 +- dvc/repo/experiments/diff.py | 4 +- dvc/repo/experiments/show.py | 86 +++++---- dvc/repo/live.py | 11 +- dvc/repo/metrics/diff.py | 15 +- dvc/repo/metrics/show.py | 74 ++++---- dvc/repo/params/diff.py | 17 +- dvc/repo/params/show.py | 94 ++++++---- dvc/repo/plots/__init__.py | 245 ++++++++++++++----------- dvc/repo/plots/data.py | 52 +----- dvc/utils/__init__.py | 34 +++- dvc/utils/collections.py | 11 ++ dvc/utils/diff.py | 6 +- dvc/utils/serialize/_json.py | 6 + tests/func/experiments/test_show.py | 106 +++++++---- tests/func/metrics/test_show.py | 128 +++++++++---- tests/func/params/test_show.py | 93 ++++++++-- tests/func/plots/test_show.py | 136 ++++++++------ tests/func/test_live.py | 18 +- tests/unit/command/test_experiments.py | 2 +- tests/unit/command/test_live.py | 4 +- tests/unit/command/test_params.py | 7 +- tests/unit/command/test_plots.py | 3 +- tests/unit/test_compare.py | 126 +++++++++---- tests/unit/test_metrics.py | 2 +- tests/unit/test_params.py | 2 +- 33 files changed, 861 insertions(+), 577 deletions(-) diff --git a/dvc/command/experiments.py b/dvc/command/experiments.py index c8021f12f1..4c7fd71a1e 100644 --- a/dvc/command/experiments.py +++ b/dvc/command/experiments.py @@ -16,6 +16,7 @@ from dvc.exceptions import DvcException, InvalidArgumentError from dvc.ui import ui from dvc.utils.flatten import flatten +from dvc.utils.serialize import encode_exception if TYPE_CHECKING: from rich.text import Text @@ -28,6 +29,7 @@ SHOW_MAX_WIDTH = 1024 FILL_VALUE = "-" +FILL_VALUE_ERRORED = "!" def _filter_name(names, label, filter_strs): @@ -90,6 +92,7 @@ def _filter_names( def _update_names(names, items): for name, item in items: + item = item.get("data", {}) if isinstance(item, dict): item = flatten(item) names[name].update({key: None for key in item}) @@ -100,7 +103,8 @@ def _collect_names(all_experiments, **kwargs): param_names = defaultdict(dict) for _, experiments in all_experiments.items(): - for exp in experiments.values(): + for exp_data in experiments.values(): + exp = exp_data.get("data", {}) _update_names(metric_names, exp.get("metrics", {}).items()) _update_names(param_names, exp.get("params", {}).items()) metric_names = _filter_names( @@ -150,7 +154,8 @@ def _collect_rows( ) new_checkpoint = True - for i, (rev, exp) in enumerate(experiments.items()): + for i, (rev, results) in enumerate(experiments.items()): + exp = results.get("data", {}) if exp.get("running"): state = "Running" elif exp.get("queued"): @@ -169,7 +174,7 @@ def _collect_rows( tip = exp.get("checkpoint_tip") parent_rev = exp.get("checkpoint_parent", "") - parent_exp = experiments.get(parent_rev, {}) + parent_exp = experiments.get(parent_rev, {}).get("data", {}) parent_tip = parent_exp.get("checkpoint_tip") parent = "" @@ -202,10 +207,21 @@ def _collect_rows( state, executor, ] + fill_value = FILL_VALUE_ERRORED if results.get("error") else FILL_VALUE _extend_row( - row, metric_names, exp.get("metrics", {}).items(), precision + row, + metric_names, + exp.get("metrics", {}).items(), + precision, + fill_value=fill_value, + ) + _extend_row( + row, + param_names, + exp.get("params", {}).items(), + precision, + fill_value=fill_value, ) - _extend_row(row, param_names, exp.get("params", {}).items(), precision) yield row @@ -268,19 +284,23 @@ def _format_time(timestamp): return timestamp.strftime(fmt) -def _extend_row(row, names, items, precision): +def _extend_row(row, names, items, precision, fill_value=FILL_VALUE): from rich.text import Text from dvc.compare import _format_field, with_value if not items: - row.extend(FILL_VALUE for keys in names.values() for _ in keys) + row.extend(fill_value for keys in names.values() for _ in keys) return - for fname, item in items: + for fname, data in items: + item = data.get("data", {}) item = flatten(item) if isinstance(item, dict) else {fname: item} for name in names[fname]: - value = with_value(item.get(name), FILL_VALUE) + value = with_value( + item.get(name), + FILL_VALUE_ERRORED if data.get("error", None) else fill_value, + ) # wrap field data in rich.Text, otherwise rich may # interpret unescaped braces from list/dict types as rich # markup tags @@ -450,7 +470,7 @@ def _normalize_headers(names): def _format_json(item): if isinstance(item, (date, datetime)): return item.isoformat() - raise TypeError + return encode_exception(item) class CmdExperimentsShow(CmdBase): diff --git a/dvc/command/live.py b/dvc/command/live.py index b0959663a5..5995e1d934 100644 --- a/dvc/command/live.py +++ b/dvc/command/live.py @@ -12,12 +12,13 @@ class CmdLive(CmdBase): def _run(self, target, revs=None): metrics, plots = self.repo.live.show(target=target, revs=revs) - html_path = self.args.target + ".html" - self.repo.plots.write_html(html_path, plots, metrics) + if plots: + html_path = self.args.target + ".html" + self.repo.plots.write_html(html_path, plots, metrics) - ui.write("\nfile://", os.path.abspath(html_path), sep="") - - return 0 + ui.write("\nfile://", os.path.abspath(html_path), sep="") + return 0 + return 1 class CmdLiveShow(CmdLive): diff --git a/dvc/command/metrics.py b/dvc/command/metrics.py index 0823ff4e69..f8c5223e64 100644 --- a/dvc/command/metrics.py +++ b/dvc/command/metrics.py @@ -3,8 +3,9 @@ from dvc.command import completion from dvc.command.base import CmdBase, append_doc_link, fix_subparsers -from dvc.exceptions import BadMetricError, DvcException +from dvc.exceptions import DvcException from dvc.ui import ui +from dvc.utils.serialize import encode_exception logger = logging.getLogger(__name__) @@ -33,17 +34,10 @@ def run(self): if self.args.show_json: import json - ui.write(json.dumps(metrics)) + ui.write(json.dumps(metrics, default=encode_exception)) else: from dvc.compare import show_metrics - # When `metrics` contains a `None` key, it means that some files - # specified as `targets` in `repo.metrics.show` didn't contain any - # metrics. - missing = metrics.pop(None, None) - if missing: - raise BadMetricError(missing) - show_metrics( metrics, markdown=self.args.show_md, diff --git a/dvc/command/plots.py b/dvc/command/plots.py index 3904f61fdd..7f752223a8 100644 --- a/dvc/command/plots.py +++ b/dvc/command/plots.py @@ -42,29 +42,35 @@ def run(self): ui.write(plots[target]) return 0 - rel: str = self.args.out or "plots.html" - path = (Path.cwd() / rel).resolve() - self.repo.plots.write_html( - path, plots=plots, html_template_path=self.args.html_template - ) - except DvcException: logger.exception("") return 1 - assert path.is_absolute() # as_uri throws ValueError if not absolute - url = path.as_uri() - ui.write(url) - if self.args.open: - import webbrowser - - opened = webbrowser.open(rel) - if not opened: - ui.error_write( - "Failed to open. Please try opening it manually." - ) - return 1 + if plots: + rel: str = self.args.out or "plots.html" + path: Path = (Path.cwd() / rel).resolve() + self.repo.plots.write_html( + path, plots=plots, html_template_path=self.args.html_template + ) + assert ( + path.is_absolute() + ) # as_uri throws ValueError if not absolute + url = path.as_uri() + ui.write(url) + if self.args.open: + import webbrowser + + opened = webbrowser.open(rel) + if not opened: + ui.error_write( + "Failed to open. Please try opening it manually." + ) + return 1 + else: + ui.error_write( + "No plots were loaded, visualization file will not be created." + ) return 0 diff --git a/dvc/compare.py b/dvc/compare.py index d185d9b058..7f4b0798eb 100644 --- a/dvc/compare.py +++ b/dvc/compare.py @@ -281,8 +281,9 @@ def metrics_table( td = TabularData(["Revision", "Path"], fill_value="-") for branch, val in metrics.items(): - for fname, metric in val.items(): + for fname, metric in val.get("data", {}).items(): row_data: Dict[str, str] = {"Revision": branch, "Path": fname} + metric = metric.get("data", {}) flattened = ( flatten(format_dict(metric)) if isinstance(metric, dict) diff --git a/dvc/dependency/param.py b/dvc/dependency/param.py index f9c0059e5b..717196de81 100644 --- a/dvc/dependency/param.py +++ b/dvc/dependency/param.py @@ -98,7 +98,7 @@ def _read(self): f"Unable to read parameters from '{self}'" ) from exc - def read_params_d(self): + def read_params_d(self, **kwargs): config = self._read() ret = {} diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 2c08974f38..a3e91d7e0d 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -1,5 +1,4 @@ """Exceptions raised by the dvc.""" -from typing import List class DvcException(Exception): @@ -173,36 +172,6 @@ def __init__(self, paths): ) -class MetricsError(DvcException): - pass - - -class NoMetricsParsedError(MetricsError): - def __init__(self, command): - super().__init__( - f"Could not parse {command} files. Use `-v` option to see more " - "details." - ) - - -class NoMetricsFoundError(MetricsError): - def __init__(self, command, run_options): - super().__init__( - f"No {command} files in this repository. " - f"Use `{run_options}` options for " - f"`dvc run` to mark stage outputs as {command}." - ) - - -class MetricDoesNotExistError(MetricsError): - def __init__(self, targets: List[str]): - if len(targets) == 1: - msg = "'{}' does not exist." - else: - msg = "'{}' do not exist." - super().__init__(msg.format(", ".join(targets))) - - class RecursiveAddingWhileUsingFilename(DvcException): def __init__(self): super().__init__( diff --git a/dvc/repo/collect.py b/dvc/repo/collect.py index bb68dd4fd6..acc4f0497a 100644 --- a/dvc/repo/collect.py +++ b/dvc/repo/collect.py @@ -46,15 +46,12 @@ def _collect_paths( target_infos.extend(repo.dvcignore.walk_files(fs, path_info)) if not fs.exists(path_info): - if not recursive: - if rev == "workspace" or rev == "": - logger.warning( - "'%s' was not found in current workspace.", path_info - ) - else: - logger.warning( - "'%s' was not found at: '%s'.", path_info, rev - ) + if rev == "workspace" or rev == "": + logger.warning( + "'%s' was not found in current workspace.", path_info + ) + else: + logger.warning("'%s' was not found at: '%s'.", path_info, rev) continue target_infos.append(path_info) return target_infos diff --git a/dvc/repo/experiments/diff.py b/dvc/repo/experiments/diff.py index 4a99a292bc..38f2069503 100644 --- a/dvc/repo/experiments/diff.py +++ b/dvc/repo/experiments/diff.py @@ -35,8 +35,8 @@ def diff(repo, *args, a_rev=None, b_rev=None, param_deps=False, **kwargs): return { key: _diff( - format_dict(old[key]), - format_dict(new[key]), + format_dict(old.get("data", {}).get(key, {})), + format_dict(new.get("data", {}).get(key, {})), with_unchanged=with_unchanged, ) for key in ["metrics", "params"] diff --git a/dvc/repo/experiments/show.py b/dvc/repo/experiments/show.py index 44a73a175a..f1632a67e0 100644 --- a/dvc/repo/experiments/show.py +++ b/dvc/repo/experiments/show.py @@ -1,23 +1,32 @@ import logging from collections import OrderedDict, defaultdict from datetime import datetime +from typing import Any, Callable, Dict, Optional from dvc.exceptions import InvalidArgumentError from dvc.repo import locked from dvc.repo.experiments.base import ExpRefInfo from dvc.repo.experiments.executor.base import ExecutorInfo from dvc.repo.experiments.utils import fix_exp_head -from dvc.repo.metrics.show import _collect_metrics, _read_metrics -from dvc.repo.params.show import _collect_configs, _read_params +from dvc.repo.metrics.show import _gather_metrics +from dvc.repo.params.show import _gather_params from dvc.scm.base import SCMError +from dvc.utils import error_handler, onerror_collect logger = logging.getLogger(__name__) +@error_handler def _collect_experiment_commit( - repo, exp_rev, stash=False, sha_only=True, param_deps=False, running=None + repo, + exp_rev, + stash=False, + sha_only=True, + param_deps=False, + running=None, + onerror: Optional[Callable] = None, ): - res = defaultdict(dict) + res: Dict[str, Optional[Any]] = defaultdict(dict) for rev in repo.brancher(revs=[exp_rev]): if rev == "workspace": if exp_rev != "workspace": @@ -27,9 +36,8 @@ def _collect_experiment_commit( commit = repo.scm.resolve_commit(rev) res["timestamp"] = datetime.fromtimestamp(commit.commit_time) - params, params_path_infos = _collect_configs(repo, rev=rev) - params = _read_params( - repo, params, params_path_infos, rev, deps=param_deps + params = _gather_params( + repo, rev=rev, targets=None, deps=param_deps, onerror=onerror ) if params: res["params"] = params @@ -42,8 +50,9 @@ def _collect_experiment_commit( res["running"] = False res["executor"] = None if not stash: - metrics = _collect_metrics(repo, None, rev, False) - vals = _read_metrics(repo, metrics, rev) + vals = _gather_metrics( + repo, targets=None, rev=rev, recursive=False, onerror=onerror + ) res["metrics"] = vals if not sha_only and rev != "workspace": @@ -63,28 +72,34 @@ def _collect_experiment_commit( return res -def _collect_experiment_branch(res, repo, branch, baseline, **kwargs): +def _collect_experiment_branch( + res, repo, branch, baseline, onerror: Optional[Callable] = None, **kwargs +): exp_rev = repo.scm.resolve_rev(branch) prev = None revs = list(repo.scm.branch_revs(exp_rev, baseline)) for rev in revs: - collected_exp = _collect_experiment_commit(repo, rev, **kwargs) + collected_exp = _collect_experiment_commit( + repo, rev, onerror=onerror, **kwargs + ) if len(revs) > 1: exp = {"checkpoint_tip": exp_rev} if prev: - res[prev]["checkpoint_parent"] = rev + res[prev]["data"][ # type: ignore[unreachable] + "checkpoint_parent" + ] = rev if rev in res: - res[rev].update(exp) + res[rev]["data"].update(exp) res.move_to_end(rev) else: - exp.update(collected_exp) + exp.update(collected_exp["data"]) else: - exp = collected_exp + exp = collected_exp["data"] if rev not in res: - res[rev] = exp + res[rev] = {"data": exp} prev = rev if len(revs) > 1: - res[prev]["checkpoint_parent"] = baseline + res[prev]["data"]["checkpoint_parent"] = baseline return res @@ -98,8 +113,12 @@ def show( sha_only=False, num=1, param_deps=False, + onerror: Optional[Callable] = None, ): - res = defaultdict(OrderedDict) + if onerror is None: + onerror = onerror_collect + + res: Dict[str, Dict] = defaultdict(OrderedDict) if num < 1: raise InvalidArgumentError(f"Invalid number of commits '{num}'") @@ -133,6 +152,7 @@ def show( sha_only=sha_only, param_deps=param_deps, running=running, + onerror=onerror, ) if rev == "workspace": @@ -156,19 +176,21 @@ def show( sha_only=sha_only, param_deps=param_deps, running=running, + onerror=onerror, ) - - # collect queued (not yet reproduced) experiments - for stash_rev, entry in repo.experiments.stash_revs.items(): - if entry.baseline_rev in revs: - if stash_rev not in running or not running[stash_rev].get("last"): - experiment = _collect_experiment_commit( - repo, - stash_rev, - stash=stash_rev not in running, - param_deps=param_deps, - running=running, - ) - res[entry.baseline_rev][stash_rev] = experiment - + # collect queued (not yet reproduced) experiments + for stash_rev, entry in repo.experiments.stash_revs.items(): + if entry.baseline_rev in revs: + if stash_rev not in running or not running[stash_rev].get( + "last" + ): + experiment = _collect_experiment_commit( + repo, + stash_rev, + stash=stash_rev not in running, + param_deps=param_deps, + running=running, + onerror=onerror, + ) + res[entry.baseline_rev][stash_rev] = experiment return res diff --git a/dvc/repo/live.py b/dvc/repo/live.py index 6d71719a3b..5dcfee2fc8 100644 --- a/dvc/repo/live.py +++ b/dvc/repo/live.py @@ -1,10 +1,7 @@ -import contextlib import logging import os from typing import TYPE_CHECKING, List, Optional -from dvc.exceptions import MetricDoesNotExistError, MetricsError - logger = logging.getLogger(__name__) if TYPE_CHECKING: @@ -44,15 +41,9 @@ def show(self, target: str, revs: List[str] = None): if revs: revs = ["workspace", *revs] - if not os.path.exists(target): - raise MetricDoesNotExistError([target]) - metrics_path = target + ".json" - metrics = None - with contextlib.suppress(MetricsError): - metrics = self.repo.metrics.show(targets=[metrics_path]) - + metrics = self.repo.metrics.show(targets=[metrics_path]) plots = self.repo.plots.show(target, recursive=True, revs=revs) return metrics, plots diff --git a/dvc/repo/metrics/diff.py b/dvc/repo/metrics/diff.py index cb0974238a..b76ed6732b 100644 --- a/dvc/repo/metrics/diff.py +++ b/dvc/repo/metrics/diff.py @@ -1,17 +1,8 @@ -from dvc.exceptions import MetricsError from dvc.repo.experiments.utils import fix_exp_head from dvc.utils.diff import diff as _diff from dvc.utils.diff import format_dict -def _get_metrics(repo, *args, revs=None, **kwargs): - try: - metrics = repo.metrics.show(*args, **kwargs, revs=revs) - return metrics - except MetricsError: - return {} - - def diff(repo, *args, a_rev=None, b_rev=None, **kwargs): if repo.scm.no_commits: return {} @@ -22,9 +13,9 @@ def diff(repo, *args, a_rev=None, b_rev=None, **kwargs): a_rev = fix_exp_head(repo.scm, a_rev) b_rev = fix_exp_head(repo.scm, b_rev) or "workspace" - metrics = _get_metrics(repo, *args, **kwargs, revs=[a_rev, b_rev]) - old = metrics.get(a_rev, {}) - new = metrics.get(b_rev, {}) + metrics = repo.metrics.show(*args, **kwargs, revs=[a_rev, b_rev]) + old = metrics.get(a_rev, {}).get("data", {}) + new = metrics.get(b_rev, {}).get("data", {}) return _diff( format_dict(old), format_dict(new), with_unchanged=with_unchanged diff --git a/dvc/repo/metrics/show.py b/dvc/repo/metrics/show.py index 5ff8bb11af..b7eb48785e 100644 --- a/dvc/repo/metrics/show.py +++ b/dvc/repo/metrics/show.py @@ -1,19 +1,14 @@ import logging from typing import List -from dvc.exceptions import ( - MetricDoesNotExistError, - NoMetricsFoundError, - NoMetricsParsedError, -) from dvc.fs.repo import RepoFileSystem from dvc.output import Output -from dvc.path_info import PathInfo from dvc.repo import locked -from dvc.repo.collect import collect +from dvc.repo.collect import DvcPaths, collect from dvc.repo.live import summary_path_info from dvc.scm.base import SCMError -from dvc.utils.serialize import YAMLFileCorruptedError, load_yaml +from dvc.utils import error_handler, errored_revisions, onerror_collect +from dvc.utils.serialize import load_yaml logger = logging.getLogger(__name__) @@ -22,7 +17,7 @@ def _is_metric(out: Output) -> bool: return bool(out.metric) or bool(out.live) -def _to_path_infos(metrics: List[Output]) -> List[PathInfo]: +def _to_path_infos(metrics: List[Output]) -> DvcPaths: result = [] for out in metrics: if out.metric: @@ -70,7 +65,14 @@ def _extract_metrics(metrics, path, rev): return ret -def _read_metrics(repo, metrics, rev): +@error_handler +def _read_metric(path, fs, rev, **kwargs): + val = load_yaml(path, fs=fs) + val = _extract_metrics(val, path, rev) + return val or {} + + +def _read_metrics(repo, metrics, rev, onerror=None): fs = RepoFileSystem(repo) res = {} @@ -78,21 +80,16 @@ def _read_metrics(repo, metrics, rev): if not fs.isfile(metric): continue - try: - val = load_yaml(metric, fs=fs) - except (FileNotFoundError, YAMLFileCorruptedError): - logger.debug( - "failed to read '%s' on '%s'", metric, rev, exc_info=True - ) - continue - - val = _extract_metrics(val, metric, rev) - if val not in (None, {}): - res[str(metric)] = val + res[str(metric)] = _read_metric(metric, fs, rev, onerror=onerror) return res +def _gather_metrics(repo, targets, rev, recursive, onerror=None): + metrics = _collect_metrics(repo, targets, rev, recursive) + return _read_metrics(repo, metrics, rev, onerror=onerror) + + @locked def show( repo, @@ -102,33 +99,21 @@ def show( recursive=False, revs=None, all_commits=False, + onerror=None, ): - res = {} - metrics_found = False + if onerror is None: + onerror = onerror_collect + res = {} for rev in repo.brancher( revs=revs, all_branches=all_branches, all_tags=all_tags, all_commits=all_commits, ): - metrics = _collect_metrics(repo, targets, rev, recursive) - - if not metrics_found and metrics: - metrics_found = True - - vals = _read_metrics(repo, metrics, rev) - - if vals: - res[rev] = vals - - if not res: - if metrics_found: - raise NoMetricsParsedError("metrics") - elif targets: - raise MetricDoesNotExistError(targets) - else: - raise NoMetricsFoundError("metrics", "-m/-M") + res[rev] = error_handler(_gather_metrics)( + repo, targets, rev, recursive, onerror=onerror + ) # Hide workspace metrics if they are the same as in the active branch try: @@ -141,4 +126,13 @@ def show( if res.get("workspace") == res.get(active_branch): res.pop("workspace", None) + errored = errored_revisions(res) + if errored: + from dvc.ui import ui + + ui.error_write( + "DVC failed to load some metrics for following revisions:" + f" '{', '.join(errored)}'." + ) + return res diff --git a/dvc/repo/params/diff.py b/dvc/repo/params/diff.py index e3898f748a..f08255b3fe 100644 --- a/dvc/repo/params/diff.py +++ b/dvc/repo/params/diff.py @@ -2,16 +2,6 @@ from dvc.utils.diff import diff as _diff from dvc.utils.diff import format_dict -from .show import NoParamsError - - -def _get_params(repo, *args, revs=None, **kwargs): - try: - params = repo.params.show(*args, **kwargs, revs=revs) - return params - except NoParamsError: - return {} - def diff(repo, *args, a_rev=None, b_rev=None, **kwargs): if repo.scm.no_commits: @@ -23,9 +13,10 @@ def diff(repo, *args, a_rev=None, b_rev=None, **kwargs): a_rev = fix_exp_head(repo.scm, a_rev) b_rev = fix_exp_head(repo.scm, b_rev) or "workspace" - params = _get_params(repo, *args, **kwargs, revs=[a_rev, b_rev]) - old = params.get(a_rev, {}) - new = params.get(b_rev, {}) + params = repo.params.show(*args, **kwargs, revs=[a_rev, b_rev]) + + old = params.get(a_rev, {}).get("data", {}) + new = params.get(b_rev, {}).get("data", {}) return _diff( format_dict(old), format_dict(new), with_unchanged=with_unchanged diff --git a/dvc/repo/params/show.py b/dvc/repo/params/show.py index 65b5d90f00..6630ddd7e8 100644 --- a/dvc/repo/params/show.py +++ b/dvc/repo/params/show.py @@ -1,16 +1,17 @@ import logging from collections import defaultdict from copy import copy -from typing import TYPE_CHECKING, List, Tuple +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple from dvc.dependency.param import ParamsDependency -from dvc.exceptions import DvcException from dvc.path_info import PathInfo from dvc.repo import locked from dvc.repo.collect import collect from dvc.scm.base import SCMError from dvc.stage import PipelineStage -from dvc.utils.serialize import LOADERS, ParseError +from dvc.ui import ui +from dvc.utils import error_handler, errored_revisions, onerror_collect +from dvc.utils.serialize import LOADERS if TYPE_CHECKING: from dvc.output import Output @@ -20,10 +21,6 @@ logger = logging.getLogger(__name__) -class NoParamsError(DvcException): - pass - - def _is_params(dep: "Output"): return isinstance(dep, ParamsDependency) @@ -31,6 +28,7 @@ def _is_params(dep: "Output"): def _collect_configs( repo: "Repo", rev, targets=None ) -> Tuple[List["Output"], List["DvcPath"]]: + params, path_infos = collect( repo, targets=targets or [], @@ -43,46 +41,48 @@ def _collect_configs( default_params = ( PathInfo(repo.root_dir) / ParamsDependency.DEFAULT_PARAMS_FILE ) - if default_params not in all_path_infos: + if default_params not in all_path_infos and repo.fs.exists( + default_params + ): path_infos.append(default_params) return params, path_infos -def _read_path_info(fs, path_info, rev): - if not fs.exists(path_info): - return None - +@error_handler +def _read_path_info(fs, path_info, **kwargs): suffix = path_info.suffix.lower() loader = LOADERS[suffix] - try: - return loader(path_info, fs=fs) - except ParseError: - logger.debug( - "failed to read '%s' on '%s'", path_info, rev, exc_info=True - ) - return None + return loader(path_info, fs=fs) -def _read_params(repo, params, params_path_infos, rev, deps=False): - res = defaultdict(dict) +def _read_params( + repo, + params, + params_path_infos, + deps=False, + onerror: Optional[Callable] = None, +): + res: Dict[str, Dict] = defaultdict(dict) path_infos = copy(params_path_infos) if deps: for param in params: - res[str(param.path_info)].update(param.read_params_d()) + params_dict = error_handler(param.read_params_d)(onerror=onerror) + if params_dict: + res[str(param.path_info)] = params_dict else: path_infos += [param.path_info for param in params] for path_info in path_infos: - from_path = _read_path_info(repo.fs, path_info, rev) + from_path = _read_path_info(repo.fs, path_info, onerror=onerror) if from_path: res[str(path_info)] = from_path return res -def _collect_vars(repo, params): - vars_params = defaultdict(dict) +def _collect_vars(repo, params) -> Dict: + vars_params: Dict[str, Dict] = defaultdict(dict) for stage in repo.stages: if isinstance(stage, PipelineStage) and stage.tracked_vars: for file, vars_ in stage.tracked_vars.items(): @@ -96,24 +96,19 @@ def _collect_vars(repo, params): @locked -def show(repo, revs=None, targets=None, deps=False): +def show(repo, revs=None, targets=None, deps=False, onerror: Callable = None): + if onerror is None: + onerror = onerror_collect res = {} for branch in repo.brancher(revs=revs): - params, params_path_infos = _collect_configs(repo, branch, targets) - params = _read_params(repo, params, params_path_infos, branch, deps) - vars_params = _collect_vars(repo, params) - - # NOTE: only those that are not added as a ParamDependency are included - # so we don't need to recursively merge them yet. - params.update(vars_params) + params = error_handler(_gather_params)( + repo=repo, rev=branch, targets=targets, deps=deps, onerror=onerror + ) if params: res[branch] = params - if not res: - raise NoParamsError("no parameter configs files in this repository") - # Hide workspace params if they are the same as in the active branch try: active_branch = repo.scm.active_branch() @@ -125,4 +120,31 @@ def show(repo, revs=None, targets=None, deps=False): if res.get("workspace") == res.get(active_branch): res.pop("workspace", None) + errored = errored_revisions(res) + if errored: + ui.error_write( + "DVC failed to load some parameters for following revisions:" + f" '{', '.join(errored)}'." + ) + return res + + +def _gather_params(repo, rev, targets=None, deps=False, onerror=None): + param_outs, params_path_infos = _collect_configs( + repo, rev, targets=targets + ) + params = _read_params( + repo, + params=param_outs, + params_path_infos=params_path_infos, + deps=deps, + onerror=onerror, + ) + vars_params = _collect_vars(repo, params) + + # NOTE: only those that are not added as a ParamDependency are + # included so we don't need to recursively merge them yet. + for key, vals in vars_params.items(): + params[key]["data"] = vals + return params diff --git a/dvc/repo/plots/__init__.py b/dvc/repo/plots/__init__.py index 6f76cfb3ae..64d30068cc 100644 --- a/dvc/repo/plots/__init__.py +++ b/dvc/repo/plots/__init__.py @@ -1,17 +1,22 @@ +import csv +import io import logging import os -from typing import TYPE_CHECKING, Dict, List, Optional +from collections import OrderedDict +from typing import TYPE_CHECKING, Callable, Dict, List, Optional from funcy import cached_property, first, project -from dvc.exceptions import ( - DvcException, - MetricDoesNotExistError, - NoMetricsFoundError, - NoMetricsParsedError, -) +from dvc.exceptions import DvcException +from dvc.repo.plots.data import PlotMetricTypeError from dvc.types import StrPath -from dvc.utils import relpath +from dvc.utils import ( + error_handler, + errored_revisions, + onerror_collect, + relpath, +) +from dvc.utils.serialize import LOADERS if TYPE_CHECKING: from dvc.output import Output @@ -41,18 +46,17 @@ def collect( targets: List[str] = None, revs: List[str] = None, recursive: bool = False, + onerror: Optional[Callable] = None, + props: Optional[Dict] = None, ) -> Dict[str, Dict]: """Collects all props and data for plots. Returns a structure like: {rev: {plots.csv: { props: {x: ..., "header": ..., ...}, - data: "...data as a string...", + data: "unstructured data (as stored for given extension)", }}} - Data parsing is postponed, since it's affected by props. """ - from dvc.config import NoRemoteError - from dvc.fs.repo import RepoFileSystem from dvc.utils.collections import ensure_list targets = ensure_list(targets) @@ -62,70 +66,68 @@ def collect( if revs is not None and rev not in revs: continue rev = rev or "workspace" + data[rev] = self._collect_from_revision( + revision=rev, + targets=targets, + recursive=recursive, + onerror=onerror, + props=props, + ) - fs = RepoFileSystem(self.repo) - plots = _collect_plots(self.repo, targets, rev, recursive) - for path_info, props in plots.items(): - - if rev not in data: - data[rev] = {} - - if fs.isdir(path_info): - plot_files = [] - try: - for pi in fs.walk_files(path_info): - plot_files.append( - (pi, relpath(pi, self.repo.root_dir)) - ) - except NoRemoteError: - logger.debug( - ( - "Could not find cache for directory '%s' on " - "'%s'. Files inside will not be plotted." - ), - path_info, - rev, - ) - continue - else: - plot_files = [ - (path_info, relpath(path_info, self.repo.root_dir)) - ] - - for path, repo_path in plot_files: - data[rev].update({repo_path: {"props": props}}) - - # Load data from git or dvc cache - try: - with fs.open(path) as fd: - data[rev][repo_path]["data"] = fd.read() - except FileNotFoundError: - # This might happen simply because cache is absent - logger.debug( - ( - "Could not find '%s' on '%s'. " - "File will not be plotted" - ), - path, - rev, - ) - except UnicodeDecodeError: - logger.debug( - ( - "'%s' at '%s' is binary file. It will not be " - "plotted." - ), - path, - rev, - ) + errored = errored_revisions(data) + if errored: + from dvc.ui import ui + + ui.error_write( + "DVC failed to load some plots for following revisions: " + f"'{', '.join(errored)}'." + ) return data + @error_handler + def _collect_from_revision( + self, + targets: Optional[List[str]] = None, + revision: Optional[str] = None, + recursive: bool = False, + onerror: Optional[Callable] = None, + props: Optional[Dict] = None, + ): + from dvc.fs.repo import RepoFileSystem + + fs = RepoFileSystem(self.repo) + plots = _collect_plots(self.repo, targets, revision, recursive) + res = {} + for path_info, rev_props in plots.items(): + + if fs.isdir(path_info): + plot_files = [] + for pi in fs.walk_files(path_info): + plot_files.append((pi, relpath(pi, self.repo.root_dir))) + else: + plot_files = [ + (path_info, relpath(path_info, self.repo.root_dir)) + ] + + props = props or {} + + for path, repo_path in plot_files: + res[repo_path] = {"props": rev_props} + res[repo_path].update( + parse( + fs, + path, + rev_props=rev_props, + props=props, + onerror=onerror, + ) + ) + return res + @staticmethod def render(data, revs=None, props=None, templates=None): """Renders plots""" - from dvc.repo.plots.data import PlotParsingError - props = props or {} # Merge data by plot file and apply overriding props @@ -133,20 +135,14 @@ def render(data, revs=None, props=None, templates=None): result = {} for datafile, desc in plots.items(): - try: - result[datafile] = _render( - datafile, desc["data"], desc["props"], templates - ) - except PlotParsingError as e: - logger.debug( - "failed to read '%s' on '%s'", - e.path, - e.revision, - exc_info=True, - ) - - if not any(result.values()): - raise NoMetricsParsedError("plots") + rendered = _render( + datafile, + desc["data"], + desc["props"], + templates, + ) + if rendered: + result[datafile] = rendered return result @@ -157,25 +153,13 @@ def show( props=None, templates=None, recursive=False, + onerror=None, ): - from dvc.utils.collections import ensure_list - - data = self.collect(targets, revs, recursive) - - # If any mentioned plot doesn't have any data then that's an error - for target in ensure_list(targets): - rpath = relpath(target, self.repo.root_dir) - if not any( - "data" in rev_data[key] - for rev_data in data.values() - for key, d in rev_data.items() - if rpath in key - ): - raise MetricDoesNotExistError([target]) - - # No data at all is a special error with a special message - if not data: - raise NoMetricsFoundError("plots", "--plots/--plots-no-cache") + if onerror is None: + onerror = onerror_collect + data = self.collect( + targets, revs, recursive, onerror=onerror, props=props + ) if templates is None: templates = self.templates @@ -273,11 +257,27 @@ def _collect_plots( rev=rev, recursive=recursive, ) + result = {plot.path_info: _plot_props(plot) for plot in plots} result.update({path_info: {} for path_info in path_infos}) return result +@error_handler +def parse(fs, path, props=None, rev_props=None, **kwargs): + props = props or {} + rev_props = rev_props or {} + _, extension = os.path.splitext(path) + if extension in (".tsv", ".csv"): + header = {**rev_props, **props}.get("header", True) + if extension == ".csv": + return _load_sv(path=path, fs=fs, delimiter=",", header=header) + return _load_sv(path=path, fs=fs, delimiter="\t", header=header) + if extension in LOADERS or extension in (".yml", ".yaml"): + return LOADERS[extension](path=path, fs=fs) + raise PlotMetricTypeError(path) + + def _plot_props(out: "Output") -> Dict: from dvc.schema import PLOT_PROPS @@ -305,7 +305,7 @@ def _prepare_plots(data, revs, props): if rev not in data: continue - for datafile, desc in data[rev].items(): + for datafile, desc in data[rev].get("data", {}).items(): # We silently skip on an absent data file, # see also try/except/pass in .collect() if "data" not in desc: @@ -355,19 +355,44 @@ def _render(datafile, datas, props, templates): # Parse all data, preprocess it and collect as a list of dicts data = [] - for rev, datablob in datas.items(): - rev_data = plot_data(datafile, rev, datablob).to_datapoints( + for rev, unstructured_data in datas.items(): + rev_data = plot_data(datafile, rev, unstructured_data).to_datapoints( fields=fields, path=props.get("path"), - header=props.get("header", True), append_index=props.get("append_index", False), ) data.extend(rev_data) # If y is not set then use last field not used yet - if not props.get("y") and template.has_anchor("y"): - fields = list(first(data)) - skip = (PlotData.REVISION_FIELD, props.get("x")) - props["y"] = first(f for f in reversed(fields) if f not in skip) + if data: + if not props.get("y") and template.has_anchor("y"): + fields = list(first(data)) + skip = (PlotData.REVISION_FIELD, props.get("x")) + props["y"] = first(f for f in reversed(fields) if f not in skip) + + return template.render(data, props=props) + return None + + +def _load_sv(path, fs, delimiter=",", header=True): + with fs.open(path, "r") as fd: + content = fd.read() + + first_row = first(csv.reader(io.StringIO(content))) + + if header: + reader = csv.DictReader(io.StringIO(content), delimiter=delimiter) + else: + reader = csv.DictReader( + io.StringIO(content), + delimiter=delimiter, + fieldnames=[str(i) for i in range(len(first_row))], + ) + + fieldnames = reader.fieldnames + data = list(reader) - return template.render(data, props=props) + return [ + OrderedDict([(field, data_point[field]) for field in fieldnames]) + for data_point in data + ] diff --git a/dvc/repo/plots/data.py b/dvc/repo/plots/data.py index 7515baa783..5268d81152 100644 --- a/dvc/repo/plots/data.py +++ b/dvc/repo/plots/data.py @@ -1,13 +1,10 @@ -import csv -import io import os -from collections import OrderedDict from copy import copy -from funcy import first, reraise +from funcy import first from dvc.exceptions import DvcException -from dvc.utils.serialize import ParseError, parse_json, parse_yaml +from dvc.utils.serialize import ParseError class PlotMetricTypeError(DvcException): @@ -31,7 +28,7 @@ def __init__(self, path, revision): self.path = path self.revision = revision - super().__init__(path, f"revision: {revision}") + super().__init__(path, f"revision: '{revision}'") def plot_data(filename, revision, content): @@ -148,18 +145,11 @@ def __init__(self, filename, revision, content, **kwargs): self.revision = revision self.content = content - def raw(self, **kwargs): - raise NotImplementedError - def _processors(self): return [_filter_fields, _append_index, _append_revision] def to_datapoints(self, **kwargs): - with reraise( - [ParseError, csv.Error], - PlotParsingError(self.filename, self.revision), - ): - data = self.raw(**kwargs) + data = self.content for data_proc in self._processors(): data = data_proc( @@ -169,48 +159,16 @@ def to_datapoints(self, **kwargs): class JSONPlotData(PlotData): - def raw(self, **kwargs): - return parse_json( - self.content, self.filename, object_pairs_hook=OrderedDict - ) - def _processors(self): parent_processors = super()._processors() return [_apply_path, _find_data] + parent_processors class CSVPlotData(PlotData): - def __init__(self, filename, revision, content, delimiter=","): - super().__init__(filename, revision, content) - self.delimiter = delimiter - - def raw(self, header=True, **kwargs): # pylint: disable=arguments-differ - first_row = first(csv.reader(io.StringIO(self.content))) - - if header: - reader = csv.DictReader( - io.StringIO(self.content), delimiter=self.delimiter - ) - else: - reader = csv.DictReader( - io.StringIO(self.content), - delimiter=self.delimiter, - fieldnames=[str(i) for i in range(len(first_row))], - ) - - fieldnames = reader.fieldnames - data = list(reader) - - return [ - OrderedDict([(field, data_point[field]) for field in fieldnames]) - for data_point in data - ] + pass class YAMLPlotData(PlotData): - def raw(self, **kwargs): - return parse_yaml(self.content, self.filename, typ="rt") - def _processors(self): parent_processors = super()._processors() return [_find_data] + parent_processors diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index 522037f267..e396be44b5 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -9,7 +9,7 @@ import stat import sys import time -from typing import Optional, Tuple +from typing import Dict, List, Optional, Tuple import colorama @@ -480,3 +480,35 @@ def glob_targets(targets, glob=True, recursive=True): for target in targets for exp_target in iglob(target, recursive=recursive) ] + + +def error_handler(func): + def wrapper(*args, **kwargs): + onerror = kwargs.get("onerror", None) + result = {} + + try: + vals = func(*args, **kwargs) + if vals: + result["data"] = vals + except Exception as e: # pylint: disable=broad-except + if onerror is not None: + onerror(result, e, **kwargs) + return result + + return wrapper + + +def onerror_collect(result: Dict, exception: Exception, *args, **kwargs): + logger.debug("", exc_info=True) + result["error"] = exception + + +def errored_revisions(rev_data: Dict) -> List: + from dvc.utils.collections import nested_contains + + result = [] + for revision, data in rev_data.items(): + if nested_contains(data, "error"): + result.append(revision) + return result diff --git a/dvc/utils/collections.py b/dvc/utils/collections.py index ad50224ecb..70ecf56e2f 100644 --- a/dvc/utils/collections.py +++ b/dvc/utils/collections.py @@ -134,3 +134,14 @@ def inner(*args, **kwargs): return inner return wrapped + + +def nested_contains(dictionary: Dict, phrase: str) -> bool: + for key, val in dictionary.items(): + if key == phrase and val: + return True + + if isinstance(val, dict): + if nested_contains(val, phrase): + return True + return False diff --git a/dvc/utils/diff.py b/dvc/utils/diff.py index dddcb097cd..48f246a5c3 100644 --- a/dvc/utils/diff.py +++ b/dvc/utils/diff.py @@ -79,7 +79,11 @@ def diff(old, new, with_unchanged=False): res = defaultdict(dict) for path in paths: - path_diff = _diff(old.get(path), new.get(path), with_unchanged) + path_diff = _diff( + old.get(path, {}).get("data", {}), + new.get(path, {}).get("data", {}), + with_unchanged, + ) if path_diff: res[path] = path_diff return dict(res) diff --git a/dvc/utils/serialize/_json.py b/dvc/utils/serialize/_json.py index 210d20bb0e..16c6950f7f 100644 --- a/dvc/utils/serialize/_json.py +++ b/dvc/utils/serialize/_json.py @@ -27,3 +27,9 @@ def dump_json(path, data, fs=None): def modify_json(path, fs=None): with _modify_data(path, parse_json, dump_json, fs=fs) as d: yield d + + +def encode_exception(o): + if isinstance(o, Exception): + return {"type": type(o).__name__, "msg": str(o)} + raise TypeError diff --git a/tests/func/experiments/test_show.py b/tests/func/experiments/test_show.py index 758b02ebc2..bc6d72cb6d 100644 --- a/tests/func/experiments/test_show.py +++ b/tests/func/experiments/test_show.py @@ -3,26 +3,28 @@ from datetime import datetime import pytest -from funcy import first +from funcy import first, get_in from dvc.exceptions import InvalidArgumentError from dvc.main import main from dvc.repo.experiments.base import EXPS_STASH, ExpRefInfo from dvc.repo.experiments.executor.base import BaseExecutor, ExecutorInfo from dvc.utils.fs import makedirs -from dvc.utils.serialize import dump_yaml +from dvc.utils.serialize import YAMLFileCorruptedError, dump_yaml from tests.func.test_repro_multistage import COPY_SCRIPT def test_show_simple(tmp_dir, scm, dvc, exp_stage): assert dvc.experiments.show()["workspace"] == { "baseline": { - "metrics": {"metrics.yaml": {"foo": 1}}, - "params": {"params.yaml": {"foo": 1}}, - "queued": False, - "running": False, - "executor": None, - "timestamp": None, + "data": { + "metrics": {"metrics.yaml": {"data": {"foo": 1}}}, + "params": {"params.yaml": {"data": {"foo": 1}}}, + "queued": False, + "running": False, + "executor": None, + "timestamp": None, + } } } @@ -40,15 +42,17 @@ def test_show_experiment(tmp_dir, scm, dvc, exp_stage, workspace): results = dvc.experiments.show() expected_baseline = { - "metrics": {"metrics.yaml": {"foo": 1}}, - "params": {"params.yaml": {"foo": 1}}, - "queued": False, - "running": False, - "executor": None, - "timestamp": timestamp, - "name": "master", + "data": { + "metrics": {"metrics.yaml": {"data": {"foo": 1}}}, + "params": {"params.yaml": {"data": {"foo": 1}}}, + "queued": False, + "running": False, + "executor": None, + "timestamp": timestamp, + "name": "master", + } } - expected_params = {"foo": 2} + expected_params = {"data": {"foo": 2}} assert set(results.keys()) == {"workspace", baseline_rev} experiments = results[baseline_rev] @@ -57,8 +61,8 @@ def test_show_experiment(tmp_dir, scm, dvc, exp_stage, workspace): if rev == "baseline": assert exp == expected_baseline else: - assert exp["metrics"]["metrics.yaml"] == expected_params - assert exp["params"]["params.yaml"] == expected_params + assert exp["data"]["metrics"]["metrics.yaml"] == expected_params + assert exp["data"]["params"]["params.yaml"] == expected_params def test_show_queued(tmp_dir, scm, dvc, exp_stage): @@ -69,9 +73,9 @@ def test_show_queued(tmp_dir, scm, dvc, exp_stage): results = dvc.experiments.show()[baseline_rev] assert len(results) == 2 - exp = results[exp_rev] + exp = results[exp_rev]["data"] assert exp["queued"] - assert exp["params"]["params.yaml"] == {"foo": 2} + assert exp["params"]["params.yaml"] == {"data": {"foo": 2}} # test that only queued experiments for the current baseline are returned tmp_dir.gen("foo", "foo") @@ -84,9 +88,9 @@ def test_show_queued(tmp_dir, scm, dvc, exp_stage): results = dvc.experiments.show()[new_rev] assert len(results) == 2 - exp = results[exp_rev] + exp = results[exp_rev]["data"] assert exp["queued"] - assert exp["params"]["params.yaml"] == {"foo": 3} + assert exp["params"]["params.yaml"] == {"data": {"foo": 3}} @pytest.mark.parametrize("workspace", [True, False]) @@ -106,7 +110,7 @@ def test_show_checkpoint( for rev, exp in results.items(): if rev != "baseline": checkpoints.append(rev) - assert exp["checkpoint_tip"] == exp_rev + assert exp["data"]["checkpoint_tip"] == exp_rev capsys.readouterr() assert main(["exp", "show", "--no-pager"]) == 0 @@ -361,12 +365,14 @@ def test_show_running_workspace(tmp_dir, scm, dvc, exp_stage, capsys): assert dvc.experiments.show()["workspace"] == { "baseline": { - "metrics": {"metrics.yaml": {"foo": 1}}, - "params": {"params.yaml": {"foo": 1}}, - "queued": False, - "running": True, - "executor": info.location, - "timestamp": None, + "data": { + "metrics": {"metrics.yaml": {"data": {"foo": 1}}}, + "params": {"params.yaml": {"data": {"foo": 1}}}, + "queued": False, + "running": True, + "executor": info.location, + "timestamp": None, + } } } @@ -389,10 +395,12 @@ def test_show_running_executor(tmp_dir, scm, dvc, exp_stage): dump_yaml(pidfile, info.to_dict()) results = dvc.experiments.show() - assert not results[baseline_rev][exp_rev]["queued"] - assert results[baseline_rev][exp_rev]["running"] - assert results[baseline_rev][exp_rev]["executor"] == info.location - assert not results["workspace"]["baseline"]["running"] + exp_data = get_in(results, [baseline_rev, exp_rev, "data"]) + assert not exp_data["queued"] + assert exp_data["running"] + assert exp_data["executor"] == info.location + + assert not results["workspace"]["baseline"]["data"]["running"] @pytest.mark.parametrize("workspace", [True, False]) @@ -430,7 +438,33 @@ def test_show_running_checkpoint( ) if workspace: scm.set_ref(EXEC_BRANCH, str(exp_ref), symbolic=True) + results = dvc.experiments.show() - assert results[baseline_rev][checkpoint_rev]["running"] - assert results[baseline_rev][checkpoint_rev]["executor"] == info.location - assert not results["workspace"]["baseline"]["running"] + + checkpoint_res = get_in(results, [baseline_rev, checkpoint_rev, "data"]) + assert checkpoint_res["running"] + assert checkpoint_res["executor"] == info.location + + assert not results["workspace"]["baseline"]["data"]["running"] + + +def test_show_with_broken_repo(tmp_dir, scm, dvc, exp_stage, caplog): + baseline_rev = scm.get_rev() + exp1 = dvc.experiments.run(exp_stage.addressing, params=["foo=2"]) + exp2 = dvc.experiments.run(exp_stage.addressing, params=["foo=3"]) + + with open("dvc.yaml", "a") as fd: + fd.write("breaking the yaml!") + + result = dvc.experiments.show() + rev1 = first(exp1) + rev2 = first(exp2) + + baseline = result[baseline_rev] + + paths = ["data", "params", "params.yaml"] + assert get_in(baseline[rev1], paths) == {"data": {"foo": 2}} + assert get_in(baseline[rev2], paths) == {"data": {"foo": 3}} + + paths = ["workspace", "baseline", "error"] + assert isinstance(get_in(result, paths), YAMLFileCorruptedError) diff --git a/tests/func/metrics/test_show.py b/tests/func/metrics/test_show.py index ca939beb12..2299093ae9 100644 --- a/tests/func/metrics/test_show.py +++ b/tests/func/metrics/test_show.py @@ -1,17 +1,15 @@ +import logging import os import pytest +from funcy import get_in -from dvc.exceptions import ( - MetricDoesNotExistError, - NoMetricsFoundError, - NoMetricsParsedError, - OverlappingOutputPathsError, -) +from dvc.dvcfile import PIPELINE_FILE +from dvc.exceptions import OverlappingOutputPathsError from dvc.path_info import PathInfo from dvc.repo import Repo from dvc.utils.fs import remove -from dvc.utils.serialize import dump_yaml, modify_yaml +from dvc.utils.serialize import YAMLFileCorruptedError, dump_yaml, modify_yaml def test_show_simple(tmp_dir, dvc, run_copy_metrics): @@ -19,7 +17,9 @@ def test_show_simple(tmp_dir, dvc, run_copy_metrics): run_copy_metrics( "metrics_t.yaml", "metrics.yaml", metrics=["metrics.yaml"] ) - assert dvc.metrics.show() == {"": {"metrics.yaml": 1.1}} + assert dvc.metrics.show() == { + "": {"data": {"metrics.yaml": {"data": 1.1}}} + } def test_show(tmp_dir, dvc, run_copy_metrics): @@ -27,7 +27,9 @@ def test_show(tmp_dir, dvc, run_copy_metrics): run_copy_metrics( "metrics_t.yaml", "metrics.yaml", metrics=["metrics.yaml"] ) - assert dvc.metrics.show() == {"": {"metrics.yaml": {"foo": 1.1}}} + assert dvc.metrics.show() == { + "": {"data": {"metrics.yaml": {"data": {"foo": 1.1}}}} + } def test_show_multiple(tmp_dir, dvc, run_copy_metrics): @@ -35,7 +37,11 @@ def test_show_multiple(tmp_dir, dvc, run_copy_metrics): tmp_dir.gen("baz_temp", "baz: 2\n") run_copy_metrics("foo_temp", "foo", fname="foo.dvc", metrics=["foo"]) run_copy_metrics("baz_temp", "baz", fname="baz.dvc", metrics=["baz"]) - assert dvc.metrics.show() == {"": {"foo": {"foo": 1}, "baz": {"baz": 2}}} + assert dvc.metrics.show() == { + "": { + "data": {"foo": {"data": {"foo": 1}}, "baz": {"data": {"baz": 2}}} + } + } def test_show_branch(tmp_dir, scm, dvc, run_copy_metrics): @@ -50,8 +56,8 @@ def test_show_branch(tmp_dir, scm, dvc, run_copy_metrics): tmp_dir.scm_gen("metrics.yaml", "foo: 2", commit="branch") assert dvc.metrics.show(revs=["branch"]) == { - "workspace": {"metrics.yaml": {"foo": 1}}, - "branch": {"metrics.yaml": {"foo": 2}}, + "workspace": {"data": {"metrics.yaml": {"data": {"foo": 1}}}}, + "branch": {"data": {"metrics.yaml": {"data": {"foo": 2}}}}, } @@ -84,8 +90,8 @@ def test_show_subrepo_with_preexisting_tags(tmp_dir, scm): expected_path = os.path.join("subdir", "metrics.yaml") assert dvc.metrics.show(all_tags=True) == { - "workspace": {expected_path: {"foo": 1}}, - "v1": {expected_path: {"foo": 1}}, + "workspace": {"data": {expected_path: {"data": {"foo": 1}}}}, + "v1": {"data": {expected_path: {"data": {"foo": 1}}}}, } @@ -102,7 +108,16 @@ def test_missing_cache(tmp_dir, dvc, run_copy_metrics): remove(stage.outs[0].fspath) remove(stage.outs[0].cache_path) - assert dvc.metrics.show() == {"": {"metrics.yaml": 1.1}} + result = dvc.metrics.show() + metrics2 = result[""]["data"].pop("metrics2.yaml") + assert isinstance(metrics2["error"], FileNotFoundError) + assert result == { + "": { + "data": { + "metrics.yaml": {"data": 1.1}, + } + } + } @pytest.mark.parametrize("use_dvc", [True, False]) @@ -115,7 +130,7 @@ def test_show_non_metric(tmp_dir, scm, use_dvc): dvc = Repo(uninitialized=True) assert dvc.metrics.show(targets=["metrics.yaml"]) == { - "": {"metrics.yaml": {"foo": 1.1}} + "": {"data": {"metrics.yaml": {"data": {"foo": 1.1}}}} } if not use_dvc: @@ -134,8 +149,8 @@ def test_show_non_metric_branch(tmp_dir, scm, use_dvc): dvc = Repo(uninitialized=True) assert dvc.metrics.show(targets=["metrics.yaml"], revs=["branch"]) == { - "workspace": {"metrics.yaml": {"foo": 1.1}}, - "branch": {"metrics.yaml": {"foo": 2.2}}, + "workspace": {"data": {"metrics.yaml": {"data": {"foo": 1.1}}}}, + "branch": {"data": {"metrics.yaml": {"data": {"foo": 2.2}}}}, } if not use_dvc: @@ -154,9 +169,15 @@ def test_non_metric_and_recurisve_show(tmp_dir, dvc, run_copy_metrics): targets=["metrics_t.yaml", "metrics"], recursive=True ) == { "": { - os.path.join("metrics", "metric1.yaml"): {"bar": 1.2}, - os.path.join("metrics", "metric2.yaml"): {"foo": 1.1}, - "metrics_t.yaml": {"foo": 1.1}, + "data": { + os.path.join("metrics", "metric1.yaml"): { + "data": {"bar": 1.2} + }, + os.path.join("metrics", "metric2.yaml"): { + "data": {"foo": 1.1} + }, + "metrics_t.yaml": {"data": {"foo": 1.1}}, + } } } @@ -164,7 +185,7 @@ def test_non_metric_and_recurisve_show(tmp_dir, dvc, run_copy_metrics): def test_show_falsey(tmp_dir, dvc): tmp_dir.gen("metrics.json", '{"foo": 0, "bar": 0.0, "baz": {}}') assert dvc.metrics.show(targets=["metrics.json"]) == { - "": {"metrics.json": {"foo": 0, "bar": 0.0}} + "": {"data": {"metrics.json": {"data": {"foo": 0, "bar": 0.0}}}} } @@ -173,24 +194,33 @@ def test_show_no_repo(tmp_dir): dvc = Repo(uninitialized=True) - dvc.metrics.show(targets=["metrics.json"]) + assert dvc.metrics.show(targets=["metrics.json"]) == { + "": {"data": {"metrics.json": {"data": {"foo": 0, "bar": 0.0}}}} + } def test_show_malformed_metric(tmp_dir, scm, dvc, caplog): tmp_dir.gen("metric.json", '{"m":1') - with pytest.raises(NoMetricsParsedError): - dvc.metrics.show(targets=["metric.json"]) + assert isinstance( + dvc.metrics.show(targets=["metric.json"])[""]["data"]["metric.json"][ + "error" + ], + YAMLFileCorruptedError, + ) + +def test_metrics_show_no_target(tmp_dir, dvc, caplog): + with caplog.at_level(logging.WARNING): + assert dvc.metrics.show(targets=["metrics.json"]) == {"": {}} -def test_metrics_show_no_target(tmp_dir, dvc): - with pytest.raises(MetricDoesNotExistError): - dvc.metrics.show(targets=["metrics.json"]) + assert ( + "'metrics.json' was not found in current workspace." in caplog.messages + ) def test_show_no_metrics_files(tmp_dir, dvc, caplog): - with pytest.raises(NoMetricsFoundError): - dvc.metrics.show() + assert dvc.metrics.show() == {"": {}} @pytest.mark.parametrize("clear_before_run", [True, False]) @@ -224,5 +254,39 @@ def test_metrics_show_overlap( dvc._reset() - with pytest.raises(OverlappingOutputPathsError): - dvc.metrics.show() + res = dvc.metrics.show() + assert isinstance(res[""]["error"], OverlappingOutputPathsError) + + +@pytest.mark.parametrize( + "file,error_path", + ( + (PIPELINE_FILE, ["workspace", "error"]), + ("metrics.yaml", ["workspace", "data", "metrics.yaml", "error"]), + ), +) +def test_log_errors( + tmp_dir, scm, dvc, capsys, run_copy_metrics, file, error_path +): + tmp_dir.gen("metrics_t.yaml", "m: 1.1") + run_copy_metrics( + "metrics_t.yaml", + "metrics.yaml", + metrics=["metrics.yaml"], + single_stage=False, + name="train", + ) + scm.tag("v1") + + with open(file, "a") as fd: + fd.write("\nMALFORMED!") + + result = dvc.metrics.show(revs=["v1"]) + + _, error = capsys.readouterr() + + assert isinstance(get_in(result, error_path), YAMLFileCorruptedError) + assert ( + "DVC failed to load some metrics for following revisions: 'workspace'." + in error + ) diff --git a/tests/func/params/test_show.py b/tests/func/params/test_show.py index d89e889aaf..47bd7d9759 100644 --- a/tests/func/params/test_show.py +++ b/tests/func/params/test_show.py @@ -1,18 +1,23 @@ +import operator +from functools import reduce + import pytest from dvc.repo import Repo -from dvc.repo.params.show import NoParamsError +from dvc.repo.stage import PIPELINE_FILE +from dvc.utils.serialize import YAMLFileCorruptedError def test_show_empty(dvc): - with pytest.raises(NoParamsError): - dvc.params.show() + assert dvc.params.show() == {} def test_show(tmp_dir, dvc): tmp_dir.gen("params.yaml", "foo: bar") dvc.run(cmd="echo params.yaml", params=["foo"], single_stage=True) - assert dvc.params.show() == {"": {"params.yaml": {"foo": "bar"}}} + assert dvc.params.show() == { + "": {"data": {"params.yaml": {"data": {"foo": "bar"}}}} + } def test_show_toml(tmp_dir, dvc): @@ -21,7 +26,11 @@ def test_show_toml(tmp_dir, dvc): cmd="echo params.toml", params=["params.toml:foo"], single_stage=True ) assert dvc.params.show() == { - "": {"params.toml": {"foo": {"bar": 42, "baz": [1, 2]}}} + "": { + "data": { + "params.toml": {"data": {"foo": {"bar": 42, "baz": [1, 2]}}} + } + } } @@ -36,7 +45,13 @@ def test_show_py(tmp_dir, dvc): single_stage=True, ) assert dvc.params.show() == { - "": {"params.py": {"CONST": 1, "IS_DIR": True, "Config": {"foo": 42}}} + "": { + "data": { + "params.py": { + "data": {"CONST": 1, "Config": {"foo": 42}, "IS_DIR": True} + } + } + } } @@ -55,14 +70,16 @@ def test_show_multiple(tmp_dir, dvc): single_stage=True, ) assert dvc.params.show() == { - "": {"params.yaml": {"foo": "bar", "baz": "qux"}} + "": {"data": {"params.yaml": {"data": {"baz": "qux", "foo": "bar"}}}} } def test_show_list(tmp_dir, dvc): tmp_dir.gen("params.yaml", "foo:\n- bar\n- baz\n") dvc.run(cmd="echo params.yaml", params=["foo"], single_stage=True) - assert dvc.params.show() == {"": {"params.yaml": {"foo": ["bar", "baz"]}}} + assert dvc.params.show() == { + "": {"data": {"params.yaml": {"data": {"foo": ["bar", "baz"]}}}} + } def test_show_branch(tmp_dir, scm, dvc): @@ -75,14 +92,12 @@ def test_show_branch(tmp_dir, scm, dvc): tmp_dir.scm_gen("params.yaml", "foo: baz", commit="branch") assert dvc.params.show(revs=["branch"]) == { - "workspace": {"params.yaml": {"foo": "bar"}}, - "branch": {"params.yaml": {"foo": "baz"}}, + "branch": {"data": {"params.yaml": {"data": {"foo": "baz"}}}}, + "workspace": {"data": {"params.yaml": {"data": {"foo": "bar"}}}}, } def test_pipeline_params(tmp_dir, scm, dvc, run_copy): - from dvc.dvcfile import PIPELINE_FILE - tmp_dir.gen( {"foo": "foo", "params.yaml": "foo: bar\nxyz: val\nabc: ignore"} ) @@ -98,11 +113,17 @@ def test_pipeline_params(tmp_dir, scm, dvc, run_copy): ) assert dvc.params.show(revs=["master"], deps=True) == { - "master": {"params.yaml": {"foo": "qux", "xyz": "val"}} + "master": { + "data": {"params.yaml": {"data": {"foo": "qux", "xyz": "val"}}} + } } assert dvc.params.show(revs=["master"]) == { "master": { - "params.yaml": {"foo": "qux", "xyz": "val", "abc": "ignore"} + "data": { + "params.yaml": { + "data": {"abc": "ignore", "foo": "qux", "xyz": "val"} + } + } } } @@ -113,5 +134,47 @@ def test_show_no_repo(tmp_dir): dvc = Repo(uninitialized=True) assert dvc.params.show(targets=["params_file.yaml"]) == { - "": {"params_file.yaml": {"foo": "bar", "xyz": "val"}} + "": { + "data": { + "params_file.yaml": {"data": {"foo": "bar", "xyz": "val"}} + } + } } + + +@pytest.mark.parametrize( + "file,error_path", + ( + (PIPELINE_FILE, ["v1", "error"]), + ("params_other.yaml", ["v1", "data", "params_other.yaml", "error"]), + ), +) +def test_log_errors(tmp_dir, scm, dvc, capsys, file, error_path): + tmp_dir.gen("params_other.yaml", "foo: bar") + dvc.run( + cmd="echo params_other.yaml", + params=["params_other.yaml:foo"], + name="train", + ) + + rename = (tmp_dir / file).read_text() + with open(tmp_dir / file, "a") as fd: + fd.write("\nmalformed!") + + scm.add([PIPELINE_FILE, "params_other.yaml"]) + scm.commit("init") + scm.tag("v1") + + (tmp_dir / file).write_text(rename) + + result = dvc.params.show(revs=["v1"]) + + _, error = capsys.readouterr() + + assert isinstance( + reduce(operator.getitem, error_path, result), YAMLFileCorruptedError + ) + assert ( + "DVC failed to load some parameters for following revisions: 'v1'." + in error + ) diff --git a/tests/func/plots/test_show.py b/tests/func/plots/test_show.py index 327389d0e6..8586506a42 100644 --- a/tests/func/plots/test_show.py +++ b/tests/func/plots/test_show.py @@ -5,13 +5,10 @@ from collections import OrderedDict import pytest +from funcy import get_in -from dvc.exceptions import ( - MetricDoesNotExistError, - NoMetricsFoundError, - NoMetricsParsedError, - OverlappingOutputPathsError, -) +from dvc.dvcfile import PIPELINE_FILE +from dvc.exceptions import OverlappingOutputPathsError from dvc.main import main from dvc.path_info import PathInfo from dvc.repo import Repo @@ -26,8 +23,14 @@ NoFieldInDataError, TemplateNotFoundError, ) +from dvc.utils import onerror_collect from dvc.utils.fs import remove -from dvc.utils.serialize import dump_yaml, dumps_yaml, modify_yaml +from dvc.utils.serialize import ( + EncodingError, + YAMLFileCorruptedError, + dump_yaml, + modify_yaml, +) from tests.func.plots.utils import _write_csv, _write_json @@ -190,7 +193,8 @@ def test_plot_confusion(tmp_dir, dvc, run_copy_metrics): ) props = {"template": "confusion", "x": "predicted", "y": "actual"} - plot_string = dvc.plots.show(props=props)["metric.json"] + show = dvc.plots.show(props=props) + plot_string = show["metric.json"] plot_content = json.loads(plot_string) assert plot_content["data"]["values"] == [ @@ -395,24 +399,6 @@ def test_plot_cache_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics): ] -def test_throw_on_no_metric_at_all(tmp_dir, scm, dvc, caplog): - tmp_dir.scm_gen("some_file", "content", commit="there is no metric") - scm.tag("v1") - - tmp_dir.gen("some_file", "make repo dirty") - - caplog.clear() - with pytest.raises(MetricDoesNotExistError) as error, caplog.at_level( - logging.WARNING, "dvc" - ): - dvc.plots.show(targets="plot.json", revs=["v1"]) - - # do not warn if none found - assert len(caplog.messages) == 0 - - assert str(error.value) == "'plot.json' does not exist." - - def test_custom_template(tmp_dir, scm, dvc, custom_template, run_copy_metrics): metric = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] _write_json(tmp_dir, metric, "metric_t.json") @@ -481,8 +467,12 @@ def test_plot_wrong_metric_type(tmp_dir, scm, dvc, run_copy_metrics): commit="add text metric", ) - with pytest.raises(PlotMetricTypeError): - dvc.plots.show(targets=["metric.txt"]) + assert isinstance( + dvc.plots.collect(targets=["metric.txt"], onerror=onerror_collect)[ + "workspace" + ]["data"]["metric.txt"]["error"], + PlotMetricTypeError, + ) def test_plot_choose_columns( @@ -570,24 +560,12 @@ def test_raise_on_wrong_field(tmp_dir, scm, dvc, run_copy_metrics): dvc.plots.show("metric.json", props={"y": "no_val"}) -def test_load_metric_from_dict_json(tmp_dir): +@pytest.mark.parametrize("data_class", [JSONPlotData, YAMLPlotData]) +def test_find_data_in_dict(tmp_dir, data_class): metric = [{"accuracy": 1, "loss": 2}, {"accuracy": 3, "loss": 4}] dmetric = {"train": metric} - plot_data = JSONPlotData("-", "revision", json.dumps(dmetric)) - - expected = metric - for d in expected: - d["rev"] = "revision" - - assert list(map(dict, plot_data.to_datapoints())) == expected - - -def test_load_metric_from_dict_yaml(tmp_dir): - metric = [{"accuracy": 1, "loss": 2}, {"accuracy": 3, "loss": 4}] - dmetric = {"train": metric} - - plot_data = YAMLPlotData("-", "revision", dumps_yaml(dmetric)) + plot_data = data_class("-", "revision", dmetric) expected = metric for d in expected: @@ -699,20 +677,22 @@ def test_show_from_subdir(tmp_dir, dvc, capsys): def test_show_malformed_plots(tmp_dir, scm, dvc, caplog): - tmp_dir.gen("plot.json", '[{"m":1]') + tmp_dir.gen("plot.json", '[{"m":1}]') + scm.add(["plot.json"]) + scm.commit("initial") - with pytest.raises(NoMetricsParsedError): - dvc.plots.show(targets=["plot.json"]) + tmp_dir.gen("plot.json", '[{"m":1]') + result = dvc.plots.show(targets=["plot.json"], revs=["workspace", "HEAD"]) + plot_content = json.loads(result["plot.json"]) -def test_plots_show_no_target(tmp_dir, dvc): - with pytest.raises(MetricDoesNotExistError): - dvc.plots.show(targets=["plot.json"]) + assert plot_content["data"]["values"] == [ + {"m": 1, "rev": "HEAD", "step": 0} + ] -def test_show_no_plots_files(tmp_dir, dvc, caplog): - with pytest.raises(NoMetricsFoundError): - dvc.plots.show() +def test_plots_show_non_existing(tmp_dir, dvc): + assert dvc.plots.show(targets=["plot.json"]) == {} @pytest.mark.parametrize("clear_before_run", [True, False]) @@ -744,8 +724,10 @@ def test_plots_show_overlap(tmp_dir, dvc, run_copy_metrics, clear_before_run): dvc._reset() - with pytest.raises(OverlappingOutputPathsError): - dvc.plots.show() + assert isinstance( + dvc.plots.collect(onerror=onerror_collect)["workspace"]["error"], + OverlappingOutputPathsError, + ) def test_dir_plots(tmp_dir, dvc, run_copy_metrics): @@ -810,15 +792,51 @@ def test_show_dir_plots(tmp_dir, dvc, run_copy_metrics): remove(dvc.odb.local.cache_dir) remove(subdir) - with pytest.raises(NoMetricsParsedError): - dvc.plots.show() + + assert dvc.plots.show() == {} def test_ignore_binary_file(tmp_dir, dvc, run_copy_metrics): with open("file", "wb") as fobj: fobj.write(b"\xc1") - run_copy_metrics("file", "plot_file", plots=["plot_file"]) + run_copy_metrics("file", "plot_file.json", plots=["plot_file.json"]) + result = dvc.plots.collect(onerror=onerror_collect) + + assert isinstance( + result["workspace"]["data"]["plot_file.json"]["error"], EncodingError + ) + + +@pytest.mark.parametrize( + "file,error_path", + ( + (PIPELINE_FILE, ["workspace", "error"]), + ("plot.yaml", ["workspace", "data", "plot.yaml", "error"]), + ), +) +def test_log_errors( + tmp_dir, scm, dvc, run_copy_metrics, file, error_path, capsys +): + metric = [{"val": 2}, {"val": 3}] + dump_yaml("metric_t.yaml", metric) + run_copy_metrics( + "metric_t.yaml", + "plot.yaml", + plots=["plot.yaml"], + single_stage=False, + name="train", + ) + scm.tag("v1") + + with open(file, "a") as fd: + fd.write("\nMALFORMED!") - with pytest.raises(NoMetricsParsedError): - dvc.plots.show() + result = dvc.plots.collect(onerror=onerror_collect) + _, error = capsys.readouterr() + + assert isinstance(get_in(result, error_path), YAMLFileCorruptedError) + assert ( + "DVC failed to load some plots for following revisions: 'workspace'." + in error + ) diff --git a/tests/func/test_live.py b/tests/func/test_live.py index 8975a8e705..102a9adbe9 100644 --- a/tests/func/test_live.py +++ b/tests/func/test_live.py @@ -6,7 +6,6 @@ from funcy import first from dvc import stage as stage_module -from dvc.exceptions import MetricsError LIVE_SCRIPT = dedent( """ @@ -113,7 +112,13 @@ def test_live_provides_metrics(tmp_dir, dvc, live_stage): assert (tmp_dir / "logs.json").is_file() assert dvc.metrics.show() == { - "": {"logs.json": {"step": 1, "loss": 0.5, "accuracy": 0.5}} + "": { + "data": { + "logs.json": { + "data": {"accuracy": 0.5, "loss": 0.5, "step": 1} + } + } + } } assert (tmp_dir / "logs").is_dir() @@ -126,8 +131,7 @@ def test_live_provides_no_metrics(tmp_dir, dvc, live_stage): live_stage(summary=False, live="logs") assert not (tmp_dir / "logs.json").is_file() - with pytest.raises(MetricsError): - assert dvc.metrics.show() == {} + assert dvc.metrics.show() == {"": {}} assert (tmp_dir / "logs").is_dir() plots = dvc.plots.show() @@ -145,7 +149,7 @@ def test_experiments_track_summary(tmp_dir, scm, dvc, live_stage, typ): ((exp_rev, _),) = experiments.items() res = dvc.experiments.show() - assert "logs.json" in res[baseline_rev][exp_rev]["metrics"].keys() + assert "logs.json" in res[baseline_rev][exp_rev]["data"]["metrics"].keys() @pytest.mark.parametrize("html", [True, False]) @@ -194,7 +198,9 @@ def checkpoints_metric(show_results, metric_file, metric_name): tmp.pop("baseline") return list( map( - lambda exp: exp["metrics"][metric_file][metric_name], + lambda exp: exp["data"]["metrics"][metric_file]["data"][ + metric_name + ], list(tmp.values()), ) ) diff --git a/tests/unit/command/test_experiments.py b/tests/unit/command/test_experiments.py index 25ed74465c..e271f14644 100644 --- a/tests/unit/command/test_experiments.py +++ b/tests/unit/command/test_experiments.py @@ -75,8 +75,8 @@ def test_experiments_show(dvc, scm, mocker): assert cli_args.func == CmdExperimentsShow cmd = cli_args.func(cli_args) - m = mocker.patch("dvc.repo.experiments.show.show", return_value={}) + m = mocker.patch("dvc.repo.experiments.show.show", return_value={}) assert cmd.run() == 0 m.assert_called_once_with( diff --git a/tests/unit/command/test_live.py b/tests/unit/command/test_live.py index 1ad747ae3e..a02b10164f 100644 --- a/tests/unit/command/test_live.py +++ b/tests/unit/command/test_live.py @@ -20,7 +20,7 @@ def test_live_diff(dvc, mocker): cmd = cli_args.func(cli_args) m = mocker.patch("dvc.repo.live.Live.show", return_value=({}, {})) - assert cmd.run() == 0 + assert cmd.run() == 1 m.assert_called_once_with(target="target", revs=["HEAD", "rev1"]) @@ -35,6 +35,6 @@ def test_live_show(dvc, mocker): m = mocker.patch("dvc.repo.live.Live.show", return_value=({}, {})) - assert cmd.run() == 0 + assert cmd.run() == 1 m.assert_called_once_with(target="datafile", revs=None) diff --git a/tests/unit/command/test_params.py b/tests/unit/command/test_params.py index 41eb04b157..49428d1910 100644 --- a/tests/unit/command/test_params.py +++ b/tests/unit/command/test_params.py @@ -48,7 +48,12 @@ def test_params_diff_from_cli(dvc, mocker): assert cmd.run() == 0 params_diff.assert_called_once_with( - cmd.repo, a_rev=None, b_rev=None, all=False, targets=None, deps=False + cmd.repo, + a_rev=None, + b_rev=None, + all=False, + targets=None, + deps=False, ) show_diff_mock.assert_called_once_with( {}, title="Param", markdown=False, no_path=False, show_changes=False diff --git a/tests/unit/command/test_plots.py b/tests/unit/command/test_plots.py index 1102e477cc..cff6b3fc50 100644 --- a/tests/unit/command/test_plots.py +++ b/tests/unit/command/test_plots.py @@ -86,7 +86,8 @@ def test_plots_show_vega(dvc, mocker): assert cmd.run() == 0 m.assert_called_once_with( - targets=["datafile"], props={"template": "template", "header": False} + targets=["datafile"], + props={"template": "template", "header": False}, ) diff --git a/tests/unit/test_compare.py b/tests/unit/test_compare.py index e5985da76a..98cf31e1fb 100644 --- a/tests/unit/test_compare.py +++ b/tests/unit/test_compare.py @@ -3,6 +3,7 @@ import pytest from dvc.compare import diff_table, metrics_table, show_diff, show_metrics +from dvc.utils.serialize import YAMLFileCorruptedError @pytest.mark.parametrize("title", ["Metric", "Param"]) @@ -266,7 +267,15 @@ def test_metrics_diff_md(capsys): def test_metrics_show_with_valid_falsey_values(): td = metrics_table( - {"branch_1": {"metrics.json": {"a": 0, "b": {"ad": 0.0, "bc": 0.0}}}}, + { + "branch_1": { + "data": { + "metrics.json": { + "data": {"a": 0, "b": {"ad": 0.0, "bc": 0.0}} + } + } + } + }, all_branches=True, ) assert td.as_dict() == [ @@ -282,7 +291,15 @@ def test_metrics_show_with_valid_falsey_values(): def test_metrics_show_with_no_revision(): td = metrics_table( - {"branch_1": {"metrics.json": {"a": 0, "b": {"ad": 0.0, "bc": 0.0}}}}, + { + "branch_1": { + "data": { + "metrics.json": { + "data": {"a": 0, "b": {"ad": 0.0, "bc": 0.0}} + } + } + } + }, all_branches=False, ) assert td.as_dict() == [ @@ -291,7 +308,10 @@ def test_metrics_show_with_no_revision(): def test_metrics_show_with_non_dict_values(): - td = metrics_table({"branch_1": {"metrics.json": 1}}, all_branches=True) + td = metrics_table( + {"branch_1": {"data": {"metrics.json": {"data": 1}}}}, + all_branches=True, + ) assert td.as_dict() == [ {"Revision": "branch_1", "Path": "metrics.json", "": "1"} ] @@ -300,8 +320,16 @@ def test_metrics_show_with_non_dict_values(): def test_metrics_show_with_multiple_revision(): td = metrics_table( { - "branch_1": {"metrics.json": {"a": 1, "b": {"ad": 1, "bc": 2}}}, - "branch_2": {"metrics.json": {"a": 1, "b": {"ad": 3, "bc": 4}}}, + "branch_1": { + "data": { + "metrics.json": {"data": {"a": 1, "b": {"ad": 1, "bc": 2}}} + } + }, + "branch_2": { + "data": { + "metrics.json": {"data": {"a": 1, "b": {"ad": 3, "bc": 4}}} + } + }, }, all_branches=True, ) @@ -327,8 +355,14 @@ def test_metrics_show_with_one_revision_multiple_paths(): td = metrics_table( { "branch_1": { - "metrics.json": {"a": 1, "b": {"ad": 0.1, "bc": 1.03}}, - "metrics_1.json": {"a": 2.3, "b": {"ad": 6.5, "bc": 7.9}}, + "data": { + "metrics.json": { + "data": {"a": 1, "b": {"ad": 0.1, "bc": 1.03}} + }, + "metrics_1.json": { + "data": {"a": 2.3, "b": {"ad": 6.5, "bc": 7.9}} + }, + } } }, all_branches=True, @@ -354,8 +388,16 @@ def test_metrics_show_with_one_revision_multiple_paths(): def test_metrics_show_with_different_metrics_header(): td = metrics_table( { - "branch_1": {"metrics.json": {"b": {"ad": 1, "bc": 2}, "c": 4}}, - "branch_2": {"metrics.json": {"a": 1, "b": {"ad": 3, "bc": 4}}}, + "branch_1": { + "data": { + "metrics.json": {"data": {"b": {"ad": 1, "bc": 2}, "c": 4}} + } + }, + "branch_2": { + "data": { + "metrics.json": {"data": {"a": 1, "b": {"ad": 3, "bc": 4}}} + } + }, }, all_branches=True, ) @@ -382,9 +424,13 @@ def test_metrics_show_with_different_metrics_header(): def test_metrics_show_precision(): metrics = { "branch_1": { - "metrics.json": { - "a": 1.098765366365355, - "b": {"ad": 1.5342673, "bc": 2.987725527}, + "data": { + "metrics.json": { + "data": { + "a": 1.098765366365355, + "b": {"ad": 1.5342673, "bc": 2.987725527}, + } + } } } } @@ -445,44 +491,56 @@ def test_metrics_show_mocked(mocker, markdown): def test_metrics_show_default(capsys): show_metrics( - { - "metrics.yaml": { - "x.b": {"old": 5, "new": 6}, - "a.d.e": {"old": 3, "new": 4, "diff": 1}, - "a.b.c": {"old": 1, "new": 2, "diff": 1}, - } - } + metrics={ + "branch_1": { + "data": { + "metrics.json": {"data": {"b": {"ad": 1, "bc": 2}, "c": 4}} + }, + "error": Exception("Failed just a little bit"), + }, + "branch_2": { + "data": { + "metrics.json": {"data": {"a": 1, "b": {"ad": 3, "bc": 4}}} + } + }, + }, + all_branches=True, ) out, _ = capsys.readouterr() assert out == textwrap.dedent( """\ - Path diff new old - x.b - 6 5 - a.d.e 1 4 3 - a.b.c 1 2 1 + Revision Path a b.ad b.bc c + branch_1 metrics.json - 1 2 4 + branch_2 metrics.json 1 3 4 - """ ) def test_metrics_show_md(capsys): show_metrics( - { - "metrics.yaml": { - "x.b": {"old": 5, "new": 6}, - "a.d.e": {"old": 3, "new": 4, "diff": 1}, - "a.b.c": {"old": 1, "new": 2, "diff": 1}, - } + metrics={ + "branch_1": { + "data": { + "metrics.json": {"data": {"b": {"ad": 1, "bc": 2}, "c": 4}} + } + }, + "branch_2": { + "data": { + "metrics.json": {"data": {"a": 1, "b": {"ad": 3, "bc": 4}}} + } + }, + "branch_3": {"error": YAMLFileCorruptedError("failed")}, }, + all_branches=True, markdown=True, ) out, _ = capsys.readouterr() assert out == textwrap.dedent( """\ - | Path | diff | new | old | - |--------|--------|-------|-------| - | x.b | - | 6 | 5 | - | a.d.e | 1 | 4 | 3 | - | a.b.c | 1 | 2 | 1 | + | Revision | Path | a | b.ad | b.bc | c | + |------------|--------------|-----|--------|--------|-----| + | branch_1 | metrics.json | - | 1 | 2 | 4 | + | branch_2 | metrics.json | 1 | 3 | 4 | - | """ ) diff --git a/tests/unit/test_metrics.py b/tests/unit/test_metrics.py index 4063e26d5e..c5a6a37833 100644 --- a/tests/unit/test_metrics.py +++ b/tests/unit/test_metrics.py @@ -26,7 +26,7 @@ def test_metrics_order(tmp_dir, dvc): name="stage2", ) - assert list(dvc.metrics.show()[""]) == [ + assert list(dvc.metrics.show()[""]["data"]) == [ "p.json", os.path.join("sub", "p4.json"), "p1.json", diff --git a/tests/unit/test_params.py b/tests/unit/test_params.py index 2b7d3dd51e..dc5833295e 100644 --- a/tests/unit/test_params.py +++ b/tests/unit/test_params.py @@ -23,7 +23,7 @@ def test_params_order(tmp_dir, dvc): dvc.stage.add(params=[{params_path: ["p"]}], cmd="cmd2", name="stage2") # params are sorted during dumping, therefore p1 is first - assert list(dvc.params.show()[""]) == [ + assert list(dvc.params.show()[""]["data"]) == [ "params1.yaml", p2_path, "params.yaml",