From 140c719e56863c9568bbfa22c5ca593bea902795 Mon Sep 17 00:00:00 2001 From: David de la Iglesia Castro Date: Mon, 15 Nov 2021 21:22:33 +0100 Subject: [PATCH] exp show: Add parallel coordinates plot. Uses `TabularData.to_parallel_coordinates`. Adds new arguments: `html`, `out`, `open` Reuses `--sort-by` to define colorscale. Closes #4455 --- dvc/command/experiments.py | 58 ++++++++++++++- dvc/render/plotly.py | 3 +- tests/func/experiments/test_show.py | 73 ++++++++++++++++++- tests/unit/command/test_experiments.py | 35 +++++++++ .../unit/render/test_parallel_coordinates.py | 16 ++-- 5 files changed, 173 insertions(+), 12 deletions(-) diff --git a/dvc/command/experiments.py b/dvc/command/experiments.py index d19c683d4f..0c1253faad 100644 --- a/dvc/command/experiments.py +++ b/dvc/command/experiments.py @@ -3,6 +3,7 @@ from collections import Counter, OrderedDict, defaultdict from datetime import date, datetime from fnmatch import fnmatch +from pathlib import Path from typing import TYPE_CHECKING, Dict, Iterable, Optional from funcy import compact, lmap @@ -387,6 +388,7 @@ def show_experiments( no_timestamp=False, csv=False, markdown=False, + html=False, **kwargs, ): from funcy.seqs import flatten as flatten_list @@ -434,7 +436,7 @@ def show_experiments( kwargs.get("iso"), ) - if no_timestamp: + if no_timestamp or html: td.drop("Created") for col in ("State", "Executor"): @@ -471,9 +473,21 @@ def show_experiments( } ) - if kwargs.get("only_changed", False): + if kwargs.get("only_changed", False) or html: td.drop_duplicates("cols") + extra_render_args = {} + if html: + td.dropna("rows") + td.column("Experiment")[:] = [ + # remove tree characters + str(x).encode("ascii", "ignore").strip().decode() + for x in td.column("Experiment") + ] + rel = kwargs.get("out") or "dvc_plots" + extra_render_args["output_path"] = (Path.cwd() / rel).resolve() + extra_render_args["color_by"] = kwargs.get("sort_by", "Experiment") + td.render( pager=pager, borders=True, @@ -482,8 +496,26 @@ def show_experiments( row_styles=row_styles, csv=csv, markdown=markdown, + html=html, + **extra_render_args, ) + if html and kwargs.get("open"): + import webbrowser + from platform import uname + + if "Microsoft" in uname().release: + url = Path(rel) / "index.html" + else: + url = Path(extra_render_args["output_path"]) / "index.html" + url = url.as_uri() + + opened = webbrowser.open(url) + + if not opened: + ui.error_write("Failed to open. Please try opening it manually.") + return 1 + def _normalize_headers(names, count): return [ @@ -541,6 +573,9 @@ def run(self): csv=self.args.csv, markdown=self.args.markdown, only_changed=self.args.only_changed, + html=self.args.html, + out=self.args.out, + open=self.args.open, ) return 0 @@ -1045,6 +1080,25 @@ def add_parser(subparsers, parent_parser): "across the selected experiments." ), ) + experiments_show_parser.add_argument( + "--html", + action="store_true", + default=False, + help="Generate a parallel coordinates plot from the tabulated output.", + ) + experiments_show_parser.add_argument( + "-o", + "--out", + default=None, + help="Destination path to save the HTML to", + metavar="", + ).complete = completion.DIR + experiments_show_parser.add_argument( + "--open", + action="store_true", + default=False, + help="Open the HTML directly in the browser.", + ) experiments_show_parser.set_defaults(func=CmdExperimentsShow) EXPERIMENTS_APPLY_HELP = ( diff --git a/dvc/render/plotly.py b/dvc/render/plotly.py index 7be5a42884..e3e45c55cc 100644 --- a/dvc/render/plotly.py +++ b/dvc/render/plotly.py @@ -47,9 +47,10 @@ def as_json(self) -> str: float_values = [float(x) for x in values] except ValueError: is_categorical = True - dummy_values = list(range(len(values))) + unique_values = sorted(set(values)) if is_categorical: + dummy_values = [unique_values.index(x) for x in values] trace["dimensions"].append( { "label": label, diff --git a/tests/func/experiments/test_show.py b/tests/func/experiments/test_show.py index 1aa6cdcd81..83e720260b 100644 --- a/tests/func/experiments/test_show.py +++ b/tests/func/experiments/test_show.py @@ -559,7 +559,6 @@ def test_show_only_changed(tmp_dir, dvc, scm, capsys): assert main(["exp", "show"]) == 0 cap = capsys.readouterr() - print(cap) assert "bar" in cap.out capsys.readouterr() @@ -567,3 +566,75 @@ def test_show_only_changed(tmp_dir, dvc, scm, capsys): cap = capsys.readouterr() assert "bar" not in cap.out + + +def test_show_parallel_coordinates(tmp_dir, dvc, scm, mocker): + from dvc.command import experiments + + webbroser_open = mocker.patch("webbrowser.open") + show_experiments = mocker.spy(experiments, "show_experiments") + + tmp_dir.gen("copy.py", COPY_SCRIPT) + params_file = tmp_dir / "params.yaml" + params_data = { + "foo": 1, + "bar": 1, + } + (tmp_dir / params_file).dump(params_data) + + dvc.run( + cmd="python copy.py params.yaml metrics.yaml", + metrics_no_cache=["metrics.yaml"], + params=["foo", "bar"], + name="copy-file", + deps=["copy.py"], + ) + scm.add( + [ + "dvc.yaml", + "dvc.lock", + "copy.py", + "params.yaml", + "metrics.yaml", + ".gitignore", + ] + ) + scm.commit("init") + + dvc.experiments.run(params=["foo=2"]) + + assert main(["exp", "show", "--html"]) == 0 + kwargs = show_experiments.call_args[1] + + assert kwargs["color_by"] == "Experiment" + html_text = (tmp_dir / "dvc_plots" / "index.html").read_text() + + assert all(rev in html_text for rev in ["workspace", "master", "[exp-"]) + + assert ( + '{"label": "metrics.yaml:foo", "values": [2.0, 1.0, 2.0]}' in html_text + ) + assert ( + '{"label": "params.yaml:foo", "values": [2.0, 1.0, 2.0]}' in html_text + ) + assert '"line": {"color": [2, 1, 0]' in html_text + assert '"label": "metrics.yaml:bar"' not in html_text + + assert ( + main(["exp", "show", "--html", "--color-by", "metrics.yaml:foo"]) == 0 + ) + kwargs = show_experiments.call_args[1] + + assert kwargs["color_by"] == "metrics.yaml:foo" + html_text = (tmp_dir / "dvc_plots" / "index.html").read_text() + assert '"line": {"color": [2.0, 1.0, 2.0]' in html_text + + assert main(["exp", "show", "--html", "--out", "experiments"]) == 0 + kwargs = show_experiments.call_args[1] + + assert kwargs["out"] == "experiments" + assert (tmp_dir / "experiments" / "index.html").exists() + + assert main(["exp", "show", "--html", "--open"]) == 0 + + webbroser_open.assert_called() diff --git a/tests/unit/command/test_experiments.py b/tests/unit/command/test_experiments.py index b7218f6f9f..ef387eb21e 100644 --- a/tests/unit/command/test_experiments.py +++ b/tests/unit/command/test_experiments.py @@ -651,6 +651,7 @@ def test_experiments_init_config(dvc, scm, mocker): assert isinstance(cmd, CmdExperimentsInit) assert cmd.run() == 0 + m.assert_called_once_with( ANY(Repo), name=None, @@ -762,3 +763,37 @@ def test_experiments_init_extra_args(extra_args, expected_kw, mocker): def test_experiments_init_type_invalid_choice(): with pytest.raises(DvcParserError): parse_args(["exp", "init", "--type=invalid", "cmd"]) + + +def test_show_experiments_html(tmp_dir, mocker): + all_experiments = { + "workspace": { + "baseline": { + "data": { + "timestamp": None, + "params": {"params.yaml": {"data": {"foo": 1}}}, + "queued": False, + "running": False, + "executor": None, + "metrics": { + "scores.json": {"data": {"bar": 0.9544670443829399}} + }, + } + } + }, + } + experiments_table = mocker.patch( + "dvc.command.experiments.experiments_table" + ) + td = experiments_table.return_value + + show_experiments(all_experiments, html=True) + + td.dropna.assert_called_with("rows") + + render_kwargs = td.render.call_args[1] + + for arg in ["html", "output_path", "color_by"]: + assert arg in render_kwargs + assert render_kwargs["output_path"] == tmp_dir / "dvc_plots" + assert render_kwargs["color_by"] == "Experiment" diff --git a/tests/unit/render/test_parallel_coordinates.py b/tests/unit/render/test_parallel_coordinates.py index 8c2614c1c8..898a98516c 100644 --- a/tests/unit/render/test_parallel_coordinates.py +++ b/tests/unit/render/test_parallel_coordinates.py @@ -37,7 +37,7 @@ def test_scalar_columns(): def test_categorical_columns(): td = TabularData(["col-1"]) - td.extend([["foo"], ["bar"]]) + td.extend([["foo"], ["bar"], ["foo"]]) renderer = ParallelCoordinatesRenderer(td) result = json.loads(renderer.as_json()) @@ -46,9 +46,9 @@ def test_categorical_columns(): assert result["data"][0]["dimensions"][0] == { "label": "col-1", - "values": [0, 1], - "tickvals": [0, 1], - "ticktext": ["foo", "bar"], + "values": [1, 0, 1], + "tickvals": [1, 0, 1], + "ticktext": ["foo", "bar", "foo"], } @@ -63,8 +63,8 @@ def test_mixed_columns(): assert result["data"][0]["dimensions"][0] == { "label": "categorical", - "values": [0, 1], - "tickvals": [0, 1], + "values": [1, 0], + "tickvals": [1, 0], "ticktext": ["foo", "bar"], } assert result["data"][0]["dimensions"][1] == { @@ -96,11 +96,11 @@ def test_color_by_categorical(): assert expected_format(result) assert result["data"][0]["line"] == { - "color": [0, 1], + "color": [1, 0], "showscale": True, "colorbar": { "tickmode": "array", - "tickvals": [0, 1], + "tickvals": [1, 0], "ticktext": ["foo", "bar"], }, }