Skip to content

Commit

Permalink
exp show: Add parallel coordinates plot.
Browse files Browse the repository at this point in the history
Uses `TabularData.to_parallel_coordinates`.
Adds new arguments: `html`, `out`, `open`

Reuses `--sort-by` to define colorscale.

Closes #4455
  • Loading branch information
daavoo committed Dec 1, 2021
1 parent e878e36 commit 140c719
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 12 deletions.
58 changes: 56 additions & 2 deletions dvc/command/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from collections import Counter, OrderedDict, defaultdict
from datetime import date, datetime
from fnmatch import fnmatch
from pathlib import Path
from typing import TYPE_CHECKING, Dict, Iterable, Optional

from funcy import compact, lmap
Expand Down Expand Up @@ -387,6 +388,7 @@ def show_experiments(
no_timestamp=False,
csv=False,
markdown=False,
html=False,
**kwargs,
):
from funcy.seqs import flatten as flatten_list
Expand Down Expand Up @@ -434,7 +436,7 @@ def show_experiments(
kwargs.get("iso"),
)

if no_timestamp:
if no_timestamp or html:
td.drop("Created")

for col in ("State", "Executor"):
Expand Down Expand Up @@ -471,9 +473,21 @@ def show_experiments(
}
)

if kwargs.get("only_changed", False):
if kwargs.get("only_changed", False) or html:
td.drop_duplicates("cols")

extra_render_args = {}
if html:
td.dropna("rows")
td.column("Experiment")[:] = [
# remove tree characters
str(x).encode("ascii", "ignore").strip().decode()
for x in td.column("Experiment")
]
rel = kwargs.get("out") or "dvc_plots"
extra_render_args["output_path"] = (Path.cwd() / rel).resolve()
extra_render_args["color_by"] = kwargs.get("sort_by", "Experiment")

td.render(
pager=pager,
borders=True,
Expand All @@ -482,8 +496,26 @@ def show_experiments(
row_styles=row_styles,
csv=csv,
markdown=markdown,
html=html,
**extra_render_args,
)

if html and kwargs.get("open"):
import webbrowser
from platform import uname

if "Microsoft" in uname().release:
url = Path(rel) / "index.html"
else:
url = Path(extra_render_args["output_path"]) / "index.html"
url = url.as_uri()

opened = webbrowser.open(url)

if not opened:
ui.error_write("Failed to open. Please try opening it manually.")
return 1


def _normalize_headers(names, count):
return [
Expand Down Expand Up @@ -541,6 +573,9 @@ def run(self):
csv=self.args.csv,
markdown=self.args.markdown,
only_changed=self.args.only_changed,
html=self.args.html,
out=self.args.out,
open=self.args.open,
)
return 0

Expand Down Expand Up @@ -1045,6 +1080,25 @@ def add_parser(subparsers, parent_parser):
"across the selected experiments."
),
)
experiments_show_parser.add_argument(
"--html",
action="store_true",
default=False,
help="Generate a parallel coordinates plot from the tabulated output.",
)
experiments_show_parser.add_argument(
"-o",
"--out",
default=None,
help="Destination path to save the HTML to",
metavar="<path>",
).complete = completion.DIR
experiments_show_parser.add_argument(
"--open",
action="store_true",
default=False,
help="Open the HTML directly in the browser.",
)
experiments_show_parser.set_defaults(func=CmdExperimentsShow)

EXPERIMENTS_APPLY_HELP = (
Expand Down
3 changes: 2 additions & 1 deletion dvc/render/plotly.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,10 @@ def as_json(self) -> str:
float_values = [float(x) for x in values]
except ValueError:
is_categorical = True
dummy_values = list(range(len(values)))
unique_values = sorted(set(values))

if is_categorical:
dummy_values = [unique_values.index(x) for x in values]
trace["dimensions"].append(
{
"label": label,
Expand Down
73 changes: 72 additions & 1 deletion tests/func/experiments/test_show.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,11 +559,82 @@ def test_show_only_changed(tmp_dir, dvc, scm, capsys):
assert main(["exp", "show"]) == 0
cap = capsys.readouterr()

print(cap)
assert "bar" in cap.out

capsys.readouterr()
assert main(["exp", "show", "--only-changed"]) == 0
cap = capsys.readouterr()

assert "bar" not in cap.out


def test_show_parallel_coordinates(tmp_dir, dvc, scm, mocker):
    """Exercise ``dvc exp show --html`` end to end.

    Runs a one-stage pipeline plus a single experiment, then checks that:

    * the default invocation writes ``dvc_plots/index.html`` containing
      the changed metric/param dimensions and a colour scale,
    * ``--sort-by`` selects the column used for the colour scale,
    * ``--out`` redirects the generated HTML to another directory,
    * ``--open`` ends up calling ``webbrowser.open`` (patched here).
    """
    from dvc.command import experiments

    # Patched so that `--open` never launches a real browser.
    webbrowser_open = mocker.patch("webbrowser.open")
    # Spy (not mock) so the real implementation still writes the HTML.
    show_experiments = mocker.spy(experiments, "show_experiments")

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    params_file = tmp_dir / "params.yaml"
    params_data = {
        "foo": 1,
        "bar": 1,
    }
    params_file.dump(params_data)

    dvc.run(
        cmd="python copy.py params.yaml metrics.yaml",
        metrics_no_cache=["metrics.yaml"],
        params=["foo", "bar"],
        name="copy-file",
        deps=["copy.py"],
    )
    scm.add(
        [
            "dvc.yaml",
            "dvc.lock",
            "copy.py",
            "params.yaml",
            "metrics.yaml",
            ".gitignore",
        ]
    )
    scm.commit("init")

    dvc.experiments.run(params=["foo=2"])

    # --- default invocation -------------------------------------------
    assert main(["exp", "show", "--html"]) == 0
    kwargs = show_experiments.call_args[1]

    # `color_by` is computed inside `show_experiments` and handed to the
    # renderer; it is not one of the keyword arguments the command passes
    # in, so only the forwarded `html` flag is checked on the spy. The
    # colour scale itself is verified through the generated HTML below.
    assert kwargs["html"] is True
    html_text = (tmp_dir / "dvc_plots" / "index.html").read_text()

    assert all(rev in html_text for rev in ["workspace", "master", "[exp-"])

    assert (
        '{"label": "metrics.yaml:foo", "values": [2.0, 1.0, 2.0]}' in html_text
    )
    assert (
        '{"label": "params.yaml:foo", "values": [2.0, 1.0, 2.0]}' in html_text
    )
    # Default colouring is by the (categorical) "Experiment" column.
    assert '"line": {"color": [2, 1, 0]' in html_text
    # `bar` never changes, so its column is expected to be dropped.
    assert '"label": "metrics.yaml:bar"' not in html_text

    # --- colour scale taken from --sort-by ----------------------------
    # The HTML mode reuses `--sort-by` for the colour scale; there is no
    # dedicated `--color-by` option on the parser.
    assert (
        main(["exp", "show", "--html", "--sort-by", "metrics.yaml:foo"]) == 0
    )
    kwargs = show_experiments.call_args[1]

    assert kwargs["sort_by"] == "metrics.yaml:foo"
    html_text = (tmp_dir / "dvc_plots" / "index.html").read_text()
    assert '"line": {"color": [2.0, 1.0, 2.0]' in html_text

    # --- custom output directory --------------------------------------
    assert main(["exp", "show", "--html", "--out", "experiments"]) == 0
    kwargs = show_experiments.call_args[1]

    assert kwargs["out"] == "experiments"
    assert (tmp_dir / "experiments" / "index.html").exists()

    # --- opening in the browser ---------------------------------------
    assert main(["exp", "show", "--html", "--open"]) == 0

    webbrowser_open.assert_called()
35 changes: 35 additions & 0 deletions tests/unit/command/test_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,7 @@ def test_experiments_init_config(dvc, scm, mocker):

assert isinstance(cmd, CmdExperimentsInit)
assert cmd.run() == 0

m.assert_called_once_with(
ANY(Repo),
name=None,
Expand Down Expand Up @@ -762,3 +763,37 @@ def test_experiments_init_extra_args(extra_args, expected_kw, mocker):
def test_experiments_init_type_invalid_choice():
with pytest.raises(DvcParserError):
parse_args(["exp", "init", "--type=invalid", "cmd"])


def test_show_experiments_html(tmp_dir, mocker):
    """``show_experiments(html=True)`` forwards the HTML render arguments.

    The table builder is replaced by a mock, so only the calls made on
    the resulting table object are inspected: ``dropna("rows")`` must be
    invoked, and ``render`` must receive ``html``, ``output_path`` and
    ``color_by`` with their default values.
    """
    workspace_data = {
        "timestamp": None,
        "params": {"params.yaml": {"data": {"foo": 1}}},
        "queued": False,
        "running": False,
        "executor": None,
        "metrics": {"scores.json": {"data": {"bar": 0.9544670443829399}}},
    }
    all_experiments = {"workspace": {"baseline": {"data": workspace_data}}}

    table_factory = mocker.patch("dvc.command.experiments.experiments_table")
    table = table_factory.return_value

    show_experiments(all_experiments, html=True)

    table.dropna.assert_called_with("rows")

    # `call_args` unpacks to (positional args, keyword args).
    _, render_kwargs = table.render.call_args

    expected_keys = ("html", "output_path", "color_by")
    missing = [key for key in expected_keys if key not in render_kwargs]
    assert not missing

    # No `out` / `sort_by` were given, so the defaults must be used.
    assert render_kwargs["output_path"] == tmp_dir / "dvc_plots"
    assert render_kwargs["color_by"] == "Experiment"
16 changes: 8 additions & 8 deletions tests/unit/render/test_parallel_coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def test_scalar_columns():

def test_categorical_columns():
td = TabularData(["col-1"])
td.extend([["foo"], ["bar"]])
td.extend([["foo"], ["bar"], ["foo"]])
renderer = ParallelCoordinatesRenderer(td)

result = json.loads(renderer.as_json())
Expand All @@ -46,9 +46,9 @@ def test_categorical_columns():

assert result["data"][0]["dimensions"][0] == {
"label": "col-1",
"values": [0, 1],
"tickvals": [0, 1],
"ticktext": ["foo", "bar"],
"values": [1, 0, 1],
"tickvals": [1, 0, 1],
"ticktext": ["foo", "bar", "foo"],
}


Expand All @@ -63,8 +63,8 @@ def test_mixed_columns():

assert result["data"][0]["dimensions"][0] == {
"label": "categorical",
"values": [0, 1],
"tickvals": [0, 1],
"values": [1, 0],
"tickvals": [1, 0],
"ticktext": ["foo", "bar"],
}
assert result["data"][0]["dimensions"][1] == {
Expand Down Expand Up @@ -96,11 +96,11 @@ def test_color_by_categorical():

assert expected_format(result)
assert result["data"][0]["line"] == {
"color": [0, 1],
"color": [1, 0],
"showscale": True,
"colorbar": {
"tickmode": "array",
"tickvals": [0, 1],
"tickvals": [1, 0],
"ticktext": ["foo", "bar"],
},
}
Expand Down

0 comments on commit 140c719

Please sign in to comment.