Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

exp show: Add parallel coordinates plot. #6933

Merged
merged 4 commits into from
Dec 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 44 additions & 2 deletions dvc/command/experiments/show.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
from collections import Counter, OrderedDict, defaultdict
from datetime import date, datetime
from fnmatch import fnmatch
from pathlib import Path
from typing import TYPE_CHECKING, Dict, Iterable, Optional

from funcy import lmap

from dvc.command import completion
from dvc.command.base import CmdBase, append_doc_link
from dvc.command.metrics import DEFAULT_PRECISION
from dvc.exceptions import DvcException, InvalidArgumentError
Expand Down Expand Up @@ -382,6 +384,7 @@ def show_experiments(
no_timestamp=False,
csv=False,
markdown=False,
html=False,
**kwargs,
):
from funcy.seqs import flatten as flatten_list
Expand Down Expand Up @@ -429,7 +432,7 @@ def show_experiments(
kwargs.get("iso"),
)

if no_timestamp:
if no_timestamp or html:
td.drop("Created")

for col in ("State", "Executor"):
Expand Down Expand Up @@ -466,9 +469,21 @@ def show_experiments(
}
)

if kwargs.get("only_changed", False):
if kwargs.get("only_changed", False) or html:
td.drop_duplicates("cols")

html_args = {}
if html:
td.dropna("rows", how="all")
td.column("Experiment")[:] = [
# remove tree characters
str(x).encode("ascii", "ignore").strip().decode()
for x in td.column("Experiment")
]
out = kwargs.get("out") or "dvc_plots"
html_args["output_path"] = (Path.cwd() / out).resolve()
html_args["color_by"] = kwargs.get("sort_by") or "Experiment"

td.render(
pager=pager,
borders=True,
Expand All @@ -477,8 +492,13 @@ def show_experiments(
row_styles=row_styles,
csv=csv,
markdown=markdown,
html=html,
**html_args,
)

if html and kwargs.get("open"):
return ui.open_browser(Path(out) / "index.html")


def _normalize_headers(names, count):
return [
Expand Down Expand Up @@ -544,6 +564,9 @@ def run(self):
csv=self.args.csv,
markdown=self.args.markdown,
only_changed=self.args.only_changed,
html=self.args.html,
out=self.args.out,
open=self.args.open,
)
return 0

Expand Down Expand Up @@ -693,4 +716,23 @@ def add_parser(experiments_subparsers, parent_parser):
"across the selected experiments."
),
)
experiments_show_parser.add_argument(
"--html",
action="store_true",
default=False,
help="Generate a parallel coordinates plot from the tabulated output.",
)
experiments_show_parser.add_argument(
"-o",
"--out",
default=None,
help="Destination folder to save the HTML to",
metavar="<path>",
).complete = completion.DIR
experiments_show_parser.add_argument(
"--open",
action="store_true",
default=False,
help="Open the HTML directly in the browser.",
)
experiments_show_parser.set_defaults(func=CmdExperimentsShow)
20 changes: 20 additions & 0 deletions dvc/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,18 @@ def to_csv(self) -> str:
writer.writerow(row)
return buff.getvalue()

def to_parallel_coordinates(self, output_path, color_by):
    """Write this table as a parallel coordinates HTML plot.

    Args:
        output_path: destination handed to ``dvc.render.html.write``.
        color_by: column name forwarded to the renderer to pick the
            column used for colouring the lines.

    Returns:
        The ``as_uri()`` form of the path returned by ``write``.
    """
    from dvc.render.html import write
    from dvc.render.plotly import ParallelCoordinatesRenderer

    # The renderer needs the table's fill value to recognise missing cells.
    renderer = ParallelCoordinatesRenderer(
        self, color_by, self._fill_value
    )
    html_index = write(output_path, renderers=[renderer])
    return html_index.as_uri()

def add_column(self, name: str) -> None:
self._columns[name] = Column([self._fill_value] * len(self))
self._keys.append(name)
Expand All @@ -173,6 +185,14 @@ def render(self, **kwargs: Any):

if kwargs.pop("csv", False):
ui.write(self.to_csv(), end="")

elif kwargs.pop("html", False):
ui.write(
self.to_parallel_coordinates(
kwargs["output_path"], kwargs.get("color_by")
)
)

else:
ui.table(self, headers=self.keys(), **kwargs)

Expand Down
92 changes: 92 additions & 0 deletions dvc/render/plotly.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import json
from collections import defaultdict
from typing import TYPE_CHECKING, Any, Dict, Optional

from dvc.render.base import Renderer

if TYPE_CHECKING:
from dvc.compare import TabularData


class ParallelCoordinatesRenderer(Renderer):
    """Build a Plotly ``parcoords`` trace from tabular data.

    Every column of ``tabular_data`` becomes one dimension of the plot.
    A column whose non-missing cells all parse as ``float`` is emitted as a
    numeric dimension (missing cells become ``None``). Any other column is
    treated as categorical: its cells are replaced by integer codes and the
    original text is used for the tick labels. The column named by
    ``color_by``, if present, also drives the colour of the lines.
    """

    TYPE = "plotly"

    # HTML fragment with ``{id}`` and ``{partial}`` placeholders.
    # Presumably filled in by the ``Renderer`` base class using the output
    # of ``partial_html`` -- TODO confirm against dvc.render.base.
    DIV = """
<div id = "{id}">
<script type = "text/javascript">
var plotly_data = {partial};
Plotly.newPlot("{id}", plotly_data.data, plotly_data.layout);
</script>
</div>
"""

    # NOTE(review): Plotly's documentation describes the ``plotly-latest``
    # CDN bundles as frozen at v1.58.5; consider pinning an explicit
    # version here -- confirm before changing.
    SCRIPTS = """
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
"""

    # pylint: disable=W0231
    def __init__(
        self,
        tabular_data: "TabularData",
        color_by: Optional[str] = None,
        fill_value: str = "",
    ):
        """Store the table and rendering options.

        Args:
            tabular_data: table whose ``as_dict()`` rows are plotted.
            color_by: label of the column used to colour the lines; no
                ``line`` entry is produced when no column has this label.
            fill_value: cell text that marks a missing value.
        """
        self.tabular_data = tabular_data
        self.color_by = color_by
        self.filename = "experiments"
        self.fill_value = fill_value

    def partial_html(self, **kwargs):
        """Return the plot JSON in a form safe to inline in ``<script>``.

        ``<`` can only occur inside string values of the JSON document, so
        replacing it with the equivalent ``\\u003c`` escape leaves the
        parsed data unchanged while preventing a cell such as
        ``"</script>"`` from terminating the surrounding script element.
        """
        return self.as_json().replace("<", "\\u003c")

    def as_json(self, **kwargs) -> str:
        """Serialize the table as ``{"data": [trace], "layout": {}}``.

        Returns:
            A JSON string holding a single ``parcoords`` trace with one
            dimension per column, plus a ``line`` entry when a column
            matches ``color_by``.
        """
        # Pivot the row-oriented table into label -> list of cell strings.
        columns = defaultdict(list)
        for row in self.tabular_data.as_dict():
            for col_name, value in row.items():
                columns[col_name].append(str(value))

        trace: Dict[str, Any] = {"type": "parcoords", "dimensions": []}
        for label, cells in columns.items():
            numeric = self._as_floats(cells)
            is_categorical = numeric is None

            if is_categorical:
                codes, ticktext = self._encode_categorical(cells)
                trace["dimensions"].append(
                    {
                        "label": label,
                        "values": codes,
                        "tickvals": codes,
                        "ticktext": ticktext,
                    }
                )
            else:
                trace["dimensions"].append(
                    {"label": label, "values": numeric}
                )

            if label != self.color_by:
                continue

            line: Dict[str, Any] = {
                "color": codes if is_categorical else numeric,
                "showscale": True,
                "colorbar": {"title": self.color_by},
            }
            if is_categorical:
                # Label the colour bar with the category text instead of
                # the integer codes.
                line["colorbar"]["tickmode"] = "array"
                line["colorbar"]["tickvals"] = codes
                line["colorbar"]["ticktext"] = ticktext
            trace["line"] = line

        return json.dumps({"data": [trace], "layout": {}})

    def _as_floats(self, cells):
        """Return ``cells`` as floats (``None`` if missing), else ``None``.

        ``None`` is returned for the whole column as soon as one
        non-missing cell cannot be parsed by ``float``.
        """
        try:
            return [
                float(x) if x != self.fill_value else None for x in cells
            ]
        except ValueError:
            return None

    def _encode_categorical(self, cells):
        """Map text cells to integer codes and matching tick labels.

        Codes follow the sorted order of the distinct non-missing texts;
        the fill value always receives the highest code. A dict lookup
        replaces a per-cell ``list.index`` scan.

        Returns:
            ``(codes, ticktext)``, both aligned with ``cells``; missing
            cells are labelled ``"Missing"``.
        """
        present = sorted({x for x in cells if x != self.fill_value})
        code_of = {text: code for code, text in enumerate(present)}
        code_of[self.fill_value] = len(present)

        codes = [code_of[x] for x in cells]
        ticktext = [
            x if x != self.fill_value else "Missing" for x in cells
        ]
        return codes, ticktext
75 changes: 75 additions & 0 deletions tests/func/experiments/test_show.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,3 +597,78 @@ def test_show_only_changed(tmp_dir, dvc, scm, capsys):
cap = capsys.readouterr()

assert "bar" not in cap.out


def test_show_parallel_coordinates(tmp_dir, dvc, scm, mocker):
    """Exercise ``dvc exp show --html`` end to end.

    Covers the default output folder, colouring by the ``--sort-by``
    column, a custom ``--out`` folder, ``--open`` and a parameter that is
    only present in the workspace.
    """
    from dvc.command.experiments import show

    webbrowser_open = mocker.patch("webbrowser.open")
    show_experiments = mocker.spy(show, "show_experiments")

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    params_file = tmp_dir / "params.yaml"
    params_data = {
        "foo": 1,
        "bar": 1,
    }
    params_file.dump(params_data)

    dvc.run(
        cmd="python copy.py params.yaml metrics.yaml",
        metrics_no_cache=["metrics.yaml"],
        params=["foo", "bar"],
        name="copy-file",
        deps=["copy.py"],
    )
    scm.add(
        [
            "dvc.yaml",
            "dvc.lock",
            "copy.py",
            "params.yaml",
            "metrics.yaml",
            ".gitignore",
        ]
    )
    scm.commit("init")

    dvc.experiments.run(params=["foo=2"])

    # Default invocation: HTML lands in ./dvc_plots, coloured by Experiment.
    assert main(["exp", "show", "--html"]) == 0

    html_text = (tmp_dir / "dvc_plots" / "index.html").read_text()
    assert all(rev in html_text for rev in ["workspace", "master", "[exp-"])

    assert (
        '{"label": "metrics.yaml:foo", "values": [2.0, 1.0, 2.0]}' in html_text
    )
    assert (
        '{"label": "params.yaml:foo", "values": [2.0, 1.0, 2.0]}' in html_text
    )
    assert '"line": {"color": [2, 1, 0]' in html_text
    # "bar" is identical in every row, so its column must not be plotted.
    assert '"label": "metrics.yaml:bar"' not in html_text

    # The --sort-by column is reused as the colouring column.
    assert (
        main(["exp", "show", "--html", "--sort-by", "metrics.yaml:foo"]) == 0
    )

    html_text = (tmp_dir / "dvc_plots" / "index.html").read_text()
    assert '"line": {"color": [2.0, 1.0, 2.0]' in html_text

    # A custom output folder is forwarded and used.
    assert main(["exp", "show", "--html", "--out", "experiments"]) == 0
    kwargs = show_experiments.call_args[1]

    assert kwargs["out"] == "experiments"
    assert (tmp_dir / "experiments" / "index.html").exists()

    assert main(["exp", "show", "--html", "--open"]) == 0

    webbrowser_open.assert_called()

    # A parameter that only exists in the workspace yields nulls elsewhere.
    params_data = {"foo": 1, "bar": 1, "foobar": 2}
    params_file.dump(params_data)
    assert main(["exp", "show", "--html"]) == 0
    html_text = (tmp_dir / "dvc_plots" / "index.html").read_text()
    assert '{"label": "foobar", "values": [2.0, null, null]}' in html_text
35 changes: 35 additions & 0 deletions tests/unit/command/test_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,7 @@ def test_experiments_init_config(dvc, scm, mocker):

assert isinstance(cmd, CmdExperimentsInit)
assert cmd.run() == 0

m.assert_called_once_with(
ANY(Repo),
name="train",
Expand Down Expand Up @@ -758,3 +759,37 @@ def test_experiments_init_extra_args(extra_args, expected_kw, mocker):
def test_experiments_init_type_invalid_choice():
    """Parsing ``exp init --type=invalid cmd`` raises ``DvcParserError``."""
    argv = ["exp", "init", "--type=invalid", "cmd"]
    with pytest.raises(DvcParserError):
        parse_args(argv)


def test_show_experiments_html(tmp_dir, mocker):
    """``show_experiments(html=True)`` forwards the HTML options to render.

    ``experiments_table`` is patched, so the assertions inspect the calls
    made on the mocked table rather than any rendered output.
    """
    baseline_data = {
        "timestamp": None,
        "params": {"params.yaml": {"data": {"foo": 1}}},
        "queued": False,
        "running": False,
        "executor": None,
        "metrics": {"scores.json": {"data": {"bar": 0.9544670443829399}}},
    }
    all_experiments = {"workspace": {"baseline": {"data": baseline_data}}}

    table_factory = mocker.patch(
        "dvc.command.experiments.show.experiments_table"
    )
    td = table_factory.return_value

    show_experiments(all_experiments, html=True)

    # Rows that are entirely empty must be dropped before plotting.
    td.dropna.assert_called_with("rows", how="all")

    _, render_kwargs = td.render.call_args

    assert "html" in render_kwargs
    assert "output_path" in render_kwargs
    assert "color_by" in render_kwargs
    assert render_kwargs["output_path"] == tmp_dir / "dvc_plots"
    assert render_kwargs["color_by"] == "Experiment"
Loading