Skip to content

Commit

Permalink
added option to save checks as json (deepchecks#665)
Browse files Browse the repository at this point in the history
* v0

* v0

* v0

* v1

* v1

* v1

* v1

* v1

* Adding draft guide of exporting to JSON file

* v1

* merge

* linting

* linting

* some_changes

* some_changes

* some_changes

* some_changes

* update_tests

* Revert "some_changes"

This reverts commit dd024e2.

* linting

* tiny_fix

* update export json guide

* renaming serialize to export json

* fix_linting

* updated notebook and removed previously named one

* updated notebook

* fix link in notebook

* fix link in notebook

Co-authored-by: Itay Gabbay <[email protected]>
Co-authored-by: Shir <[email protected]>
  • Loading branch information
3 people authored Jan 19, 2022
1 parent eb49b84 commit 4732188
Show file tree
Hide file tree
Showing 8 changed files with 5,090 additions and 22 deletions.
132 changes: 127 additions & 5 deletions deepchecks/base/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,29 @@
"""Module containing all the base classes for checks."""
# pylint: disable=broad-except
import abc
import base64
import inspect
import io
import traceback
from collections import OrderedDict
from functools import wraps
from typing import Any, Callable, List, Sequence, Union, Dict, Mapping, cast

import jsonpickle
from matplotlib import pyplot as plt
import matplotlib
import pandas as pd
import numpy as np
import ipywidgets as widgets
import plotly.graph_objects as go
from matplotlib import pyplot as plt
from IPython.core.display import display_html
from pandas.io.formats.style import Styler
from plotly.basedatatypes import BaseFigure
import plotly

from deepchecks.base.condition import Condition, ConditionCategory, ConditionResult
from deepchecks.base.dataset import Dataset
from deepchecks.base.display_pandas import dataframe_to_html, get_conditions_table_display
from deepchecks.base.display_pandas import dataframe_to_html, get_conditions_table
from deepchecks.utils.typing import Hashable, BasicModel
from deepchecks.utils.strings import get_docs_summary, split_camel_case
from deepchecks.utils.ipython import is_ipython_display
Expand All @@ -51,6 +57,22 @@
]


def _save_all_open_figures():
    """Export every open pyplot figure as a base64-encoded PNG and close it.

    Returns:
        List[str]: one base64 string (utf-8 decoded) per figure that was open,
        in the order reported by ``plt.get_fignums()``.
    """
    images = []
    for fig_num in plt.get_fignums():
        fig = plt.figure(fig_num)
        with io.BytesIO() as buffer:
            fig.savefig(buffer, format='png')
            images.append(base64.b64encode(buffer.getvalue()).decode('utf-8'))
        # Clearing a figure only empties it; pyplot keeps it registered, so a later call
        # would export it again as a blank image. Closing removes it from pyplot's list.
        plt.close(fig)
    return images


# HTML section headers shared by ``CheckResult.display_check`` and
# ``CheckResult.display_from_json`` so both renderings use identical markup.
_CONDITIONS_HEADER = '<h5>Conditions Summary</h5>'
_ADDITIONAL_OUTPUTS_HEADER = '<h5>Additional Outputs</h5>'


class CheckResult:
"""Class which returns from a check with result that can later be used for automatic pipelines and display value.
Expand Down Expand Up @@ -120,10 +142,10 @@ def display_check(self, unique_id: str = None, as_widget: bool = False,
summary = get_docs_summary(self.check)
check_html += f'<p>{summary}</p>'
if self.conditions_results:
check_html += '<h5>Conditions Summary</h5>'
check_html += get_conditions_table_display(self, unique_id)
check_html += _CONDITIONS_HEADER
check_html += dataframe_to_html(get_conditions_table(self, unique_id))
if show_additional_outputs:
check_html += '<h5>Additional Outputs</h5>'
check_html += _ADDITIONAL_OUTPUTS_HEADER
for item in self.display:
if isinstance(item, (pd.DataFrame, Styler)):
check_html += dataframe_to_html(item)
Expand Down Expand Up @@ -165,6 +187,90 @@ def display_check(self, unique_id: str = None, as_widget: bool = False,
return box
display_html(check_html, raw=True)

def _display_to_json(self):
    """Serialize every item of ``self.display`` into a ``(type, payload)`` tuple.

    Returns:
        List[Tuple[str, Any]]: one tuple per display item, where type is one of
        'dataframe' (records-oriented json string), 'html' (the raw string),
        'plotly' (figure json string) or 'plt' (list of base64 PNG strings, or an
        empty string when the plotting callable failed).

    Raises:
        Exception: if a display item is of an unsupported type.
    """
    displays = []
    old_backend = matplotlib.get_backend()
    try:
        for item in self.display:
            # Styler is checked first and unwrapped to its underlying DataFrame.
            if isinstance(item, Styler):
                displays.append(('dataframe', item.data.to_json(orient='records')))
            elif isinstance(item, pd.DataFrame):
                displays.append(('dataframe', item.to_json(orient='records')))
            elif isinstance(item, str):
                displays.append(('html', item))
            elif isinstance(item, BaseFigure):
                displays.append(('plotly', item.to_json()))
            elif callable(item):
                try:
                    # Switch to the 'Agg' backend before running the plotting callable.
                    matplotlib.use('Agg')
                    item()
                    displays.append(('plt', _save_all_open_figures()))
                except Exception:
                    # Best effort: a failing plot must not abort the whole export.
                    displays.append(('plt', ''))
            else:
                raise Exception(f'Unable to create json for item of type: {type(item)}')
    finally:
        # Always restore the caller's backend, including when a serializer raises
        # after a callable item already switched the backend to 'Agg'.
        matplotlib.use(old_backend)
    return displays

def to_json(self, with_display: bool = True):
    """Serialize this check result into a json string.

    Args:
        with_display (bool): whether to include the serialized display items.

    Returns:
        json in the format:
        {'name': .., 'params': .., 'header': ..,
        'summary': .., 'conditions_table': .., 'value', 'display': ..}
        where 'conditions_table' is present only when there are condition results
        and 'display' only when ``with_display`` is true.
    """
    payload = {
        'name': self.check.name(),
        'params': self.check.params(),
        'header': self.get_header(),
        'summary': get_docs_summary(self.check),
    }

    if self.conditions_results:
        # The conditions table object exposes its underlying DataFrame as ``.data``.
        payload['conditions_table'] = get_conditions_table(self).data.to_json(orient='records')

    # DataFrames and numpy arrays are converted explicitly; anything else is
    # handed to jsonpickle unchanged.
    raw_value = self.value
    if isinstance(raw_value, pd.DataFrame):
        serialized_value = raw_value.to_json()
    elif isinstance(raw_value, np.ndarray):
        serialized_value = raw_value.tolist()
    else:
        serialized_value = raw_value
    payload['value'] = serialized_value

    if with_display:
        payload['display'] = self._display_to_json()

    return jsonpickle.dumps(payload)

@staticmethod
def display_from_json(json_data):
    """Display a check result from the json string produced by ``to_json``.

    Nothing is displayed when the json carries no 'display' entry.

    Args:
        json_data (str): json string as returned by ``to_json``.

    Raises:
        ValueError: if a display item has an unexpected type.
    """
    # NOTE(review): jsonpickle may instantiate arbitrary objects while decoding, so this
    # must only be called with json coming from a trusted source.
    json_data = jsonpickle.loads(json_data)
    if json_data.get('display') is None:
        return
    header = json_data['header']
    summary = json_data['summary']
    display_html(f'<h4>{header}</h4>', raw=True)
    display_html(f'<p>{summary}</p>', raw=True)
    if json_data.get('conditions_table'):
        display_html(_CONDITIONS_HEADER, raw=True)
        conditions_table = pd.read_json(json_data['conditions_table'], orient='records')
        display_html(dataframe_to_html(conditions_table.style.hide_index()), raw=True)
    display_html(_ADDITIONAL_OUTPUTS_HEADER, raw=True)
    for display_type, value in json_data['display']:
        if display_type == 'html':
            display_html(value, raw=True)
        elif display_type in ['conditions', 'dataframe']:
            df: pd.DataFrame = pd.read_json(value, orient='records')
            display_html(dataframe_to_html(df), raw=True)
        elif display_type == 'plotly':
            plotly_json = io.StringIO(value)
            plotly.io.read_json(plotly_json).show()
        elif display_type == 'plt':
            # The serializer stores a list of base64 images (one per figure), or an
            # empty string when plotting failed; a bare string is accepted as well.
            images = [value] if isinstance(value, str) else value
            for image in images:
                if image:
                    display_html(f'<img src=\'data:image/png;base64,{image}\'>', raw=True)
        else:
            raise ValueError(f'Unexpected type of display received: {display_type}')

def _ipython_display_(self, unique_id=None, as_widget=False,
show_additional_outputs=True):
check_widget = self.display_check(unique_id=unique_id, as_widget=as_widget,
Expand Down Expand Up @@ -528,6 +634,22 @@ def __init__(self, check: BaseCheck, exception: Exception, header_suffix: str =
self.exception = exception
self.header = check.name() + header_suffix

def to_json(self, with_display: bool = True):
    """Serialize this check failure into a json string.

    Args:
        with_display (bool): whether to include the exception text as display.

    Returns:
        {'name': .., 'params': .., 'header': .., 'display': ..}
        where 'display' is present only when ``with_display`` is true.
    """
    payload = {
        'name': self.check.name(),
        'params': self.check.params(),
        'header': self.header,
    }
    if with_display:
        # The failure's only display item is the exception message.
        payload['display'] = [('str', str(self.exception))]
    return jsonpickle.dumps(payload)

def __repr__(self):
"""Return string representation."""
tb_str = traceback.format_exception(etype=type(self.exception), value=self.exception,
Expand Down
14 changes: 7 additions & 7 deletions deepchecks/base/display_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from . import check # pylint: disable=unused-import


__all__ = ['dataframe_to_html', 'get_conditions_table_display']
__all__ = ['dataframe_to_html', 'get_conditions_table']


def dataframe_to_html(df: Union[pd.DataFrame, Styler]):
Expand Down Expand Up @@ -54,18 +54,18 @@ def dataframe_to_html(df: Union[pd.DataFrame, Styler]):
return df.to_html()


def get_conditions_table_display(check_results: Union['check.CheckResult', List['check.CheckResult']],
unique_id=None, max_info_len: int = 3000):
"""Display the conditions table as DataFrame.
def get_conditions_table(check_results: Union['check.CheckResult', List['check.CheckResult']],
unique_id=None, max_info_len: int = 3000):
"""Return the conditions table as DataFrame.
Args:
check_results (Union['CheckResult', List['CheckResult']]): check results to show conditions of.
unique_id (str): the unique id to append for the check names to create links
(won't create links if None/empty).
max_info_len (int): max length of the additional info.
Returns:
str:
html representation of the condition table.
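pandas.io.formats.style.Styler:
the styled condition table with its index hidden (the underlying DataFrame is available via ``.data``).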
"""
if not isinstance(check_results, List):
show_check_column = False
Expand Down Expand Up @@ -95,7 +95,7 @@ def get_conditions_table_display(check_results: Union['check.CheckResult', List[
if show_check_column is False:
conditions_table.drop('Check', axis=1, inplace=True)
conditions_table['More Info'] = conditions_table['More Info'].map(lambda x: get_ellipsis(x, max_info_len))
return dataframe_to_html(conditions_table.style.hide_index())
return conditions_table.style.hide_index()


def get_result_navigation_display(check_results: Union['check.CheckResult', List['check.CheckResult']],
Expand Down
6 changes: 3 additions & 3 deletions deepchecks/base/display_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from deepchecks.utils.ipython import is_widgets_enabled
from deepchecks.utils.strings import get_random_string
from deepchecks.base.check import CheckResult, CheckFailure
from deepchecks.base.display_pandas import dataframe_to_html, get_conditions_table_display, \
from deepchecks.base.display_pandas import dataframe_to_html, get_conditions_table, \
get_result_navigation_display


Expand Down Expand Up @@ -116,7 +116,7 @@ def _display_suite_widgets(summary: str,
tab.set_title(2, 'Checks Without Output')

if checks_with_conditions:
cond_html_table = get_conditions_table_display(checks_with_conditions, unique_id, 300)
cond_html_table = dataframe_to_html(get_conditions_table(checks_with_conditions, unique_id, 300))
h2_widget = widgets.HTML(_CONDITIONS_SUMMARY_TITLE)
condition_tab_children = [h2_widget, _create_table_widget(cond_html_table)]
else:
Expand Down Expand Up @@ -196,7 +196,7 @@ def _display_suite_no_widgets(summary: str,
display_html(bold_hr + summary, raw=True)

if checks_with_conditions:
cond_html_table = get_conditions_table_display(checks_with_conditions, unique_id, 300)
cond_html_table = dataframe_to_html(get_conditions_table(checks_with_conditions, unique_id, 300))
display_html(_CONDITIONS_SUMMARY_TITLE + cond_html_table, raw=True)
else:
display_html(_NO_CONDITIONS_SUMMARY_TITLE, raw=True)
Expand Down
27 changes: 22 additions & 5 deletions deepchecks/base/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from collections import OrderedDict
from typing import Union, List, Optional, Tuple, Any, Container, Mapping

import jsonpickle

from deepchecks.base.display_suite import display_suite_result, ProgressBar
from deepchecks.errors import DeepchecksValueError, DeepchecksNotSupportedError
from deepchecks.base.dataset import Dataset
Expand Down Expand Up @@ -61,6 +63,21 @@ def save_as_html(self, file=None):
file = 'output.html'
display_suite_result(self.name, self.results, html_out=file)

def to_json(self, with_display: bool = True):
    """Return the suite result as json.

    Args:
        with_display (bool): controls if to serialize display of checks or not

    Returns:
        {'name': .., 'results': ..}
        where each entry of 'results' is the json string returned by that
        result's own ``to_json``.
    """
    serialized_results = [res.to_json(with_display=with_display) for res in self.results]
    return jsonpickle.dumps({'name': self.name, 'results': serialized_results})


class BaseSuite:
"""Class for running a set of checks together, and returning a unified pass / no-pass.
Expand Down Expand Up @@ -184,14 +201,14 @@ def run(
check_result = check.run(dataset=train_dataset, model=model)
check_result.header = f'{check_result.get_header()} - Train Dataset'
except Exception as exp:
check_result = CheckFailure(check.__class__, exp, ' - Train Dataset')
check_result = CheckFailure(check, exp, ' - Train Dataset')
results.append(check_result)
if test_dataset is not None:
try:
check_result = check.run(dataset=test_dataset, model=model)
check_result.header = f'{check_result.get_header()} - Test Dataset'
except Exception as exp:
check_result = CheckFailure(check.__class__, exp, ' - Test Dataset')
check_result = CheckFailure(check, exp, ' - Test Dataset')
results.append(check_result)
if train_dataset is None and test_dataset is None:
results.append(Suite._get_unsupported_failure(check))
Expand All @@ -204,7 +221,7 @@ def run(
else:
raise TypeError(f'Don\'t know how to handle type {check.__class__.__name__} in suite.')
except Exception as exp:
results.append(CheckFailure(check.__class__, exp))
results.append(CheckFailure(check, exp))
progress_bar.inc_progress()

progress_bar.close()
Expand All @@ -213,7 +230,7 @@ def run(
@classmethod
def _get_unsupported_failure(cls, check):
msg = 'Check is not supported for parameters given to suite'
return CheckFailure(check.__class__, DeepchecksNotSupportedError(msg))
return CheckFailure(check, DeepchecksNotSupportedError(msg))


class ModelComparisonSuite(BaseSuite):
Expand Down Expand Up @@ -254,7 +271,7 @@ def run(self,
check_result = check.run_logic(context)
results.append(check_result)
except Exception as exp:
results.append(CheckFailure(check.__class__, exp))
results.append(CheckFailure(check, exp))
progress_bar.inc_progress()

progress_bar.close()
Expand Down
Loading

0 comments on commit 4732188

Please sign in to comment.