Skip to content

Commit

Permalink
Update stats functions
Browse files Browse the repository at this point in the history
- Added a call to `verify_thicket_structures` in each stats function
- Updated the arguments based on previous discussions within PAVE and UTK-LLNL meetings
- Added a conditional check for arguments that require a column from the performance dataframe. A `ValueError` is raised if such an argument is left as `None`.

* Removing .swp file

* Updating path to utils

* Updating stats functions to have: Verify thicket structure, updated args, and a conditional check
  • Loading branch information
Treece-Burgess authored and slabasan committed Apr 21, 2023
1 parent 07efe0b commit bfe2c7c
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 19 deletions.
10 changes: 9 additions & 1 deletion thicket/stats/calc_average.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@

import numpy as np
import pandas as pd
from ..utils import verify_thicket_structures


def calc_average(thicket=None, columns=None):
def calc_average(thicket, columns=None):
"""Calculate median and mean per node.
Designed to take in a Thicket, and will append a column to the statsframe for
Expand All @@ -17,6 +18,13 @@ def calc_average(thicket=None, columns=None):
thicket (thicket): Thicket object
columns (list): list of hardware/timing metrics to perform average calculations on
"""
if columns is None:
raise ValueError("To see a list of valid columns run get_perf_columns().")

verify_thicket_structures(
thicket.dataframe, index=["node", "profile"], columns=columns
)

for column in columns:
median = []
mean = []
Expand Down
35 changes: 27 additions & 8 deletions thicket/stats/calc_corr_nodewise.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,57 @@
# SPDX-License-Identifier: MIT

from scipy import stats
from ..utils import verify_thicket_structures


def calc_corr_nodewise(
    thicket, base_column=None, correlate_columns=None, correlation="pearson"
):
    """Calculate the nodewise correlation on user-specified columns.

    For every node in the statsframe, correlates ``base_column`` against each
    column in ``correlate_columns`` using the rows of the performance
    dataframe that belong to that node. This can either be done for the
    EnsembleFrame or a super thicket. One column per correlated pair is
    appended to the statsframe, named
    ``"<base_column>_vs_<column> <correlation>"``.

    Arguments:
        thicket (thicket): Thicket object
        base_column (str): base column that you want to compare
        correlate_columns (list): list of columns to correlate to the passed in
            base column (a single column name given as a string is also accepted)
        correlation (str): correlation test to perform -- pearson (default),
            spearman, and kendall

    Raises:
        ValueError: if ``base_column`` or ``correlate_columns`` is None, or if
            ``correlation`` is not one of the supported tests.
    """
    if base_column is None or correlate_columns is None:
        raise ValueError(
            "Both 'base_column' and 'correlate_columns' must be provided. "
            "To see a list of valid columns run get_perf_columns()."
        )

    # A bare string would otherwise be iterated character by character.
    if isinstance(correlate_columns, str):
        correlate_columns = [correlate_columns]
    else:
        correlate_columns = list(correlate_columns)

    # The second index level is "profile" when present, otherwise "thicket".
    if "profile" in thicket.dataframe.index.names:
        second_level = "profile"
    else:
        second_level = "thicket"

    verify_thicket_structures(
        thicket.dataframe,
        index=["node", second_level],
        columns=[base_column] + correlate_columns,
    )

    # Dispatch table instead of three copy-pasted branches; each function
    # returns a result whose first element is the correlation statistic.
    correlation_tests = {
        "pearson": stats.pearsonr,
        "spearman": stats.spearmanr,
        "kendall": stats.kendalltau,
    }
    # Validate up front so an invalid test name is rejected even when there is
    # nothing to iterate over, and before any column is written.
    if correlation not in correlation_tests:
        raise ValueError(
            "Invalid correlation: {!r}. Valid options are {}.".format(
                correlation, ", ".join(sorted(correlation_tests))
            )
        )
    correlation_test = correlation_tests[correlation]

    nodes = thicket.statsframe.dataframe.index.tolist()

    for col in correlate_columns:
        correlated = []
        for node in nodes:
            # Look the node's rows up once and reuse them for both columns.
            node_rows = thicket.dataframe.loc[node]
            correlated.append(
                correlation_test(node_rows[base_column], node_rows[col])[0]
            )
        thicket.statsframe.dataframe[
            base_column + "_vs_" + col + " " + correlation
        ] = correlated
10 changes: 9 additions & 1 deletion thicket/stats/calc_deviation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@

import numpy as np
import pandas as pd
from ..utils import verify_thicket_structures


def calc_deviation(thicket=None, columns=None):
def calc_deviation(thicket, columns=None):
"""Calculate standard deviation and variance per node.
Designed to take in a Thicket, and will append a column to the statsframe
Expand All @@ -20,6 +21,13 @@ def calc_deviation(thicket=None, columns=None):
thicket (thicket): Thicket object
columns (list): list of hardware/timing metrics to perform deviation calculations on
"""
if columns is None:
raise ValueError("To see a list of valid columns run get_perf_columns().")

verify_thicket_structures(
thicket.dataframe, index=["node", "profile"], columns=columns
)

for column in columns:
var = []
std = []
Expand Down
10 changes: 9 additions & 1 deletion thicket/stats/calc_extremum.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
# SPDX-License-Identifier: MIT

import pandas as pd
from ..utils import verify_thicket_structures


def calc_extremum(thicket=None, columns=None):
def calc_extremum(thicket, columns=None):
"""Calculate min and max per node.
Designed to take in a Thicket, and will append a column to the statsframe
Expand All @@ -19,6 +20,13 @@ def calc_extremum(thicket=None, columns=None):
thicket (thicket): Thicket object
columns (list): list of hardware/timing metrics to perform extremnum calculations on
"""
if columns is None:
raise ValueError("To see a list of valid columns run get_perf_columns().")

verify_thicket_structures(
thicket.dataframe, index=["node", "profile"], columns=columns
)

for column in columns:
minimum = []
maximum = []
Expand Down
10 changes: 9 additions & 1 deletion thicket/stats/calc_percentiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@

import pandas as pd
import numpy as np
from ..utils import verify_thicket_structures


def calc_percentile(thicket=None, columns=None):
def calc_percentile(thicket, columns=None):
"""Calculate q-th percentile per node.
Designed to take in a Thicket, and will append a column to the statsframe
Expand All @@ -27,6 +28,13 @@ def calc_percentile(thicket=None, columns=None):
thicket (thicket): Thicket object
columns (list): list of hardware/timing metrics to perform percentile calculations on
"""
if columns is None:
raise ValueError("To see a list of valid columns run get_perf_columns().")

verify_thicket_structures(
thicket.dataframe, index=["node", "profile"], columns=columns
)

for column in columns:
percentiles = []
for node in pd.unique(thicket.dataframe.reset_index()["node"].tolist()):
Expand Down
9 changes: 8 additions & 1 deletion thicket/stats/check_normality.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@

import pandas as pd
from scipy import stats
from ..utils import verify_thicket_structures


def check_normality(thicket=None, columns=None):
def check_normality(thicket, columns=None):
"""
Designed to take in a Thicket, and will append a column to the statsframe.
Expand All @@ -27,6 +28,12 @@ def check_normality(thicket=None, columns=None):
statsframe: Returns statsframe with appended columns for normality check
"""
if columns is None:
raise ValueError("To see a list of valid columns run get_perf_columns().")

verify_thicket_structures(
thicket.dataframe, index=["node", "profile"], columns=columns
)

for column in columns:
normality = []
Expand Down
10 changes: 9 additions & 1 deletion thicket/stats/display_heatmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
# SPDX-License-Identifier: MIT

import seaborn as sns
from ..utils import verify_thicket_structures


def display_heatmap(thicket=None, columns=None, **kwargs):
def display_heatmap(thicket, columns=None, **kwargs):
"""Display a heatmap.
Arguments:
Expand All @@ -16,6 +17,13 @@ def display_heatmap(thicket=None, columns=None, **kwargs):
Returns:
(matplotlib Axes): object for managing plot
"""
if columns is None:
raise ValueError("To see a list of valid columns run get_perf_columns().")

verify_thicket_structures(
thicket.statsframe.dataframe, index=["node"], columns=columns
)

thicket.statsframe.dataframe.index = thicket.statsframe.dataframe.index.map(str)

ax = sns.heatmap(thicket.statsframe.dataframe[columns], **kwargs)
Expand Down
18 changes: 13 additions & 5 deletions thicket/stats/display_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,38 @@

import pandas as pd
import seaborn as sns
from ..utils import verify_thicket_structures


def display_histogram(thicket, node=None, column=None, **kwargs):
    """Display a histogram.

    Plots the distribution of ``column`` from the performance dataframe,
    restricted to the rows belonging to ``node``.

    Arguments:
        thicket (thicket): Thicket object
        node (str): node to plot; matched against the string form of the
            values in the "node" index level
        column (str): column from ensemble frame
        **kwargs: forwarded unchanged to ``seaborn.displot``

    Returns:
        (matplotlib Axes): object for managing plot

    Raises:
        ValueError: if ``node`` or ``column`` is None.
    """
    if column is None or node is None:
        raise ValueError(
            "Both 'node' and 'column' must be provided. "
            "To see a list of valid columns run get_perf_columns()."
        )

    verify_thicket_structures(
        thicket.dataframe, index=["node", "profile"], columns=[column]
    )

    # Long format: one row per (node, value) pair for the requested column.
    df = pd.melt(
        thicket.dataframe.reset_index(),
        id_vars="node",
        value_vars=column,
        value_name=column,
    )

    # The node column is compared as strings, so stringify the requested node
    # as well; a no-op when the caller already passed a string.
    df["node"] = df["node"].astype(str)
    filtered_df = df[df["node"] == str(node)]

    ax = sns.displot(filtered_df, x=column, kind="hist", **kwargs)

    return ax

0 comments on commit bfe2c7c

Please sign in to comment.