Skip to content

Commit

Permalink
Refactor Ensemble Functions & Change Ensemble APIs (LLNL#61)
Browse files Browse the repository at this point in the history
* Remove __init__ and make methods static to avoid state

* Refactor unify_ensemble

* Refactor columnar_join

* Refactor ensemble.py

* Refactor 3 unify functions into one function

* Update unit test

* Add concat_thickets function

* Change warnings to errors

* Update header_list API to 'headers'

* Change 'column_name' API to 'metadata_key'

* Update stats test function naming
  • Loading branch information
michaelmckinsey1 authored Sep 8, 2023
1 parent 6bbf77d commit 445aa83
Show file tree
Hide file tree
Showing 10 changed files with 594 additions and 552 deletions.
1 change: 1 addition & 0 deletions thicket/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# make flake8 unused names in this file.
# flake8: noqa: F401

from .ensemble import Ensemble
from .thicket import Thicket
from .thicket import InvalidFilter
from .thicket import EmptyMetadataTable
Expand Down
400 changes: 400 additions & 0 deletions thicket/ensemble.py

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions thicket/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ def _resolve_missing_indicies(th_list):
def _sync_nodes(gh, df):
"""Set the node objects to be equal in both the graph and the dataframe.
Operations: (n tree nodes) X (m df nodes) X (m)
id(graph_node) == id(df_node) after this function for nodes with equivalent hatchet
nid's.
"""
Expand Down
41 changes: 21 additions & 20 deletions thicket/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@


@pytest.fixture
def columnar_join_thicket(mpi_scaling_cali, rajaperf_basecuda_xl_cali):
"""Generator for 'columnar_join' thicket.
def thicket_axis_columns(mpi_scaling_cali, rajaperf_basecuda_xl_cali):
"""Generator for 'concat_thickets(axis="columns")' thicket.
Arguments:
mpi_scaling_cali (list): List of Caliper files for MPI scaling study.
rajaperf_basecuda_xl_cali (list): List of Caliper files for base cuda variant.
Returns:
list: List of original thickets, list of deepcopies of original thickets, and
columnar-joined thicket.
column-joined thicket.
"""
th_mpi_1 = Thicket.from_caliperreader(mpi_scaling_cali[0:2])
th_mpi_2 = Thicket.from_caliperreader(mpi_scaling_cali[2:4])
Expand All @@ -39,45 +39,46 @@ def columnar_join_thicket(mpi_scaling_cali, rajaperf_basecuda_xl_cali):
th_mpi_2_deep = th_mpi_2.deepcopy()
th_cuda128_deep = th_cuda128.deepcopy()

thicket_list = [th_mpi_1, th_mpi_2, th_cuda128]
thicket_list_cp = [th_mpi_1_deep, th_mpi_2_deep, th_cuda128_deep]
thickets = [th_mpi_1, th_mpi_2, th_cuda128]
thickets_cp = [th_mpi_1_deep, th_mpi_2_deep, th_cuda128_deep]

combined_th = Thicket.columnar_join(
thicket_list=thicket_list,
header_list=["MPI1", "MPI2", "Cuda128"],
column_name="ProblemSize",
combined_th = Thicket.concat_thickets(
thickets=thickets,
axis="columns",
headers=["MPI1", "MPI2", "Cuda128"],
metadata_key="ProblemSize",
)

return thicket_list, thicket_list_cp, combined_th
return thickets, thickets_cp, combined_th


@pytest.fixture
def stats_columnar_join_thicket(rajaperf_basecuda_xl_cali):
"""Generator for 'columnar_join' thicket for test_stats.py.
def stats_thicket_axis_columns(rajaperf_basecuda_xl_cali):
"""Generator for 'concat_thickets(axis="columns")' thicket for test_stats.py.
Arguments:
mpi_scaling_cali (list): List of Caliper files for MPI scaling study.
rajaperf_basecuda_xl_cali (list): List of Caliper files for base cuda variant.
Returns:
list: List of original thickets, list of deepcopies of original thickets, and
columnar-joined thicket.
column-joined thicket.
"""
th_cuda128_1 = Thicket.from_caliperreader(rajaperf_basecuda_xl_cali[0:4])
th_cuda128_2 = Thicket.from_caliperreader(rajaperf_basecuda_xl_cali[5:9])

# To check later if modifications were unexpectedly made
th_cuda128_1_deep = th_cuda128_1.deepcopy()
th_cuda128_2_deep = th_cuda128_2.deepcopy()
thicket_list = [th_cuda128_1, th_cuda128_2]
thicket_list_cp = [th_cuda128_1_deep, th_cuda128_2_deep]
thickets = [th_cuda128_1, th_cuda128_2]
thickets_cp = [th_cuda128_1_deep, th_cuda128_2_deep]

combined_th = Thicket.columnar_join(
thicket_list=thicket_list,
header_list=["Cuda 1", "Cuda 2"],
combined_th = Thicket.concat_thickets(
thickets=thickets,
axis="columns",
headers=["Cuda 1", "Cuda 2"],
)

return thicket_list, thicket_list_cp, combined_th
return thickets, thickets_cp, combined_th


@pytest.fixture
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,40 +12,58 @@
from test_filter_metadata import filter_multiple_and
from test_filter_stats import check_filter_stats
from test_query import check_query
from thicket import Thicket


def test_columnar_join(columnar_join_thicket):
thicket_list, thicket_list_cp, combined_th = columnar_join_thicket
def test_concat_thickets_index(mpi_scaling_cali):
    """Check ``Thicket.concat_thickets`` called with its default axis.

    Two single-profile thickets are built from the first two entries of
    ``mpi_scaling_cali`` and concatenated; the resulting performance
    dataframe is then checked for its shape and for one known value.

    Arguments:
        mpi_scaling_cali (list): List of Caliper files for MPI scaling study.
    """
    import math

    th_27 = Thicket.from_caliperreader(mpi_scaling_cali[0])
    th_64 = Thicket.from_caliperreader(mpi_scaling_cali[1])

    tk = Thicket.concat_thickets([th_27, th_64])

    # Check dataframe shape. This comparison must be asserted; as a bare
    # expression it is evaluated and discarded, so it can never fail.
    assert tk.dataframe.shape == (90, 7)

    # Sanity check on the returned object.
    # NOTE(review): this only tests truthiness of the Thicket; it does not
    # compare the result against the input thickets.
    assert tk

    # Check specific values. Row order can vary so use "sum" to check.
    # Compare with a tolerance rather than "==" because the value is a
    # floating-point sum.
    node = tk.dataframe.index.get_level_values("node")[8]
    assert math.isclose(sum(tk.dataframe.loc[node, "Min time/rank"]), 0.000453)


def test_concat_thickets_columns(thicket_axis_columns):
thickets, thickets_cp, combined_th = thicket_axis_columns
# Check no original objects modified
for i in range(len(thicket_list)):
assert thicket_list[i].dataframe.equals(thicket_list_cp[i].dataframe)
assert thicket_list[i].metadata.equals(thicket_list_cp[i].metadata)
for i in range(len(thickets)):
assert thickets[i].dataframe.equals(thickets_cp[i].dataframe)
assert thickets[i].metadata.equals(thickets_cp[i].metadata)

# Check dataframe shape. Should be columnar-joined
assert combined_th.dataframe.shape[0] <= sum(
[th.dataframe.shape[0] for th in thicket_list]
[th.dataframe.shape[0] for th in thickets]
) # Rows. Should be <= because some rows will exist across multiple thickets.
assert (
combined_th.dataframe.shape[1]
== sum([th.dataframe.shape[1] for th in thicket_list]) - len(thicket_list) + 1
== sum([th.dataframe.shape[1] for th in thickets]) - len(thickets) + 1
) # Columns. (-1) for each name column removed, (+1) singular name column created.

# Check metadata shape. Should be columnar-joined
assert combined_th.metadata.shape[0] == max(
[th.metadata.shape[0] for th in thicket_list]
[th.metadata.shape[0] for th in thickets]
) # Rows. Should be max because all rows should exist in all thickets.
assert combined_th.metadata.shape[1] == sum(
[th.metadata.shape[1] for th in thicket_list]
[th.metadata.shape[1] for th in thickets]
) - len(
thicket_list
thickets
) # Columns. (-1) Since we added an additional column "ProblemSize".

# Check profiles
assert len(combined_th.profile) == sum([len(th.profile) for th in thicket_list])
assert len(combined_th.profile) == sum([len(th.profile) for th in thickets])

# Check profile_mapping
assert len(combined_th.profile_mapping) == sum(
[len(th.profile_mapping) for th in thicket_list]
[len(th.profile_mapping) for th in thickets]
)

# PerfData and StatsFrame nodes should be in the same order.
Expand All @@ -55,8 +73,8 @@ def test_columnar_join(columnar_join_thicket):
).all()


def test_filter_columnar_join(columnar_join_thicket):
thicket_list, thicket_list_cp, combined_th = columnar_join_thicket
def test_filter_concat_thickets_columns(thicket_axis_columns):
thickets, thickets_cp, combined_th = thicket_axis_columns
# columns and corresponding values to filter by
columns_values = {
("MPI1", "mpi.world.size"): [27],
Expand All @@ -67,8 +85,8 @@ def test_filter_columnar_join(columnar_join_thicket):
filter_multiple_and(combined_th, columns_values)


def test_filter_stats_columnar_join(columnar_join_thicket):
thicket_list, thicket_list_cp, combined_th = columnar_join_thicket
def test_filter_stats_concat_thickets_columns(thicket_axis_columns):
thickets, thickets_cp, combined_th = thicket_axis_columns
# columns and corresponding values to filter by
columns_values = {
("test", "test_string_column"): ["less than 20"],
Expand All @@ -86,8 +104,8 @@ def test_filter_stats_columnar_join(columnar_join_thicket):
check_filter_stats(combined_th, columns_values)


def test_query_columnar_join(columnar_join_thicket):
thicket_list, thicket_list_cp, combined_th = columnar_join_thicket
def test_query_concat_thickets_columns(thicket_axis_columns):
thickets, thickets_cp, combined_th = thicket_axis_columns
# test arguments
hnids = [0, 1, 2, 3, 5, 6, 8, 9]
query = (
Expand Down
32 changes: 17 additions & 15 deletions thicket/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest

from thicket import Thicket, EmptyMetadataTable
from test_columnar_join import test_columnar_join
from test_concat_thickets import test_concat_thickets_columns
from utils import check_identity


Expand Down Expand Up @@ -89,7 +89,7 @@ def test_groupby(example_cali):
check_groupby(th, columns_values)


def test_groupby_columnar_join(example_cali):
def test_groupby_concat_thickets_columns(example_cali):
"""Tests case where the Sub-Thickets of a groupby are used in a columnar join"""
# example thicket
th = Thicket.from_caliperreader(example_cali)
Expand All @@ -106,23 +106,24 @@ def test_groupby_columnar_join(example_cali):
th_list[2].metadata[selected_column] = problem_size
th_list[3].metadata[selected_column] = problem_size

thicket_list = [th_list[0], th_list[1], th_list[2], th_list[3]]
thicket_list_cp = [
thickets = [th_list[0], th_list[1], th_list[2], th_list[3]]
thickets_cp = [
th_list[0].deepcopy(),
th_list[1].deepcopy(),
th_list[2].deepcopy(),
th_list[3].deepcopy(),
]

combined_th = Thicket.columnar_join(
thicket_list=thicket_list,
column_name=selected_column,
combined_th = Thicket.concat_thickets(
thickets=thickets,
axis="columns",
metadata_key=selected_column,
)

test_columnar_join((thicket_list, thicket_list_cp, combined_th))
test_concat_thickets_columns((thickets, thickets_cp, combined_th))


def test_groupby_columnar_join_subthickets(example_cali):
def test_groupby_concat_thickets_columns_subthickets(example_cali):
"""Tests case where some specific Sub-Thickets of a groupby are used in a columnar join"""
# example thicket
th = Thicket.from_caliperreader(example_cali)
Expand All @@ -137,15 +138,16 @@ def test_groupby_columnar_join_subthickets(example_cali):
th_list[0].metadata[selected_column] = problem_size
th_list[1].metadata[selected_column] = problem_size

thicket_list = [th_list[0], th_list[1]]
thicket_list_cp = [
thickets = [th_list[0], th_list[1]]
thickets_cp = [
th_list[0].deepcopy(),
th_list[1].deepcopy(),
]

combined_th = Thicket.columnar_join(
thicket_list=thicket_list,
column_name=selected_column,
combined_th = Thicket.concat_thickets(
thickets=thickets,
axis="columns",
metadata_key=selected_column,
)

test_columnar_join((thicket_list, thicket_list_cp, combined_th))
test_concat_thickets_columns((thickets, thickets_cp, combined_th))
5 changes: 5 additions & 0 deletions thicket/tests/test_intersection.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ def test_intersection(example_cali):

intersected_th = th_ens.intersection()

intersected_th_other = th.from_caliperreader(example_cali, intersection=True)

# Check other methodology
assert len(intersected_th.graph) == len(intersected_th_other.graph)

# Check original and intersected thickets
assert len(th_ens.dataframe) == 344
assert len(intersected_th.dataframe) == 4
Expand Down
Loading

0 comments on commit 445aa83

Please sign in to comment.