Update GPUTreeShap
RAMitchell committed Dec 8, 2020
1 parent 8629ac1 commit cc38335
Showing 5 changed files with 520 additions and 397 deletions.
shap/cext/_cext_gpu.cu: 139 changes (74 additions, 65 deletions)
@@ -3,11 +3,45 @@
#include "gpu_treeshap.h"
#include "tree_shap.h"

-const float inf = std::numeric_limits<float>::infinity();
+const float inf = std::numeric_limits<tfloat>::infinity();

+struct ShapSplitCondition {
+  ShapSplitCondition() = default;
+  ShapSplitCondition(tfloat feature_lower_bound, tfloat feature_upper_bound,
+                     bool is_missing_branch)
+      : feature_lower_bound(feature_lower_bound),
+        feature_upper_bound(feature_upper_bound),
+        is_missing_branch(is_missing_branch) {
+    assert(feature_lower_bound <= feature_upper_bound);
+  }
+
+  /*! Feature values >= lower and < upper flow down this path. */
+  tfloat feature_lower_bound;
+  tfloat feature_upper_bound;
+  /*! Do missing values flow down this path? */
+  bool is_missing_branch;
+
+  // Does this instance flow down this path?
+  __host__ __device__ bool EvaluateSplit(float x) const {
+    // is nan
+    if (isnan(x)) {
+      return is_missing_branch;
+    }
+    return x > feature_lower_bound && x <= feature_upper_bound;
+  }
+
+  // Combine two split conditions on the same feature
+  __host__ __device__ void Merge(
+      const ShapSplitCondition& other) {  // Combine duplicate features
+    feature_lower_bound = max(feature_lower_bound, other.feature_lower_bound);
+    feature_upper_bound = min(feature_upper_bound, other.feature_upper_bound);
+    is_missing_branch = is_missing_branch && other.is_missing_branch;
+  }
+};
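An illustrative aside (not part of this commit): as evaluated by EvaluateSplit above, a condition admits values with feature_lower_bound < x <= feature_upper_bound, so Merge simply intersects the intervals of duplicate features along a path. A minimal host-side sketch, assuming <cassert> and <cmath> are available (the file already uses assert and isnan):

  ShapSplitCondition a(-inf, 5.0, false);  // left branch of a split at 5.0
  ShapSplitCondition b(3.0, inf, false);   // right branch of a deeper split at 3.0
  a.Merge(b);                              // a now admits 3.0 < x <= 5.0, non-missing
  assert(a.EvaluateSplit(4.0f));           // true: 3.0 < 4.0 <= 5.0
  assert(!a.EvaluateSplit(NAN));           // false: NaN follows only missing-value branches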

void RecurseTree(unsigned pos, const TreeEnsemble &tree,
-                 std::vector<gpu_treeshap::PathElement> *tmp_path,
-                 std::vector<gpu_treeshap::PathElement> *paths,
+                 std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> *tmp_path,
+                 std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> *paths,
size_t *path_idx, int num_outputs) {
if (tree.is_leaf(pos)) {
for (auto j = 0ull; j < num_outputs; j++) {
@@ -35,39 +69,39 @@ void RecurseTree(unsigned pos, const TreeEnsemble &tree,
double left_zero_fraction =
tree.node_sample_weights[left_child] / tree.node_sample_weights[pos];
// Encode the range of feature values that flow down this path
-  tmp_path->emplace_back(0, tree.features[pos], 0, -inf,
-                         static_cast<float>(tree.thresholds[pos]), false,
+  tmp_path->emplace_back(0, tree.features[pos], 0, ShapSplitCondition{-inf,
+                         tree.thresholds[pos], false},
left_zero_fraction, 0.0f);

RecurseTree(left_child, tree, tmp_path, paths, path_idx, num_outputs);

// Add left split to the path
-  tmp_path->back() = gpu_treeshap::PathElement(
-      0, tree.features[pos], 0, static_cast<float>(tree.thresholds[pos]), inf,
-      false, 1.0 - left_zero_fraction, 0.0f);
+  tmp_path->back() = gpu_treeshap::PathElement<ShapSplitCondition>(
+      0, tree.features[pos], 0, ShapSplitCondition{tree.thresholds[pos], inf,
+      false}, 1.0 - left_zero_fraction, 0.0f);

RecurseTree(tree.children_right[pos], tree, tmp_path, paths, path_idx,
num_outputs);

tmp_path->pop_back();
}
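A note on the recursion above (editorial, not from the source): tmp_path always holds the path elements from the root down to the current node. Each internal node pushes the split condition for its left child, recurses left, overwrites that same slot with the right child's condition before recursing right, and pop_back() restores the caller's path; the leaf branch at the top (elided above) is where completed root-to-leaf paths are emitted into paths, once per output.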

-std::vector<gpu_treeshap::PathElement> ExtractPaths(const TreeEnsemble &trees) {
-  std::vector<gpu_treeshap::PathElement> paths;
+std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> ExtractPaths(const TreeEnsemble &trees) {
+  std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> paths;
size_t path_idx = 0;
for (auto i = 0; i < trees.tree_limit; i++) {
TreeEnsemble tree;
trees.get_tree(tree, i);
-    std::vector<gpu_treeshap::PathElement> tmp_path;
+    std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> tmp_path;
tmp_path.reserve(tree.max_depth);
-    tmp_path.emplace_back(0, -1, 0, -inf, inf, false, 1.0, 0.0f);
+    tmp_path.emplace_back(0, -1, 0, ShapSplitCondition{-inf, inf, false}, 1.0, 0.0f);
RecurseTree(0, tree, &tmp_path, &paths, &path_idx, tree.num_outputs);
}
return paths;
}

class DeviceExplanationDataset {
-  thrust::device_vector<float> data;
+  thrust::device_vector<tfloat> data;
thrust::device_vector<bool> missing;
size_t num_features;
size_t num_rows;
@@ -79,38 +113,38 @@ public:
num_features = host_data.M;
if (background_dataset) {
num_rows = host_data.num_R;
-      data = thrust::device_vector<float>(
+      data = thrust::device_vector<tfloat>(
host_data.R, host_data.R + host_data.num_R * host_data.M);
-      missing = thrust::device_vector<float>(host_data.R_missing,
+      missing = thrust::device_vector<bool>(host_data.R_missing,
host_data.R_missing +
host_data.num_R * host_data.M);

} else {
num_rows = host_data.num_X;
-      data = thrust::device_vector<float>(
+      data = thrust::device_vector<tfloat>(
host_data.X, host_data.X + host_data.num_X * host_data.M);
-      missing = thrust::device_vector<float>(host_data.X_missing,
+      missing = thrust::device_vector<bool>(host_data.X_missing,
host_data.X_missing +
host_data.num_X * host_data.M);
}
}

class DenseDatasetWrapper {
-    const float *data;
+    const tfloat *data;
const bool *missing;
int num_rows;
int num_cols;

public:
DenseDatasetWrapper() = default;
-    DenseDatasetWrapper(const float *data, const bool *missing, int num_rows,
+    DenseDatasetWrapper(const tfloat *data, const bool *missing, int num_rows,
int num_cols)
: data(data), missing(missing), num_rows(num_rows), num_cols(num_cols) {
}
-    __device__ float GetElement(size_t row_idx, size_t col_idx) const {
+    __device__ tfloat GetElement(size_t row_idx, size_t col_idx) const {
auto idx = row_idx * num_cols + col_idx;
if (missing[idx]) {
-        return std::numeric_limits<float>::quiet_NaN();
+        return std::numeric_limits<tfloat>::quiet_NaN();
}
return data[idx];
}
@@ -124,39 +158,6 @@ public:
}
};
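A connecting observation (editorial): GetElement reports a missing entry as a quiet NaN, and ShapSplitCondition::EvaluateSplit checks isnan(x) first, so missing values are routed down exactly the branches whose is_missing_branch flag is set.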

-// Helper function that accepts an index into a flat contiguous array and the
-// dimensions of a tensor and returns the indices with respect to the tensor
-template <typename T, size_t N>
-__device__ void flat_idx_to_tensor_idx(T flat_idx, const T (&shape)[N],
-                                       T (&out_idx)[N]) {
-  T current_size = shape[0];
-  for (auto i = 1ull; i < N; i++) {
-    current_size *= shape[i];
-  }
-  for (auto i = 0ull; i < N; i++) {
-    current_size /= shape[i];
-    out_idx[i] = flat_idx / current_size;
-    flat_idx -= current_size * out_idx[i];
-  }
-}
-
-// Given a shape and coordinates into a tensor, return the index into the
-// backing storage one-dimensional array
-template <typename T, size_t N>
-__device__ T tensor_idx_to_flat_idx(const T (&shape)[N],
-                                    const T (&tensor_idx)[N]) {
-  T current_size = shape[0];
-  for (auto i = 1ull; i < N; i++) {
-    current_size *= shape[i];
-  }
-  T idx = 0;
-  for (auto i = 0ull; i < N; i++) {
-    current_size /= shape[i];
-    idx += tensor_idx[i] * current_size;
-  }
-  return idx;
-}

inline void dense_tree_path_dependent_gpu(
const TreeEnsemble &trees, const ExplanationDataset &data,
tfloat *out_contribs, tfloat transform(const tfloat, const tfloat)) {
@@ -168,7 +169,7 @@ inline void dense_tree_path_dependent_gpu(
thrust::device_vector<float> phis((X.NumCols() + 1) * X.NumRows() *
trees.num_outputs);
gpu_treeshap::GPUTreeShap(X, paths.begin(), paths.end(), trees.num_outputs,
-                            phis.data().get(), phis.size());
+                            phis.begin(), phis.end());
// Add the base offset term to bias
thrust::device_vector<double> base_offset(
trees.base_offset, trees.base_offset + trees.num_outputs);
@@ -192,11 +193,11 @@ inline void dense_tree_path_dependent_gpu(
counting, counting + phis.size(), [=] __device__(size_t idx) {
size_t old_shape[] = {X.NumRows(), num_groups, (X.NumCols() + 1)};
size_t old_idx[std::size(old_shape)];
-        flat_idx_to_tensor_idx(idx, old_shape, old_idx);
+        gpu_treeshap::FlatIdxToTensorIdx(idx, old_shape, old_idx);
// Define new tensor format, switch num_groups axis to end
size_t new_shape[] = {X.NumRows(), (X.NumCols() + 1), num_groups};
size_t new_idx[] = {old_idx[0], old_idx[2], old_idx[1]};
-        size_t transposed_idx = tensor_idx_to_flat_idx(new_shape, new_idx);
+        size_t transposed_idx = gpu_treeshap::TensorIdxToFlatIdx(new_shape, new_idx);
d_transposed_phis[transposed_idx] = d_phis[idx];
});
thrust::copy(transposed_phis.begin(), transposed_phis.end(), out_contribs);
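A worked example of the transposition above (editorial, with illustrative sizes): the kernel output is laid out row-major as {rows, groups, cols + 1}, while the SHAP output wants {rows, cols + 1, groups}. With 2 rows, 3 groups and 3 + 1 feature slots, flat index 17 decodes under the old shape {2, 3, 4} to {row 1, group 1, feature 1}; re-encoding {1, 1, 1} under the new shape {2, 4, 3} gives 1*(4*3) + 1*3 + 1 = 16, so d_phis[17] is written to d_transposed_phis[16].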
@@ -206,7 +207,11 @@ inline void
dense_tree_independent_gpu(const TreeEnsemble &trees,
const ExplanationDataset &data, tfloat *out_contribs,
tfloat transform(const tfloat, const tfloat)) {
+  clock_t begin = clock();
auto paths = ExtractPaths(trees);
+  clock_t end = clock();
+  double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
+  printf("ExtractPaths time: %f\n",time_spent);
DeviceExplanationDataset device_data(data);
DeviceExplanationDataset::DenseDatasetWrapper X =
device_data.GetDeviceAccessor();
@@ -216,9 +221,13 @@ dense_tree_independent_gpu(const TreeEnsemble &trees,

thrust::device_vector<float> phis((X.NumCols() + 1) * X.NumRows() *
trees.num_outputs);
+  begin = clock();
gpu_treeshap::GPUTreeShapInterventional(X, R, paths.begin(), paths.end(),
-                                          trees.num_outputs, phis.data().get(),
-                                          phis.size());
+                                          trees.num_outputs, phis.begin(),
+                                          phis.end());
+  end = clock();
+  time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
+  printf("GPUTreeShapInterventional time: %f\n",time_spent);
// Add the base offset term to bias
thrust::device_vector<double> base_offset(
trees.base_offset, trees.base_offset + trees.num_outputs);
@@ -242,11 +251,11 @@ dense_tree_independent_gpu(const TreeEnsemble &trees,
counting, counting + phis.size(), [=] __device__(size_t idx) {
size_t old_shape[] = {X.NumRows(), num_groups, (X.NumCols() + 1)};
size_t old_idx[std::size(old_shape)];
-        flat_idx_to_tensor_idx(idx, old_shape, old_idx);
+        gpu_treeshap::FlatIdxToTensorIdx(idx, old_shape, old_idx);
// Define new tensor format, switch num_groups axis to end
size_t new_shape[] = {X.NumRows(), (X.NumCols() + 1), num_groups};
size_t new_idx[] = {old_idx[0], old_idx[2], old_idx[1]};
-        size_t transposed_idx = tensor_idx_to_flat_idx(new_shape, new_idx);
+        size_t transposed_idx = gpu_treeshap::TensorIdxToFlatIdx(new_shape, new_idx);
d_transposed_phis[transposed_idx] = d_phis[idx];
});
thrust::copy(transposed_phis.begin(), transposed_phis.end(), out_contribs);
@@ -263,8 +272,8 @@ inline void dense_tree_path_dependent_interactions_gpu(
thrust::device_vector<float> phis((X.NumCols() + 1) * (X.NumCols() + 1) *
X.NumRows() * trees.num_outputs);
gpu_treeshap::GPUTreeShapInteractions(X, paths.begin(), paths.end(),
-                                        trees.num_outputs, phis.data().get(),
-                                        phis.size());
+                                        trees.num_outputs, phis.begin(),
+                                        phis.end());
// Add the base offset term to bias
thrust::device_vector<double> base_offset(
trees.base_offset, trees.base_offset + trees.num_outputs);
@@ -289,12 +298,12 @@ inline void dense_tree_path_dependent_interactions_gpu(
size_t old_shape[] = {X.NumRows(), num_groups, (X.NumCols() + 1),
(X.NumCols() + 1)};
size_t old_idx[std::size(old_shape)];
-        flat_idx_to_tensor_idx(idx, old_shape, old_idx);
+        gpu_treeshap::FlatIdxToTensorIdx(idx, old_shape, old_idx);
// Define new tensor format, switch num_groups axis to end
size_t new_shape[] = {X.NumRows(), (X.NumCols() + 1), (X.NumCols() + 1),
num_groups};
size_t new_idx[] = {old_idx[0], old_idx[2], old_idx[3], old_idx[1]};
-        size_t transposed_idx = tensor_idx_to_flat_idx(new_shape, new_idx);
+        size_t transposed_idx = gpu_treeshap::TensorIdxToFlatIdx(new_shape, new_idx);
d_transposed_phis[transposed_idx] = d_phis[idx];
});
thrust::copy(transposed_phis.begin(), transposed_phis.end(), out_contribs);