Update GPUTreeShap
RAMitchell committed Dec 8, 2020
1 parent 8629ac1 commit cc38335
Showing 5 changed files with 520 additions and 397 deletions.
shap/cext/_cext_gpu.cu: 139 changes (74 additions, 65 deletions)
@@ -3,11 +3,45 @@
#include "gpu_treeshap.h"
#include "tree_shap.h"

-const float inf = std::numeric_limits<float>::infinity();
+const float inf = std::numeric_limits<tfloat>::infinity();

+struct ShapSplitCondition {
+  ShapSplitCondition() = default;
+  ShapSplitCondition(tfloat feature_lower_bound, tfloat feature_upper_bound,
+                     bool is_missing_branch)
+      : feature_lower_bound(feature_lower_bound),
+        feature_upper_bound(feature_upper_bound),
+        is_missing_branch(is_missing_branch) {
+    assert(feature_lower_bound <= feature_upper_bound);
+  }
+
+  /*! Feature values >= lower and < upper flow down this path. */
+  tfloat feature_lower_bound;
+  tfloat feature_upper_bound;
+  /*! Do missing values flow down this path? */
+  bool is_missing_branch;
+
+  // Does this instance flow down this path?
+  __host__ __device__ bool EvaluateSplit(float x) const {
+    // is nan
+    if (isnan(x)) {
+      return is_missing_branch;
+    }
+    return x > feature_lower_bound && x <= feature_upper_bound;
+  }
+
+  // Combine two split conditions on the same feature
+  __host__ __device__ void Merge(
+      const ShapSplitCondition& other) {  // Combine duplicate features
+    feature_lower_bound = max(feature_lower_bound, other.feature_lower_bound);
+    feature_upper_bound = min(feature_upper_bound, other.feature_upper_bound);
+    is_missing_branch = is_missing_branch && other.is_missing_branch;
+  }
+};
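An illustrative aside (not part of this commit): as evaluated by EvaluateSplit above, a condition admits values with feature_lower_bound < x <= feature_upper_bound, so Merge simply intersects the intervals of duplicate features along a path. A minimal host-side sketch, assuming <cassert> and <cmath> are available (the file already uses assert and isnan):

  ShapSplitCondition a(-inf, 5.0, false);  // left branch of a split at 5.0
  ShapSplitCondition b(3.0, inf, false);   // right branch of a deeper split at 3.0
  a.Merge(b);                              // a now admits 3.0 < x <= 5.0, non-missing
  assert(a.EvaluateSplit(4.0f));           // true: 3.0 < 4.0 <= 5.0
  assert(!a.EvaluateSplit(NAN));           // false: NaN follows only missing-value branches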

void RecurseTree(unsigned pos, const TreeEnsemble &tree,
-                 std::vector<gpu_treeshap::PathElement> *tmp_path,
-                 std::vector<gpu_treeshap::PathElement> *paths,
+                 std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> *tmp_path,
+                 std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> *paths,
size_t *path_idx, int num_outputs) {
if (tree.is_leaf(pos)) {
for (auto j = 0ull; j < num_outputs; j++) {
@@ -35,39 +69,39 @@ void RecurseTree(unsigned pos, const TreeEnsemble &tree,
double left_zero_fraction =
tree.node_sample_weights[left_child] / tree.node_sample_weights[pos];
// Encode the range of feature values that flow down this path
-  tmp_path->emplace_back(0, tree.features[pos], 0, -inf,
-                         static_cast<float>(tree.thresholds[pos]), false,
+  tmp_path->emplace_back(0, tree.features[pos], 0, ShapSplitCondition{-inf,
+                         tree.thresholds[pos], false},
left_zero_fraction, 0.0f);

RecurseTree(left_child, tree, tmp_path, paths, path_idx, num_outputs);

// Add left split to the path
-  tmp_path->back() = gpu_treeshap::PathElement(
-      0, tree.features[pos], 0, static_cast<float>(tree.thresholds[pos]), inf,
-      false, 1.0 - left_zero_fraction, 0.0f);
+  tmp_path->back() = gpu_treeshap::PathElement<ShapSplitCondition>(
+      0, tree.features[pos], 0, ShapSplitCondition{tree.thresholds[pos], inf,
+      false}, 1.0 - left_zero_fraction, 0.0f);

RecurseTree(tree.children_right[pos], tree, tmp_path, paths, path_idx,
num_outputs);

tmp_path->pop_back();
}
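A note on the recursion above (editorial, not from the source): tmp_path always holds the path elements from the root down to the current node. Each internal node pushes the split condition for its left child, recurses left, overwrites that same slot with the right child's condition before recursing right, and pop_back() restores the caller's path; the leaf branch at the top (elided above) is where completed root-to-leaf paths are emitted into paths, once per output.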

-std::vector<gpu_treeshap::PathElement> ExtractPaths(const TreeEnsemble &trees) {
-  std::vector<gpu_treeshap::PathElement> paths;
+std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> ExtractPaths(const TreeEnsemble &trees) {
+  std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> paths;
size_t path_idx = 0;
for (auto i = 0; i < trees.tree_limit; i++) {
TreeEnsemble tree;
trees.get_tree(tree, i);
-    std::vector<gpu_treeshap::PathElement> tmp_path;
+    std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> tmp_path;
tmp_path.reserve(tree.max_depth);
-    tmp_path.emplace_back(0, -1, 0, -inf, inf, false, 1.0, 0.0f);
+    tmp_path.emplace_back(0, -1, 0, ShapSplitCondition{-inf, inf, false}, 1.0, 0.0f);
RecurseTree(0, tree, &tmp_path, &paths, &path_idx, tree.num_outputs);
}
return paths;
}

class DeviceExplanationDataset {
-  thrust::device_vector<float> data;
+  thrust::device_vector<tfloat> data;
thrust::device_vector<bool> missing;
size_t num_features;
size_t num_rows;
@@ -79,38 +113,38 @@ public:
num_features = host_data.M;
if (background_dataset) {
num_rows = host_data.num_R;
-      data = thrust::device_vector<float>(
+      data = thrust::device_vector<tfloat>(
host_data.R, host_data.R + host_data.num_R * host_data.M);
-      missing = thrust::device_vector<float>(host_data.R_missing,
+      missing = thrust::device_vector<bool>(host_data.R_missing,
host_data.R_missing +
host_data.num_R * host_data.M);

} else {
num_rows = host_data.num_X;
-      data = thrust::device_vector<float>(
+      data = thrust::device_vector<tfloat>(
host_data.X, host_data.X + host_data.num_X * host_data.M);
-      missing = thrust::device_vector<float>(host_data.X_missing,
+      missing = thrust::device_vector<bool>(host_data.X_missing,
host_data.X_missing +
host_data.num_X * host_data.M);
}
}

class DenseDatasetWrapper {
-    const float *data;
+    const tfloat *data;
const bool *missing;
int num_rows;
int num_cols;

public:
DenseDatasetWrapper() = default;
-    DenseDatasetWrapper(const float *data, const bool *missing, int num_rows,
+    DenseDatasetWrapper(const tfloat *data, const bool *missing, int num_rows,
int num_cols)
: data(data), missing(missing), num_rows(num_rows), num_cols(num_cols) {
}
-    __device__ float GetElement(size_t row_idx, size_t col_idx) const {
+    __device__ tfloat GetElement(size_t row_idx, size_t col_idx) const {
auto idx = row_idx * num_cols + col_idx;
if (missing[idx]) {
-        return std::numeric_limits<float>::quiet_NaN();
+        return std::numeric_limits<tfloat>::quiet_NaN();
}
return data[idx];
}
@@ -124,39 +158,6 @@ public:
}
};
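A connecting observation (editorial): GetElement reports a missing entry as a quiet NaN, and ShapSplitCondition::EvaluateSplit checks isnan(x) first, so missing values are routed down exactly the branches whose is_missing_branch flag is set.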

-// Helper function that accepts an index into a flat contiguous array and the
-// dimensions of a tensor and returns the indices with respect to the tensor
-template <typename T, size_t N>
-__device__ void flat_idx_to_tensor_idx(T flat_idx, const T (&shape)[N],
-                                       T (&out_idx)[N]) {
-  T current_size = shape[0];
-  for (auto i = 1ull; i < N; i++) {
-    current_size *= shape[i];
-  }
-  for (auto i = 0ull; i < N; i++) {
-    current_size /= shape[i];
-    out_idx[i] = flat_idx / current_size;
-    flat_idx -= current_size * out_idx[i];
-  }
-}
-
-// Given a shape and coordinates into a tensor, return the index into the
-// backing storage one-dimensional array
-template <typename T, size_t N>
-__device__ T tensor_idx_to_flat_idx(const T (&shape)[N],
-                                    const T (&tensor_idx)[N]) {
-  T current_size = shape[0];
-  for (auto i = 1ull; i < N; i++) {
-    current_size *= shape[i];
-  }
-  T idx = 0;
-  for (auto i = 0ull; i < N; i++) {
-    current_size /= shape[i];
-    idx += tensor_idx[i] * current_size;
-  }
-  return idx;
-}

inline void dense_tree_path_dependent_gpu(
const TreeEnsemble &trees, const ExplanationDataset &data,
tfloat *out_contribs, tfloat transform(const tfloat, const tfloat)) {
@@ -168,7 +169,7 @@ inline void dense_tree_path_dependent_gpu(
thrust::device_vector<float> phis((X.NumCols() + 1) * X.NumRows() *
trees.num_outputs);
gpu_treeshap::GPUTreeShap(X, paths.begin(), paths.end(), trees.num_outputs,
-                            phis.data().get(), phis.size());
+                            phis.begin(), phis.end());
// Add the base offset term to bias
thrust::device_vector<double> base_offset(
trees.base_offset, trees.base_offset + trees.num_outputs);
@@ -192,11 +193,11 @@ inline void dense_tree_path_dependent_gpu(
counting, counting + phis.size(), [=] __device__(size_t idx) {
size_t old_shape[] = {X.NumRows(), num_groups, (X.NumCols() + 1)};
size_t old_idx[std::size(old_shape)];
-        flat_idx_to_tensor_idx(idx, old_shape, old_idx);
+        gpu_treeshap::FlatIdxToTensorIdx(idx, old_shape, old_idx);
// Define new tensor format, switch num_groups axis to end
size_t new_shape[] = {X.NumRows(), (X.NumCols() + 1), num_groups};
size_t new_idx[] = {old_idx[0], old_idx[2], old_idx[1]};
-        size_t transposed_idx = tensor_idx_to_flat_idx(new_shape, new_idx);
+        size_t transposed_idx = gpu_treeshap::TensorIdxToFlatIdx(new_shape, new_idx);
d_transposed_phis[transposed_idx] = d_phis[idx];
});
thrust::copy(transposed_phis.begin(), transposed_phis.end(), out_contribs);
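A worked example of the transposition above (editorial, with illustrative sizes): the kernel output is laid out row-major as {rows, groups, cols + 1}, while the SHAP output wants {rows, cols + 1, groups}. With 2 rows, 3 groups and 3 + 1 feature slots, flat index 17 decodes under the old shape {2, 3, 4} to {row 1, group 1, feature 1}; re-encoding {1, 1, 1} under the new shape {2, 4, 3} gives 1*(4*3) + 1*3 + 1 = 16, so d_phis[17] is written to d_transposed_phis[16].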
@@ -206,7 +207,11 @@ inline void
dense_tree_independent_gpu(const TreeEnsemble &trees,
const ExplanationDataset &data, tfloat *out_contribs,
tfloat transform(const tfloat, const tfloat)) {
+  clock_t begin = clock();
auto paths = ExtractPaths(trees);
+  clock_t end = clock();
+  double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
+  printf("ExtractPaths time: %f\n",time_spent);
DeviceExplanationDataset device_data(data);
DeviceExplanationDataset::DenseDatasetWrapper X =
device_data.GetDeviceAccessor();
@@ -216,9 +221,13 @@ dense_tree_independent_gpu(const TreeEnsemble &trees,

thrust::device_vector<float> phis((X.NumCols() + 1) * X.NumRows() *
trees.num_outputs);
+  begin = clock();
gpu_treeshap::GPUTreeShapInterventional(X, R, paths.begin(), paths.end(),
-                                          trees.num_outputs, phis.data().get(),
-                                          phis.size());
+                                          trees.num_outputs, phis.begin(),
+                                          phis.end());
+  end = clock();
+  time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
+  printf("GPUTreeShapInterventional time: %f\n",time_spent);
// Add the base offset term to bias
thrust::device_vector<double> base_offset(
trees.base_offset, trees.base_offset + trees.num_outputs);
@@ -242,11 +251,11 @@ dense_tree_independent_gpu(const TreeEnsemble &trees,
counting, counting + phis.size(), [=] __device__(size_t idx) {
size_t old_shape[] = {X.NumRows(), num_groups, (X.NumCols() + 1)};
size_t old_idx[std::size(old_shape)];
-        flat_idx_to_tensor_idx(idx, old_shape, old_idx);
+        gpu_treeshap::FlatIdxToTensorIdx(idx, old_shape, old_idx);
// Define new tensor format, switch num_groups axis to end
size_t new_shape[] = {X.NumRows(), (X.NumCols() + 1), num_groups};
size_t new_idx[] = {old_idx[0], old_idx[2], old_idx[1]};
-        size_t transposed_idx = tensor_idx_to_flat_idx(new_shape, new_idx);
+        size_t transposed_idx = gpu_treeshap::TensorIdxToFlatIdx(new_shape, new_idx);
d_transposed_phis[transposed_idx] = d_phis[idx];
});
thrust::copy(transposed_phis.begin(), transposed_phis.end(), out_contribs);
@@ -263,8 +272,8 @@ inline void dense_tree_path_dependent_interactions_gpu(
thrust::device_vector<float> phis((X.NumCols() + 1) * (X.NumCols() + 1) *
X.NumRows() * trees.num_outputs);
gpu_treeshap::GPUTreeShapInteractions(X, paths.begin(), paths.end(),
-                                        trees.num_outputs, phis.data().get(),
-                                        phis.size());
+                                        trees.num_outputs, phis.begin(),
+                                        phis.end());
// Add the base offset term to bias
thrust::device_vector<double> base_offset(
trees.base_offset, trees.base_offset + trees.num_outputs);
@@ -289,12 +298,12 @@ inline void dense_tree_path_dependent_interactions_gpu(
size_t old_shape[] = {X.NumRows(), num_groups, (X.NumCols() + 1),
(X.NumCols() + 1)};
size_t old_idx[std::size(old_shape)];
-        flat_idx_to_tensor_idx(idx, old_shape, old_idx);
+        gpu_treeshap::FlatIdxToTensorIdx(idx, old_shape, old_idx);
// Define new tensor format, switch num_groups axis to end
size_t new_shape[] = {X.NumRows(), (X.NumCols() + 1), (X.NumCols() + 1),
num_groups};
size_t new_idx[] = {old_idx[0], old_idx[2], old_idx[3], old_idx[1]};
-        size_t transposed_idx = tensor_idx_to_flat_idx(new_shape, new_idx);
+        size_t transposed_idx = gpu_treeshap::TensorIdxToFlatIdx(new_shape, new_idx);
d_transposed_phis[transposed_idx] = d_phis[idx];
});
thrust::copy(transposed_phis.begin(), transposed_phis.end(), out_contribs);