Add softmax support (jatinchowdhury18#15)
* wip grabbing sigmoid first to compute softmax

* Add softmax activation to dense test (currently failing)

* wip adding one softmax impl to test later

* adding eigen impl of softmax wip

* wip getting things to compile, tests still failing

* Add softmax activation to model exporter script

* got eigen and stl to produce same result

* undercomplicating softmax impl - now working

* add xsimd impl

* adding accelerate implementation

* using naive for-loop for softmax

* adding benchmark tests

* formatted code

* remove extraneous memcpy

* remove newlines + extraneous includes

* adding alignment to eigen impl

Co-authored-by: Jatin Chowdhury <[email protected]>
wayne-chen and jatinchowdhury18 authored Apr 20, 2021
1 parent 802e853 commit 6081200
Showing 14 changed files with 508 additions and 212 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/bench.yml
@@ -83,6 +83,11 @@ jobs:
./build/rtneural_layer_bench sigmoid 5 4 4
./build/rtneural_layer_bench sigmoid 5 16 16
- name: Benchmark Softmax
run: |
./build/rtneural_layer_bench softmax 5 4 4
./build/rtneural_layer_bench softmax 5 16 16
- name: Benchmark Model
run: |
./build/rtneural_model_bench
2 changes: 1 addition & 1 deletion README.md
@@ -21,7 +21,7 @@ Currently supported activations:
- [x] tanh
- [x] ReLU
- [x] Sigmoid
- [ ] SoftMax
- [x] SoftMax

## How To Use

21 changes: 21 additions & 0 deletions RTNeural/activation/activation.h
@@ -103,6 +103,27 @@ class SigmoidActivation : public Activation<T>
}
};

template <typename T>
class SoftmaxActivation : public Activation<T>
{
public:
SoftmaxActivation(size_t size)
: Activation<T>(
size, [](T x) { return (T)0; }, "softmax")
{
}

SoftmaxActivation(std::initializer_list<size_t> sizes)
: SoftmaxActivation(*sizes.begin())
{
}

inline void forward(const T* input, T* out) override
{
softmax(input, out, Layer<T>::out_size);
}
};

} // namespace RTNeural

#endif // USE_EIGEN
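For context, here is a minimal usage sketch of the new activation on its own, using the plain STL backend (no USE_EIGEN / USE_XSIMD / USE_ACCELERATE defined). The include path, layer size, and input values are illustrative, not part of this commit:

#include <array>
#include <iostream>
#include <RTNeural/RTNeural.h> // assumed umbrella header path

int main()
{
    // A 4-element softmax activation layer
    RTNeural::SoftmaxActivation<float> softmaxLayer(4);

    const std::array<float, 4> input { 1.0f, 2.0f, 3.0f, 4.0f };
    std::array<float, 4> output {};
    softmaxLayer.forward(input.data(), output.data());

    float sum = 0.0f;
    for(auto x : output)
        sum += x;
    std::cout << "sum of outputs: " << sum << std::endl; // should be ~1
    return 0;
}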
20 changes: 20 additions & 0 deletions RTNeural/activation/activation_accelerate.h
@@ -101,6 +101,26 @@ class SigmoidActivation : public Activation<T>
}
};

template <typename T>
class SoftmaxActivation : public Activation<T>
{
public:
SoftmaxActivation(size_t size)
: Activation<T>(size, {}, "softmax")
{
}

SoftmaxActivation(std::initializer_list<size_t> sizes)
: SoftmaxActivation(*sizes.begin())
{
}

inline void forward(const T* input, T* out) override
{
softmax(input, out, Layer<T>::in_size);
}
};

} // namespace RTNeural

#endif // ACTIVATIONACCELERATE_H_INCLUDED
30 changes: 30 additions & 0 deletions RTNeural/activation/activation_eigen.h
@@ -94,6 +94,36 @@ class SigmoidActivation : public Activation<T>
Eigen::Matrix<T, Eigen::Dynamic, 1> outVec;
};

template <typename T>
class SoftmaxActivation : public Activation<T>
{
public:
SoftmaxActivation(size_t size)
: Activation<T>(size, {}, "softmax")
{
inVec = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>::Zero(size, 1);
outVec = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>::Zero(size, 1);
}

SoftmaxActivation(std::initializer_list<size_t> sizes)
: SoftmaxActivation(*sizes.begin())
{
}

inline void forward(const T* input, T* out) override
{
// Map the raw input pointer; Eigen::Aligned16 assumes the caller's buffer is 16-byte aligned
inVec = Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>, Eigen::Aligned16>(
input, Layer<T>::in_size, 1);
outVec = inVec.array();
softmax(outVec);

std::copy(outVec.data(), outVec.data() + Layer<T>::in_size, out);
}

Eigen::Matrix<T, Eigen::Dynamic, 1> inVec;
Eigen::Matrix<T, Eigen::Dynamic, 1> outVec;
};

} // namespace RTNeural

#endif // ACTIVATIONEIGEN_H_INCLUDED
20 changes: 20 additions & 0 deletions RTNeural/activation/activation_xsimd.h
@@ -71,6 +71,26 @@ class SigmoidActivation : public Activation<T>
}
};

template <typename T>
class SoftmaxActivation : public Activation<T>
{
public:
SoftmaxActivation(size_t size)
: Activation<T>(size, {}, "softmax")
{
}

SoftmaxActivation(std::initializer_list<size_t> sizes)
: SoftmaxActivation(*sizes.begin())
{
}

inline void forward(const T* input, T* out) override
{
softmax(input, out, Layer<T>::in_size);
}
};

} // namespace RTNeural

#endif // ACTIVATIONXSIMD_H_INCLUDED
86 changes: 86 additions & 0 deletions RTNeural/common.h
@@ -13,6 +13,14 @@ sigmoid(Eigen::Matrix<T, Eigen::Dynamic, 1>& vector) noexcept
vector = (T)1 / (((T)-1 * vector.array()).array().exp() + (T)1);
}

template <typename T>
static inline void
softmax(Eigen::Matrix<T, Eigen::Dynamic, 1>& vector) noexcept
{
vector = vector.array().exp();
vector = vector / vector.sum();
}
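Each of the backend kernels in this file computes the same function, the standard softmax, which maps a length-N input to non-negative outputs that sum to one:

\[
\mathrm{softmax}(x)_i = \frac{e^{x_i}}{\sum_{j=1}^{N} e^{x_j}}, \qquad i = 1, \dots, N.
\]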

} // namespace RTNeural

#elif defined(USE_XSIMD)
@@ -94,6 +102,45 @@ static inline void sigmoid(const T* in, T* out, size_t dim) noexcept
out[i] = 1.0 / (1.0 + std::exp(-in[i]));
}

template <typename T>
static inline void softmax(const T* in, T* out, size_t dim) noexcept
{
using b_type = xsimd::simd_type<T>;
auto inc = b_type::size;

T exp_sum = 0;

// size for which the vectorization is possible
auto vec_size = dim - dim % inc;
for(size_t i = 0; i < vec_size; i += inc)
{
b_type x_vec = xsimd::load_aligned(&in[i]);
b_type y_vec = xsimd::exp(x_vec);
exp_sum += xsimd::hadd(y_vec);
xsimd::store_aligned(&out[i], y_vec);
}

// Remaining part that cannot be vectorized
for(auto i = vec_size; i < dim; ++i)
{
out[i] = std::exp(in[i]);
exp_sum += out[i];
}

for(size_t i = 0; i < vec_size; i += inc)
{
b_type x_vec = xsimd::load_aligned(&out[i]);
b_type y_vec = x_vec / exp_sum;
xsimd::store_aligned(&out[i], y_vec);
}

// Remaining part that cannot be vectorized
for(auto i = vec_size; i < dim; ++i)
{
out[i] /= exp_sum;
}
}
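A design note, not part of this commit: these kernels exponentiate the raw inputs, so std::exp can overflow for large input values. A common variant subtracts the maximum input first, which leaves the result unchanged because the common factor e^{-max} cancels in the ratio. A scalar sketch of that variant (the name softmax_stable is illustrative; it assumes dim > 0 and requires <algorithm> and <cmath>):

template <typename T>
static inline void softmax_stable(const T* in, T* out, size_t dim) noexcept
{
    // Shift by the maximum input so the largest exponent is exp(0) = 1
    const T max_val = *std::max_element(in, in + dim);

    T exp_sum = (T)0;
    for(size_t i = 0; i < dim; ++i)
    {
        out[i] = std::exp(in[i] - max_val);
        exp_sum += out[i];
    }

    for(size_t i = 0; i < dim; ++i)
        out[i] /= exp_sum;
}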

template <typename T>
static inline void tanh(const T* in, T* out, size_t dim) noexcept
{
@@ -146,6 +193,29 @@ static inline void sigmoid(const double* in, double* out, size_t dim) noexcept
vvrec(out, out, &dim_int);
}

static inline void softmax(const float* in, float* out, size_t dim) noexcept
{
const auto dim_int = static_cast<int>(dim);
float exp_sum;

// Exponentiate, accumulate the sum, then divide by it
vvexpf(out, in, &dim_int);
vDSP_sve(out, 1, &exp_sum, dim);
vDSP_vsdiv(out, 1, &exp_sum, out, 1, dim);
}

static inline void softmax(const double* in, double* out, size_t dim) noexcept
{
const auto dim_int = static_cast<int>(dim);
double exp_sum;

// Exponentiate, accumulate the sum, then divide by it
vvexp(out, in, &dim_int);
vDSP_sveD(out, 1, &exp_sum, dim);
vDSP_vsdivD(out, 1, &exp_sum, out, 1, dim);
}

} // namespace RTNeural

#else // STL backend
@@ -168,6 +238,22 @@ static inline T sigmoid(T value) noexcept
return (T)1 / ((T)1 + std::exp(-value));
}

template <typename T>
static inline void softmax(const T* input, T* out, size_t size) noexcept
{
T exp_sum = 0;
for(size_t i = 0; i < size; ++i)
{
out[i] = std::exp(input[i]);
exp_sum += out[i];
}

for(size_t i = 0; i < size; ++i)
{
out[i] /= exp_sum;
}
}
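As a quick sanity check for any of these implementations, take the input (1, 2, 3):

\[
\mathrm{softmax}(1, 2, 3) = \frac{(e^{1}, e^{2}, e^{3})}{e^{1} + e^{2} + e^{3}} \approx (0.0900,\ 0.2447,\ 0.6652),
\]

and the three outputs sum to 1.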

} // namespace RTNeural

#endif
3 changes: 3 additions & 0 deletions RTNeural/model_loader.h
@@ -304,6 +304,9 @@ namespace json_parser
if(activationType == "sigmoid")
return std::make_unique<SigmoidActivation<T>>(dims);

if(activationType == "softmax")
return std::make_unique<SoftmaxActivation<T>>(dims);

return {};
}
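With this branch in place, a model whose JSON description uses "activation": "softmax" loads like any other. A hedged sketch of the loading pattern: parseJson is the entry point shown in the project README, while getOutputs(), the file name, and the input size are assumptions made here for illustration:

#include <fstream>
#include <vector>
#include <RTNeural/RTNeural.h> // assumed umbrella header path

int main()
{
    std::ifstream jsonStream("classifier.json", std::ifstream::binary); // hypothetical model file
    auto model = RTNeural::json_parser::parseJson<float>(jsonStream);

    std::vector<float> input(8, 0.0f); // size must match the model's input dimension
    model->forward(input.data());

    // getOutputs() is assumed to expose the full output vector; with a softmax
    // final layer it should hold probabilities that sum to ~1.
    const float* probs = model->getOutputs();
    (void)probs;
    return 0;
}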

5 changes: 5 additions & 0 deletions bench/layer_creator.hpp
@@ -170,6 +170,11 @@ create_layer(const std::string &layer_type, size_t in_size, size_t out_size) {
return std::move(layer);
}

if (layer_type == "softmax") {
auto layer = std::make_unique<RTNeural::SoftmaxActivation<double>>(in_size);
return std::move(layer);
}

std::cout << "Layer type: " << layer_type << " not found!" << std::endl;
return {};
}