fix floating-point overflow problem of tanh (PaddlePaddle#355)
hedaoyuan authored and reyoung committed Nov 8, 2016
1 parent 56b23d1 commit a07da94
Showing 10 changed files with 119 additions and 14 deletions.
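
The substance of the fix, in one place: Paddle computes tanh as b = 2.0 / (1.0 + exp(-2 * a)) - 1.0, and for large negative a the intermediate exp(-2 * a) overflows even though tanh itself is bounded in (-1, 1). The patch clamps the exp argument at EXP_MAX_INPUT (40.0), which prevents the overflow without visibly changing the result: 2.0 / (1.0 + exp(40)) is already below one ulp of 1. A minimal standalone sketch of the technique (safe_tanh is a hypothetical name for illustration, not part of the patch):

    #include <algorithm>
    #include <cmath>

    // Clamp the exp argument so exp() cannot overflow; mirrors the patch.
    // exp(40) ~= 2.35e17, far below FLT_MAX, while 2 / (1 + exp(40)) - 1
    // is -1 to within one ulp, so the clamp preserves the result.
    const double kExpMaxInput = 40.0;  // EXP_MAX_INPUT in the patch

    double safe_tanh(double a) {
      double tmp = std::min(-2.0 * a, kExpMaxInput);
      return 2.0 / (1.0 + std::exp(tmp)) - 1.0;
    }
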
9 changes: 9 additions & 0 deletions paddle/cuda/include/hl_base.h
@@ -209,6 +209,15 @@ typedef struct {
 #define HL_FLOAT_MIN 2.2250738585072014e-308
 #endif
 
+
+/**
+ * The maximum input value for exp, used to avoid overflow problem.
+ *
+ * Currently only used for tanh function.
+ */
+#define EXP_MAX_INPUT 40.0
+
+
 /**
  * @brief DIVUP(x, y) is similar to ceil(x / y).
  * @note For CUDA, DIVUP will be used to specify
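Why 40.0 is a safe bound (my arithmetic, not spelled out in the patch): exp(40) ≈ 2.35e17, comfortably below FLT_MAX ≈ 3.4e38, so the clamped exp cannot overflow even in single precision, while 2.0 / (1.0 + exp(40)) < 1e-17 means the clamped tanh still saturates to -1 at machine precision. A quick self-contained check:

    #include <cfloat>
    #include <cmath>
    #include <cstdio>

    int main() {
      double e = std::exp(40.0);  // EXP_MAX_INPUT
      // No overflow: exp(40) is ~21 orders of magnitude below FLT_MAX.
      std::printf("exp(40) = %g, FLT_MAX = %g\n", e, double(FLT_MAX));
      // The clamped tanh still saturates to -1 within double precision.
      std::printf("2/(1+exp(40)) - 1 = %.17g\n", 2.0 / (1.0 + e) - 1.0);
      return 0;
    }
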
2 changes: 2 additions & 0 deletions paddle/cuda/src/hl_avx_functions.cc
@@ -38,7 +38,9 @@ namespace hppl {
 }
 
 __m256 tanh(const __m256 a) {
+  __m256 max = _mm256_set1_ps(EXP_MAX_INPUT);
   __m256 tmp = _mm256_mul_ps(_mm256_set1_ps(-2.0f), a);
+  tmp = _mm256_min_ps(tmp, max);
   tmp = exp(tmp);
   return _mm256_sub_ps(
       _mm256_div_ps(_mm256_set1_ps(2.0f),
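The AVX path applies the same guard lane-wise: _mm256_min_ps(tmp, max) is the vector analogue of the scalar ternary clamp. A rough sketch of the full pattern, substituting a per-lane std::exp for hppl's vectorized exp (which the excerpt above truncates):

    #include <immintrin.h>
    #include <cmath>

    __m256 tanh_avx_sketch(__m256 a) {
      const __m256 max = _mm256_set1_ps(40.0f);  // EXP_MAX_INPUT
      __m256 tmp = _mm256_mul_ps(_mm256_set1_ps(-2.0f), a);
      tmp = _mm256_min_ps(tmp, max);             // lane-wise clamp
      alignas(32) float t[8];
      _mm256_store_ps(t, tmp);
      for (int i = 0; i < 8; ++i) {              // stand-in for hppl's exp
        t[i] = 2.0f / (1.0f + std::exp(t[i])) - 1.0f;
      }
      return _mm256_load_ps(t);
    }
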
4 changes: 3 additions & 1 deletion paddle/cuda/src/hl_cpu_functions.cc
@@ -30,7 +30,9 @@ namespace hppl {
 }
 
 real tanh(const real a) {
-  return (2.0 / (1.0 + exp(-2.0*a))) - 1.0;
+  real tmp = -2.0 * a;
+  tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+  return (2.0 / (1.0 + exp(tmp))) - 1.0;
 }
 
 real linear(const real a) {
2 changes: 1 addition & 1 deletion paddle/gserver/tests/test_LayerGrad.cpp
@@ -995,7 +995,7 @@ TEST(Layer, LstmLayer) {
   TestConfig config;
   config.layerConfig.set_type("lstmemory");
   config.layerConfig.set_size(4);
-  config.layerConfig.set_active_type("sigmoid");
+  config.layerConfig.set_active_type("tanh");
   config.layerConfig.set_active_state_type("sigmoid");
   config.layerConfig.set_active_gate_type("sigmoid");
   config.biasSize = 28;
2 changes: 1 addition & 1 deletion paddle/gserver/tests/test_RecurrentLayer.cpp
@@ -369,7 +369,7 @@ TEST(Layer, LstmLayer) {
   LayerConfig layerConfig;
   layerConfig.set_type("lstmemory");
   layerConfig.set_active_type("relu");
-  layerConfig.set_active_state_type("sigmoid");
+  layerConfig.set_active_state_type("tanh");
   layerConfig.set_active_gate_type("sigmoid");
 
   layerConfig.add_inputs();
5 changes: 4 additions & 1 deletion paddle/math/BaseMatrix.cu
@@ -625,7 +625,10 @@ void BaseMatrixT<T>::squareDerivative(BaseMatrixT& b) {
   applyBinary(binary::SquareDerivative<T>(), b);
 }
 
-DEFINE_MATRIX_BINARY_OP(Tanh, b = 2.0 / (1.0 + exp(-2 * a)) - 1.0);
+DEFINE_MATRIX_BINARY_OP(Tanh,
+                        T tmp = -2.0 * a;
+                        tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+                        b = 2.0 / (1.0 + std::exp(tmp)) - 1.0);
 template<>
 void BaseMatrixT<real>::tanh(BaseMatrixT& b) {
   applyBinary(binary::Tanh<real>(), b);
5 changes: 4 additions & 1 deletion paddle/math/MathFunctions.cpp
@@ -200,7 +200,10 @@ void vLog1p(const int n, const T* a, T* r) {
       binary::vLog1p<T>(), const_cast<T*>(a), r, 1, n, n, n);
 }
 
-DEFINE_MATRIX_BINARY_OP(vTanh, b = 2.0 / (1.0 + std::exp(-2 * a)) - 1.0);
+DEFINE_MATRIX_BINARY_OP(vTanh,
+                        T tmp = -2.0 * a;
+                        tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+                        b = 2.0 / (1.0 + std::exp(tmp)) - 1.0);
 template<class T>
 void vTanh(const int n, const T* a, T* r) {
   hl_cpu_apply_binary_op<T, binary::vTanh<T>, 0, 0>(
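For readers unfamiliar with DEFINE_MATRIX_BINARY_OP, used both here and in BaseMatrix.cu above: it wraps its second argument as the element-wise body of a small functor, which applyBinary / hl_cpu_apply_binary_op then maps over the matrix, with a as the input element and b as the output. A hypothetical expansion to illustrate the shape only (not copied from Paddle's actual macro definition):

    #include <cmath>

    // Hypothetical expansion of DEFINE_MATRIX_BINARY_OP(vTanh, ...).
    namespace binary {
    template <class T>
    class vTanh {
    public:
      // Called once per element: a is the source, b is the destination.
      inline void operator()(T& a, T& b) const {
        T tmp = -2.0 * a;
        tmp = (tmp > 40.0 /* EXP_MAX_INPUT */) ? 40.0 : tmp;
        b = 2.0 / (1.0 + std::exp(tmp)) - 1.0;
      }
    };
    }  // namespace binary
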
9 changes: 0 additions & 9 deletions paddle/math/Matrix.cpp
@@ -3471,9 +3471,7 @@ void CpuMatrix::tanh(Matrix& output) {
   size_t dim = getWidth();
   CHECK_EQ(output.getHeight(), numSamples);
   CHECK_EQ(output.getWidth(), dim);
-  errno = 0;
   vTanh(numSamples * dim, getData(), output.getData());
-  CHECK_EQ(errno, 0) << "vTanh error";
 }
 
 void CpuMatrix::tanhDerivative(Matrix& output) {
@@ -3495,10 +3493,8 @@ void CpuMatrix::softrelu(Matrix& output) {
       out[j] = x;
     }
   }
-  errno = 0;
   vExp(numSamples * dim, output.getData(), output.getData());
   vLog1p(numSamples * dim, output.getData(), output.getData());
-  CHECK_EQ(errno, 0) << "vExp+vLog1p error";
 }
 
 void CpuMatrix::softreluDerivative(Matrix& output) {
@@ -3513,9 +3509,7 @@ void CpuMatrix::softreluDerivative(Matrix& output) {
   MatrixPtr tmpMat = Matrix::create(numSamples, dim);
   real* tmp = tmpMat->getData();
 
-  errno = 0;
   vExp(size, output.getData(), tmpMat->getData());
-  CHECK_EQ(errno, 0) << "vExp error";
 
   for (size_t i = 0; i < size; ++i) {
     grad[i] *= (1.0 - 1.0 / tmp[i]);
@@ -3538,10 +3532,7 @@ void CpuMatrix::scaledTanh(Matrix& output, real p1, real p2) {
     out[i] = p2 * in[i];
   }
 
-  // out = tanh(out)
-  errno = 0;
   vTanh(numSamples * dim, out, out);
-  CHECK_EQ(errno, 0) << "vTanh error";
 
   // out = p1 * out
   for (size_t i = 0; i < numSamples * dim; ++i) {
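With the overflow prevented at its source, the errno checks around vTanh/vExp become unnecessary and are removed; the new test below instead traps floating-point errors directly with feenableexcept (a glibc extension), which turns the normally masked FP exceptions into SIGFPE. A minimal sketch of that trapping mechanism; note the deliberate overflow aborts the program, which is the point:

    #include <fenv.h>  // feenableexcept (glibc)
    #include <cmath>
    #include <cstdio>

    int main() {
      // Unmask the same exceptions the new test enables: FE_INVALID,
      // FE_DIVBYZERO and FE_OVERFLOW now raise SIGFPE instead of silently
      // producing NaN or inf.
      feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
      volatile double x = std::exp(1000.0);  // overflows, SIGFPE raised here
      std::printf("never reached: %g\n", x);
      return 0;
    }
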
1 change: 1 addition & 0 deletions paddle/math/tests/CMakeLists.txt
@@ -13,3 +13,4 @@ add_simple_unittest(test_sparseMatrixCompare)
 add_simple_unittest(test_perturbation)
 add_simple_unittest(test_CpuGpuVector)
 add_simple_unittest(test_Allocator)
+add_simple_unittest(test_FPException)
94 changes: 94 additions & 0 deletions paddle/math/tests/test_FPException.cpp
@@ -0,0 +1,94 @@
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */


/**
 * This test covers floating-point calculation exceptions.
 * Paddle catches the FE_INVALID, FE_DIVBYZERO and FE_OVERFLOW exceptions.
 *
 * Some exceptions occur in the middle of a chain of formulas and can be
 * circumvented with a small trick. For example, tanh is calculated as
 *
 *   b = 2.0 / (1.0 + exp(-2 * a)) - 1.0
 *
 * If (-2 * a) is too large, an FE_OVERFLOW exception occurs while
 * calculating exp, even though the tanh result itself cannot overflow.
 * So the input of exp is clamped to keep it from producing an
 * excessively large value.
 */
#include <fenv.h>
#include <gtest/gtest.h>
#include "paddle/math/Matrix.h"
#include "paddle/utils/Excepts.h"

using namespace paddle; // NOLINT

void SetTensorValue(Matrix& matrix, real value) {
  int height = matrix.getHeight();
  int width = matrix.getWidth();
  int stride = matrix.getStride();
  real* data = matrix.getData();
  for (int i = 0; i < height; i++) {
    int j = rand() % width;  // NOLINT
    if (typeid(matrix) == typeid(CpuMatrix)) {
      data[i * stride + j] = value;
    } else if (typeid(matrix) == typeid(GpuMatrix)) {
      hl_memcpy(&data[i * stride + j], &value, sizeof(real));
    } else {
      LOG(FATAL) << "should not reach here";
    }
  }
}

template<typename Matrix>
void testTanh(real illegal) {
  MatrixPtr A = std::make_shared<Matrix>(10, 10);
  MatrixPtr B = std::make_shared<Matrix>(10, 10);
  A->randomizeUniform();
  B->randomizeUniform();

  SetTensorValue(*A, illegal);

  A->tanh(*B);
}

template<typename Matrix>
void testSigmoid(real illegal) {
  MatrixPtr A = std::make_shared<Matrix>(10, 10);
  MatrixPtr B = std::make_shared<Matrix>(10, 10);
  A->randomizeUniform();
  B->randomizeUniform();

  SetTensorValue(*A, illegal);

  A->sigmoid(*B);
}

TEST(fp, overflow) {
  for (auto illegal : {-90.0, 90.0}) {
    LOG(INFO) << " illegal=" << illegal;
    testTanh<CpuMatrix>(illegal);
    testSigmoid<CpuMatrix>(illegal);
  }
}

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);

  feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
  return RUN_ALL_TESTS();
}
