From d5422534ae740d92f325d1d1771f810a26622056 Mon Sep 17 00:00:00 2001 From: Sergey Zagoruyko Date: Thu, 16 Jun 2016 17:08:32 +0200 Subject: [PATCH] inplace hardtanh, remove relu6 --- HardTanh.cu | 58 +++++++++++++++++++++++++++++---- ReLU6.cu | 92 ----------------------------------------------------- THCUNN.h | 18 +++-------- 3 files changed, 55 insertions(+), 113 deletions(-) delete mode 100644 ReLU6.cu diff --git a/HardTanh.cu b/HardTanh.cu index 764a3c0e06..b341f5ab08 100644 --- a/HardTanh.cu +++ b/HardTanh.cu @@ -20,14 +20,36 @@ struct hardtanhupdateOutput_functor else *output = max_val_; } + + __device__ void operator()(float *input) const + { + if (*input < min_val_) + *input = min_val_; + else if (*input > max_val_) + *input = max_val_; + } }; -void THNN_CudaHardTanh_updateOutput(THCState *state, THCudaTensor *input, THCudaTensor *output, float min_val, float max_val) +void THNN_CudaHardTanh_updateOutput( + THCState *state, + THCudaTensor *input, + THCudaTensor *output, + float min_val, + float max_val, + bool inplace) { THCUNN_assertSameGPU(state, 2, input, output); - THCudaTensor_resizeAs(state, output, input); - THC_pointwiseApply2(state, output, input, + if(inplace) + { + THCudaTensor_set(state, output, input); + THC_pointwiseApply1(state, output, hardtanhupdateOutput_functor(min_val, max_val)); + } + else + { + THCudaTensor_resizeAs(state, output, input); + THC_pointwiseApply2(state, output, input, hardtanhupdateOutput_functor(min_val, max_val)); + } } struct hardtanhupdateGradInput_functor @@ -47,13 +69,35 @@ struct hardtanhupdateGradInput_functor else *gradInput = *gradOutput; } + + __device__ void operator()(float *gradInput, const float *input) const + { + if (*input <= min_val_ || *input >= max_val_) + *gradInput = 0; + } }; -void THNN_CudaHardTanh_updateGradInput(THCState *state, THCudaTensor *input, THCudaTensor *gradOutput, THCudaTensor *gradInput, float min_val, float max_val) +void THNN_CudaHardTanh_updateGradInput( + THCState *state, + THCudaTensor *input, + THCudaTensor *gradOutput, + THCudaTensor *gradInput, + float min_val, + float max_val, + bool inplace) { THCUNN_assertSameGPU(state, 3, input, gradOutput, gradInput); - THCudaTensor_resizeAs(state, gradInput, input); - THC_pointwiseApply3(state, gradInput, input, gradOutput, - hardtanhupdateGradInput_functor(min_val, max_val)); + if (inplace) + { + THCudaTensor_resizeAs(state, gradInput, input); + THC_pointwiseApply3(state, gradInput, input, gradOutput, + hardtanhupdateGradInput_functor(min_val, max_val)); + } + else + { + THCudaTensor_set(state, gradInput, gradOutput); + THC_pointwiseApply2(state, gradInput, input, + hardtanhupdateGradInput_functor(min_val, max_val)); + } } diff --git a/ReLU6.cu b/ReLU6.cu deleted file mode 100644 index a42f2c93e0..0000000000 --- a/ReLU6.cu +++ /dev/null @@ -1,92 +0,0 @@ -#include "THCUNN.h" -#include "common.h" - -struct ReLU6UpdateOutput -{ - ReLU6UpdateOutput() {} - - __device__ __forceinline__ void operator()(float *out, float *in) - { - float x = *in; - *out = (x > 0) ? ((x < 6) ? x : 6) : 0; - } -}; - -// in-place variant -struct ReLU6UpdateOutputIP -{ - ReLU6UpdateOutputIP() {} - - __device__ __forceinline__ void operator()(float *x) - { - *x = (*x > 0) ? ((*x < 6) ? *x : 6) : 0; - } -}; - -void THNN_CudaReLU6_updateOutput(THCState *state, THCudaTensor *input, THCudaTensor *output, - bool inplace) -{ - THCUNN_assertSameGPU(state, 2, input, output); - - if (inplace) - { - THC_pointwiseApply1(state, input, - ReLU6UpdateOutputIP() - ); - THCudaTensor_set(state, output, input); - } - else - { - THCudaTensor_resizeAs(state, output, input); - THC_pointwiseApply2(state, output, input, - ReLU6UpdateOutput() - ); - } - - THCudaCheck(cudaGetLastError()); -} - -struct ReLU6UpdateGradInput -{ - ReLU6UpdateGradInput() {} - - __device__ __forceinline__ void operator()( - float *gradInput, float *input, float *gradOutput) const - { - *gradInput = (*input > 0 && *input < 6) ? *gradOutput : 0; - } -}; - -struct ReLU6UpdateGradInputIP -{ - ReLU6UpdateGradInputIP() {} - - __device__ __forceinline__ void operator()( - float *gradOutput, float *input) const - { - *gradOutput = (*input > 0 && *input < 6) ? *gradOutput : 0; - } -}; - -void THNN_CudaReLU6_updateGradInput(THCState *state, THCudaTensor *input, THCudaTensor *gradOutput, - THCudaTensor *gradInput, bool inplace) -{ - THCUNN_assertSameGPU(state, 3, input, gradInput, gradOutput); - - if (inplace) - { - THC_pointwiseApply2(state, gradOutput, input, - ReLU6UpdateGradInputIP() - ); - THCudaTensor_set(state, gradInput, gradOutput); - } - else - { - THCudaTensor_resizeAs(state, gradInput, input); - THC_pointwiseApply3(state, gradInput, input, gradOutput, - ReLU6UpdateGradInput() - ); - } - - THCudaCheck(cudaGetLastError()); -} diff --git a/THCUNN.h b/THCUNN.h index 0b9d661abf..cbc71b428e 100644 --- a/THCUNN.h +++ b/THCUNN.h @@ -97,14 +97,16 @@ TH_API void THNN_CudaHardTanh_updateOutput( THCudaTensor *input, THCudaTensor *output, float min_val, - float max_val); + float max_val, + bool inplace); TH_API void THNN_CudaHardTanh_updateGradInput( THCState *state, THCudaTensor *input, THCudaTensor *gradOutput, THCudaTensor *gradInput, float min_val, - float max_val); + float max_val, + bool inplace); TH_API void THNN_CudaL1Cost_updateOutput( THCState *state, @@ -403,18 +405,6 @@ TH_API void THNN_CudaThreshold_updateGradInput( double threshold, bool inplace); -TH_API void THNN_CudaReLU6_updateOutput( - THCState *state, - THCudaTensor *input, - THCudaTensor *output, - bool inplace); -TH_API void THNN_CudaReLU6_updateGradInput( - THCState *state, - THCudaTensor *input, - THCudaTensor *gradOutput, - THCudaTensor *gradInput, - bool inplace); - TH_API void THNN_CudaTemporalConvolution_updateOutput( THCState *state, THCudaTensor *input,