diff --git a/src/layer/arm/convolution_arm.cpp b/src/layer/arm/convolution_arm.cpp index 646444b14a2..c443a5f69ac 100644 --- a/src/layer/arm/convolution_arm.cpp +++ b/src/layer/arm/convolution_arm.cpp @@ -105,6 +105,13 @@ int Convolution_arm::create_pipeline(const Option& opt) ncnn::ParamDict pd; activation->load_param(pd); } + else if (activation_type == 5) + { + activation = ncnn::create_layer(ncnn::LayerType::Mish); + + ncnn::ParamDict pd; + activation->load_param(pd); + } if (activation) { @@ -993,6 +1000,10 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option { sum = static_cast(1.f / (1.f + exp(-sum))); } + else if (activation_type == 5) + { + sum = static_cast(sum * tanh(log(exp(sum) + 1.f))); + } outptr[j] = sum; } @@ -1622,6 +1633,10 @@ int Convolution_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const { sum = static_cast(1.f / (1.f + exp(-sum))); } + else if (activation_type == 5) + { + sum = static_cast(sum * tanh(log(exp(sum) + 1.f))); + } outptr[j] = float32_to_bfloat16(sum); } diff --git a/src/layer/arm/convolutiondepthwise_arm.cpp b/src/layer/arm/convolutiondepthwise_arm.cpp index c9e8e4beff6..285f1223748 100644 --- a/src/layer/arm/convolutiondepthwise_arm.cpp +++ b/src/layer/arm/convolutiondepthwise_arm.cpp @@ -83,6 +83,13 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt) ncnn::ParamDict pd; activation->load_param(pd); } + else if (activation_type == 5) + { + activation = ncnn::create_layer(ncnn::LayerType::Mish); + + ncnn::ParamDict pd; + activation->load_param(pd); + } if (activation) { @@ -753,6 +760,10 @@ int ConvolutionDepthWise_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blo { sum = static_cast(1.f / (1.f + exp(-sum))); } + else if (activation_type == 5) + { + sum = static_cast(sum * tanh(log(exp(sum) + 1.f))); + } outptr[j] = float32_to_bfloat16(sum); } diff --git a/src/layer/arm/mish_arm.cpp b/src/layer/arm/mish_arm.cpp new file mode 100644 index 00000000000..d0ddb88b25e 
--- /dev/null +++ b/src/layer/arm/mish_arm.cpp @@ -0,0 +1,162 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "mish_arm.h" + +#if __ARM_NEON +#include /* NOTE(review): header name is missing in this copy of the patch; presumably arm_neon.h - confirm against the original diff */ +#include "neon_mathfun.h" +#endif // __ARM_NEON + +#include /* NOTE(review): header name is missing in this copy of the patch; presumably math.h, since tanh/log/exp are called below - confirm */ + +namespace ncnn { + +DEFINE_LAYER_CREATOR(Mish_arm) + +Mish_arm::Mish_arm() /* Declares the layer's capabilities: packed layouts only when built with NEON, bf16 storage unconditionally. */ +{ +#if __ARM_NEON + support_packing = true; +#endif // __ARM_NEON + + support_bf16_storage = true; +} + +int Mish_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const /* Applies Mish in place, x * tanh(log(exp(x)+1)): the NEON loop handles four floats per step and a scalar loop handles the remainder. Hands off to forward_inplace_bf16s when opt.use_bf16_storage is set. Returns 0. */ +{ + if (opt.use_bf16_storage) + return forward_inplace_bf16s(bottom_top_blob, opt); + + int w = bottom_top_blob.w; + int h = bottom_top_blob.h; + int channels = bottom_top_blob.c; + int size = w * h; + int elempack = bottom_top_blob.elempack; + +#if __ARM_NEON + if (elempack == 4) + { /* NOTE(review): the text that follows is truncated in this copy of the patch ("for (int q=0; q> 2;"): the body of the elempack == 4 branch and the header of the following loop are not visible here. Check against the original diff before relying on this hunk. */ + #pragma omp parallel for num_threads(opt.num_threads) + for (int q=0; q> 2; + int remain = size - (nn << 2); +#else + int remain = size; +#endif // __ARM_NEON + +#if __ARM_NEON + for (; nn>0; nn--) + { /* vector path: load 4 floats, compute p * tanh(log(exp(p)+1)) with the neon_mathfun helpers, store back, advance by 4 */ + float32x4_t _p = vld1q_f32(ptr); + _p = vmulq_f32(_p, tanh_ps(log_ps(vaddq_f32(exp_ps(_p), vdupq_n_f32(1.f))))); + vst1q_f32(ptr, _p); + ptr += 4; + } +#endif // __ARM_NEON + for (; remain>0; remain--) + { /* scalar path for the elements left over after the 4-wide loop (all of them when NEON is unavailable) */ + *ptr = *ptr * tanh(log(exp(*ptr) + 1.f)); + ptr++; + } + } + + return 0; +} + +int Mish_arm::forward_inplace_bf16s(Mat& 
bottom_top_blob, const Option& opt) const +{ /* bf16-storage variant of the in-place Mish. Each stored 16-bit value is widened to float32 (shift left by 16 in the NEON loop, bfloat16_to_float32 in the scalar loop), run through x * tanh(log(exp(x)+1)), and narrowed back (shift right by 16 in the NEON loop, float32_to_bfloat16 in the scalar loop). Returns 0. */ + int w = bottom_top_blob.w; + int h = bottom_top_blob.h; + int channels = bottom_top_blob.c; + int size = w * h; + int elempack = bottom_top_blob.elempack; + +#if __ARM_NEON + if (elempack == 4) + { /* NOTE(review): the text that follows is truncated in this copy of the patch ("for (int q=0; q> 2;"): the body of the elempack == 4 branch and the header of the following loop are not visible here. Check against the original diff before relying on this hunk. */ + #pragma omp parallel for num_threads(opt.num_threads) + for (int q=0; q> 2; + int remain = size - (nn << 2); +#else + int remain = size; +#endif // __ARM_NEON + +#if __ARM_NEON + for (; nn>0; nn--) + { /* vector path: 4 bf16 values per step; the 16-bit shifts move the bf16 bits into and out of the upper half of each 32-bit lane */ + float32x4_t _p = vreinterpretq_f32_u32(vshll_n_u16(vld1_u16(ptr), 16)); + _p = vmulq_f32(_p, tanh_ps(log_ps(vaddq_f32(exp_ps(_p), vdupq_n_f32(1.f))))); + vst1_u16(ptr, vshrn_n_u32(vreinterpretq_u32_f32(_p), 16)); + ptr += 4; + } +#endif // __ARM_NEON + for (; remain>0; remain--) + { /* scalar path for the elements left over after the 4-wide loop (all of them when NEON is unavailable) */ + float v = bfloat16_to_float32(*ptr); + v = v * tanh(log(exp(v) + 1.f)); + *ptr = float32_to_bfloat16(v); + ptr++; + } + } + + return 0; +} + +} // namespace ncnn diff --git a/src/layer/arm/mish_arm.h b/src/layer/arm/mish_arm.h new file mode 100644 index 00000000000..ac423eda7f2 --- /dev/null +++ b/src/layer/arm/mish_arm.h @@ -0,0 +1,35 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef LAYER_MISH_ARM_H +#define LAYER_MISH_ARM_H + +#include "mish.h" + +namespace ncnn { + +class Mish_arm : virtual public Mish /* ARM-specific Mish layer: overrides the in-place forward pass and declares a protected helper for bf16 storage. */ +{ +public: + Mish_arm(); + + virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; /* in-place forward pass over bottom_top_blob */ + +protected: + int forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const; /* same operation for blobs held in bf16 storage; presumably reached from forward_inplace - confirm in mish_arm.cpp */ +}; + +} // namespace ncnn + +#endif // LAYER_MISH_ARM_H diff --git a/src/layer/arm/neon_activation.h b/src/layer/arm/neon_activation.h index 81676ddf0de..93c88315c8c 100644 --- a/src/layer/arm/neon_activation.h +++ b/src/layer/arm/neon_activation.h @@ -40,6 +40,10 @@ static inline float activation_ss(float v, int activation_type, const ncnn::Mat& { v = 1.f / (1.f + exp(-v)); } + else if (activation_type == 5) + { /* fused Mish, scalar: v * tanh(log(exp(v)+1)), evaluated directly with no threshold */ + v = v * tanh(log(exp(v) + 1.f)); + } return v; } @@ -78,6 +82,10 @@ static inline float32x4_t activation_ps(float32x4_t _v, int activation_type, con // _outp = vmulq_f32(vrecpsq_f32(_v, _outp), _outp); _v = _outp; } + else if (activation_type == 5) + { /* fused Mish, 4-wide NEON: same direct formula as the scalar helper */ + _v = vmulq_f32(_v, tanh_ps(log_ps(vaddq_f32(exp_ps(_v), vdupq_n_f32(1.f))))); + } return _v; } diff --git a/src/layer/convolution.cpp b/src/layer/convolution.cpp index a0ad38019d7..3a8d8a01fc6 100644 --- a/src/layer/convolution.cpp +++ b/src/layer/convolution.cpp @@ -255,6 +255,18 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op { sum = static_cast(1.f / (1.f + exp(-sum))); } + else if (activation_type == 5) + { /* fused Mish with a guarded log(exp(x)+1) term: y is that term and the result is x * tanh(y) */ + const float MISH_THRESHOLD = 20; /* outside plus or minus 20 the term is replaced by x (large positive x) or by exp(x) (large negative x) instead of being evaluated directly */ + float x = sum, y; + if (x > MISH_THRESHOLD) + y = x; + else if (x < -MISH_THRESHOLD) + y = expf(x); + else + y = logf(expf(x) + 1); + sum = static_cast(x * tanh(y)); /* NOTE(review): (1) the ARM helpers in neon_activation.h use the un-thresholded formula, so the two paths can differ slightly for |x| above 20 - confirm that is within test tolerance; (2) tanh is used next to expf/logf - confirm whether tanhf was intended; (3) the static_cast target type is missing in this copy of the patch */ + } outptr[j] = sum; } diff --git a/src/layer/convolutiondepthwise.cpp b/src/layer/convolutiondepthwise.cpp index 09c59f4f504..2027b1bf6f2 100644 --- a/src/layer/convolutiondepthwise.cpp +++ b/src/layer/convolutiondepthwise.cpp @@ -238,6 +238,18 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O { sum = 
static_cast(1.f / (1.f + exp(-sum))); } + else if (activation_type == 5) + { + const float MISH_THRESHOLD = 20; + float x = sum, y; + if (x > MISH_THRESHOLD) + y = x; + else if (x < -MISH_THRESHOLD) + y = expf(x); + else + y = logf(expf(x) + 1); + sum = static_cast(x * tanh(y)); + } outptr[j] = sum; } @@ -313,6 +325,18 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O { sum = static_cast(1.f / (1.f + exp(-sum))); } + else if (activation_type == 5) + { + const float MISH_THRESHOLD = 20; + float x = sum, y; + if (x > MISH_THRESHOLD) + y = x; + else if (x < -MISH_THRESHOLD) + y = expf(x); + else + y = logf(expf(x) + 1); + sum = static_cast(x * tanh(y)); + } outptr[j] = sum; } diff --git a/src/layer/vulkan/shader/convolution.comp b/src/layer/vulkan/shader/convolution.comp index 7214ebbb709..1d10709500b 100644 --- a/src/layer/vulkan/shader/convolution.comp +++ b/src/layer/vulkan/shader/convolution.comp @@ -160,6 +160,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolution_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_1x1s1d1.comp index 8f0443fd20a..085775951cf 100644 --- a/src/layer/vulkan/shader/convolution_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_1x1s1d1.comp @@ -160,6 +160,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum.r); diff --git a/src/layer/vulkan/shader/convolution_pack1to4.comp b/src/layer/vulkan/shader/convolution_pack1to4.comp index 418f99ee2fd..711f44aa9bb 100644 --- a/src/layer/vulkan/shader/convolution_pack1to4.comp +++ b/src/layer/vulkan/shader/convolution_pack1to4.comp @@ -168,6 +168,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + 
exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolution_pack1to8.comp b/src/layer/vulkan/shader/convolution_pack1to8.comp index 76f52ff996a..d9849b8fa80 100644 --- a/src/layer/vulkan/shader/convolution_pack1to8.comp +++ b/src/layer/vulkan/shader/convolution_pack1to8.comp @@ -177,6 +177,11 @@ void main() sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); } + if (activation_type == 5) + { + sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); + sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); + } #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolution_pack4.comp b/src/layer/vulkan/shader/convolution_pack4.comp index 0d930f26582..5a714f86df8 100644 --- a/src/layer/vulkan/shader/convolution_pack4.comp +++ b/src/layer/vulkan/shader/convolution_pack4.comp @@ -188,6 +188,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolution_pack4_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_pack4_1x1s1d1.comp index a7487874594..c41f207ac95 100644 --- a/src/layer/vulkan/shader/convolution_pack4_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_pack4_1x1s1d1.comp @@ -208,6 +208,13 @@ void main() sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); } + if (activation_type == 5) + { + sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); + sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); + sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); + sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); + } #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum0); diff --git 
a/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd23_transform_output.comp b/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd23_transform_output.comp index 8f5ba51a6a9..a3596536fa7 100644 --- a/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd23_transform_output.comp +++ b/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd23_transform_output.comp @@ -184,6 +184,13 @@ void main() v01 = afp(1.f) / (afp(1.f) + exp(-v01)); v11 = afp(1.f) / (afp(1.f) + exp(-v11)); } + if (activation_type == 5) + { + v00 = v00 * tanh(log(exp(v00) + afp(1.f))); + v01 = v01 * tanh(log(exp(v01) + afp(1.f))); + v10 = v10 * tanh(log(exp(v10) + afp(1.f))); + v11 = v11 * tanh(log(exp(v11) + afp(1.f))); + } // store 2x2 #if NCNN_image_shader diff --git a/src/layer/vulkan/shader/convolution_pack4to1.comp b/src/layer/vulkan/shader/convolution_pack4to1.comp index 9d6822a0920..b318f756260 100644 --- a/src/layer/vulkan/shader/convolution_pack4to1.comp +++ b/src/layer/vulkan/shader/convolution_pack4to1.comp @@ -168,6 +168,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolution_pack4to8.comp b/src/layer/vulkan/shader/convolution_pack4to8.comp index fed780da886..aed8ad6a979 100644 --- a/src/layer/vulkan/shader/convolution_pack4to8.comp +++ b/src/layer/vulkan/shader/convolution_pack4to8.comp @@ -203,6 +203,11 @@ void main() sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); } + if (activation_type == 5) + { + sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); + sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); + } #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolution_pack8.comp b/src/layer/vulkan/shader/convolution_pack8.comp index 
38eb84add1c..7c1d5cbc276 100644 --- a/src/layer/vulkan/shader/convolution_pack8.comp +++ b/src/layer/vulkan/shader/convolution_pack8.comp @@ -203,6 +203,11 @@ void main() sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); } + if (activation_type == 5) + { + sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); + sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); + } #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolution_pack8_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_pack8_1x1s1d1.comp index 8ff3031a295..e77992ff3f7 100644 --- a/src/layer/vulkan/shader/convolution_pack8_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_pack8_1x1s1d1.comp @@ -283,6 +283,17 @@ void main() sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); } + if (activation_type == 5) + { + sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); + sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); + sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); + sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); + sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); + sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); + sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); + sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); + } #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd23_transform_output.comp b/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd23_transform_output.comp index deacb636fb3..fe68194695a 100644 --- a/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd23_transform_output.comp +++ b/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd23_transform_output.comp @@ -201,6 +201,17 @@ void main() v11[0] = afp(1.f) / (afp(1.f) + exp(-v11[0])); v11[1] = afp(1.f) / (afp(1.f) + exp(-v11[1])); } + 
if (activation_type == 5) + { + v00[0] = v00[0] * tanh(log(exp(v00[0]) + afp(1.f))); + v00[1] = v00[1] * tanh(log(exp(v00[1]) + afp(1.f))); + v10[0] = v10[0] * tanh(log(exp(v10[0]) + afp(1.f))); + v10[1] = v10[1] * tanh(log(exp(v10[1]) + afp(1.f))); + v01[0] = v01[0] * tanh(log(exp(v01[0]) + afp(1.f))); + v01[1] = v01[1] * tanh(log(exp(v01[1]) + afp(1.f))); + v11[0] = v11[0] * tanh(log(exp(v11[0]) + afp(1.f))); + v11[1] = v11[1] * tanh(log(exp(v11[1]) + afp(1.f))); + } // store 2x2 #if NCNN_image_shader diff --git a/src/layer/vulkan/shader/convolution_pack8to1.comp b/src/layer/vulkan/shader/convolution_pack8to1.comp index c57db92b80c..8d5afd5d576 100644 --- a/src/layer/vulkan/shader/convolution_pack8to1.comp +++ b/src/layer/vulkan/shader/convolution_pack8to1.comp @@ -171,6 +171,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolution_pack8to4.comp b/src/layer/vulkan/shader/convolution_pack8to4.comp index 34bf805f3ad..a60bbffe89d 100644 --- a/src/layer/vulkan/shader/convolution_pack8to4.comp +++ b/src/layer/vulkan/shader/convolution_pack8to4.comp @@ -183,6 +183,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise.comp b/src/layer/vulkan/shader/convolutiondepthwise.comp index c8672750885..b4316deb31d 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise.comp @@ -155,6 +155,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff 
--git a/src/layer/vulkan/shader/convolutiondepthwise_group.comp b/src/layer/vulkan/shader/convolutiondepthwise_group.comp index a3d012a240f..0102a08611d 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group.comp @@ -175,6 +175,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to4.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to4.comp index 47e1aa043ed..06f1302ffbe 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to4.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to4.comp @@ -183,6 +183,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to8.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to8.comp index 888182c3dc0..851b7664f19 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to8.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to8.comp @@ -192,6 +192,11 @@ void main() sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); } + if (activation_type == 5) + { + sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); + sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); + } #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4.comp index cd7a5501504..8457d035c22 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4.comp +++ 
b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4.comp @@ -203,6 +203,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to1.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to1.comp index 17906f03511..7503d3a6022 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to1.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to1.comp @@ -183,6 +183,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to8.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to8.comp index a847f8b0df3..6fd10d5861b 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to8.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to8.comp @@ -218,6 +218,11 @@ void main() sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); } + if (activation_type == 5) + { + sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); + sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); + } #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8.comp index ec3009ffdc2..7ca2f208717 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8.comp @@ -218,6 +218,11 @@ void main() sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); } + if (activation_type == 5) + { + sum[0] 
= sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); + sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); + } #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to1.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to1.comp index 4ba8bf3d533..a35478ed2d5 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to1.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to1.comp @@ -186,6 +186,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to4.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to4.comp index 3f3f8a58324..a98fac19638 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to4.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to4.comp @@ -198,6 +198,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_pack4.comp b/src/layer/vulkan/shader/convolutiondepthwise_pack4.comp index 2313328ec1a..0bd4929bf08 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_pack4.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_pack4.comp @@ -163,6 +163,10 @@ void main() { sum = afp(1.f) / (afp(1.f) + exp(-sum)); } + if (activation_type == 5) + { + sum = sum * tanh(log(exp(sum) + afp(1.f))); + } #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_pack8.comp b/src/layer/vulkan/shader/convolutiondepthwise_pack8.comp index c8da50e5316..b480613714c 100644 --- 
a/src/layer/vulkan/shader/convolutiondepthwise_pack8.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_pack8.comp @@ -172,6 +172,11 @@ void main() sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); } + if (activation_type == 5) + { + sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); + sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); + } #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/x86/convolution_x86.cpp b/src/layer/x86/convolution_x86.cpp index 0b682140574..ed35cddf82d 100644 --- a/src/layer/x86/convolution_x86.cpp +++ b/src/layer/x86/convolution_x86.cpp @@ -84,6 +84,13 @@ int Convolution_x86::create_pipeline(const Option& opt) ncnn::ParamDict pd; activation->load_param(pd); } + else if (activation_type == 5) + { + activation = ncnn::create_layer(ncnn::LayerType::Mish); + + ncnn::ParamDict pd; + activation->load_param(pd); + } if (activation) { diff --git a/src/layer/x86/convolutiondepthwise_x86.cpp b/src/layer/x86/convolutiondepthwise_x86.cpp index 343b35d28c9..a5bc77c0baf 100644 --- a/src/layer/x86/convolutiondepthwise_x86.cpp +++ b/src/layer/x86/convolutiondepthwise_x86.cpp @@ -71,6 +71,13 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt) ncnn::ParamDict pd; activation->load_param(pd); } + else if (activation_type == 5) + { + activation = ncnn::create_layer(ncnn::LayerType::Mish); + + ncnn::ParamDict pd; + activation->load_param(pd); + } if (activation) { diff --git a/tests/test_convolution.cpp b/tests/test_convolution.cpp index 7a7d3b18b02..cc28a73c191 100644 --- a/tests/test_convolution.cpp +++ b/tests/test_convolution.cpp @@ -29,7 +29,7 @@ static int test_convolution(int w, int h, int c, int outch, int kernel, int dila pd.set(5, bias);// bias_term pd.set(6, outch*c*kernel*kernel); - int activation_type = RAND() % 5;// 0 1 2 3 4 + int activation_type = RAND() % 6;// 0 1 2 3 4 5 ncnn::Mat activation_params(2); activation_params[0] = 
RandomFloat(-1, 0);// alpha activation_params[1] = RandomFloat(0, 1);// beta diff --git a/tests/test_convolutiondepthwise.cpp b/tests/test_convolutiondepthwise.cpp index 878eaf0dedd..4474150eb9f 100644 --- a/tests/test_convolutiondepthwise.cpp +++ b/tests/test_convolutiondepthwise.cpp @@ -30,7 +30,7 @@ static int test_convolutiondepthwise(int w, int h, int c, int outch, int kernel, pd.set(6, outch/group*c/group*kernel*kernel*group); pd.set(7, group); - int activation_type = RAND() % 5;// 0 1 2 3 4 + int activation_type = RAND() % 6;// 0 1 2 3 4 5 ncnn::Mat activation_params(2); activation_params[0] = RandomFloat(-1, 0);// alpha activation_params[1] = RandomFloat(0, 1);// beta diff --git a/tools/ncnnoptimize.cpp b/tools/ncnnoptimize.cpp index fff944bdd35..ecc11330dd9 100644 --- a/tools/ncnnoptimize.cpp +++ b/tools/ncnnoptimize.cpp @@ -884,7 +884,7 @@ int NetOptimize::fuse_convolution_activation() int j = i + 1; for (; jtype != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid") + if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid" && layers[j]->type != "Mish") continue; if (layers[j]->bottoms.size() != 1) @@ -931,6 +931,10 @@ int NetOptimize::fuse_convolution_activation() { convolution->activation_type = 4; } + else if (activation->type == "Mish") + { + convolution->activation_type = 5; + } int top_blob_index_final = activation->tops[0]; convolution->tops[0] = top_blob_index_final; @@ -955,7 +959,7 @@ int NetOptimize::fuse_convolutiondepthwise_activation() int j = i + 1; for (; jtype != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid") + if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid" && layers[j]->type != "Mish") continue; if (layers[j]->bottoms.size() != 1) @@ -1002,6 +1006,10 @@ int NetOptimize::fuse_convolutiondepthwise_activation() { convolutiondepthwise->activation_type = 4; } + else if (activation->type == "Mish") + { + 
convolutiondepthwise->activation_type = 5; + } int top_blob_index_final = activation->tops[0]; convolutiondepthwise->tops[0] = top_blob_index_final;