Skip to content

Commit

Permalink
Merge pull request opencv#25872 from fengyuentau:core/v_erf
Browse files Browse the repository at this point in the history
core: add v_erf opencv#25872

This patch adds v_erf, which is needed by opencv#25147.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under the GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There are an accuracy test, a performance test and test data in the opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
  • Loading branch information
fengyuentau authored Jul 5, 2024
1 parent 88b28ee commit d30b945
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 1 deletion.
10 changes: 9 additions & 1 deletion modules/core/include/opencv2/core/hal/intrin_cpp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,8 @@ Most of these operations return only one value.
### Other math
- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude, @ref v_exp
- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude, @ref v_exp,
@ref v_erf
- Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs
### Conversions
Expand Down Expand Up @@ -761,6 +762,13 @@ OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp)
OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)
#define OPENCV_HAL_MATH_HAVE_LOG 1

/**
* @brief Error function.
*
* Declared through OPENCV_HAL_IMPL_MATH_FUNC with std::erf as the scalar function.
*
* @note Only FP32 precision is supported for now.
*/
OPENCV_HAL_IMPL_MATH_FUNC(v_erf, std::erf, _Tp)

//! @cond IGNORED
OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp)
#define OPENCV_HAL_MATH_HAVE_SIN 1
Expand Down
45 changes: 45 additions & 0 deletions modules/core/include/opencv2/core/hal/intrin_math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,5 +418,50 @@ namespace CV__SIMD_NAMESPACE {
#define OPENCV_HAL_MATH_HAVE_LOG 1
//! @}
#endif

/* This implementation is derived from PyTorch's approximation of the error function (erf):
https://github.com/pytorch/pytorch/blob/9c50ecc84b9a6e699a7f058891b889aafbf976c7/aten/src/ATen/cpu/vec/vec512/vec512_float.h#L189-L220
*/

#ifndef OPENCV_HAL_MATH_HAVE_ERF

//! @name Error Function
//! @{

/**
 * @brief Lane-wise single-precision approximation of the error function.
 *
 * With t = 1 / (1 + 0.3275911 * |v|), the result magnitude is
 *     1 - t * P(t) * exp(-v * v)
 * where P is a degree-4 polynomial in t evaluated in Horner form.
 * The sign bit of the input is then XOR-ed into that value.
 *
 * @param v input vector
 * @return vector holding the approximation for every lane of v
 */
inline v_float32 v_erf(v_float32 v) {
    const v_float32 one = vx_setall_f32(1.0f);
    // -0.f has only the sign bit set: AND-ing with it extracts a sign, XOR-ing with it flips one.
    const v_float32 sign_bit = vx_setall_f32(-0.f);

    // Remember the sign of the input so it can be re-applied to the final value.
    const v_float32 input_sign = v_and(sign_bit, v);

    // -exp(-v * v); both negations are performed by flipping the sign bit.
    const v_float32 minus_square = v_xor(sign_bit, v_mul(v, v));
    const v_float32 minus_gauss = v_xor(sign_bit, v_exp(minus_square));

    // t = 1 / (1 + 0.3275911 * |v|)
    const v_float32 denom = v_fma(vx_setall_f32(0.3275911f), v_abs(v), one);
    const v_float32 t = v_div(one, denom);

    // Horner evaluation of the polynomial in t, highest-order coefficient first.
    v_float32 poly = v_fma(vx_setall_f32(1.061405429f), t, vx_setall_f32(-1.453152027f));
    poly = v_fma(poly, t, vx_setall_f32(1.421413741f));
    poly = v_fma(poly, t, vx_setall_f32(-0.284496736f));
    poly = v_fma(poly, t, vx_setall_f32(0.254829592f));

    // 1 - poly * t * exp(-v * v)
    const v_float32 magnitude = v_fma(poly, v_mul(t, minus_gauss), one);
    return v_xor(input_sign, magnitude);
}

#define OPENCV_HAL_MATH_HAVE_ERF 1
//! @}

#endif // OPENCV_HAL_MATH_HAVE_ERF


}
#endif // OPENCV_HAL_INTRIN_HPP
43 changes: 43 additions & 0 deletions modules/core/test/test_intrin_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1864,6 +1864,48 @@ template<typename R> struct TheTest
#endif
return *this;
}

// Compares v_erf against std::erf on randomly generated FP32 lanes.
// Roughly one lane in ten is replaced by +Inf, -Inf or NaN; for those the expected
// results are 1, -1 and NaN respectively. All other lanes are checked with a relative
// tolerance of 1e-3 (plus a tiny absolute floor so a zero reference is still comparable).
TheTest &test_erf_fp32() {
    constexpr int num_loops = 10000;
    constexpr double insert_singular_input_probability = 0.1;
    const std::vector<LaneType> singular_inputs{INFINITY, -INFINITY, NAN};
    const int lanes = VTraits<R>::vlanes();
    cv::RNG_MT19937 rng;

    for (int iter = 0; iter < num_loops; ++iter) {
        // Fill one vector's worth of inputs.
        Data<R> src;
        for (int lane = 0; lane < lanes; ++lane) {
            const bool use_singular = rng.uniform(0.f, 1.f) <= insert_singular_input_probability;
            if (use_singular) {
                const int pick = rng.uniform(0, int(singular_inputs.size()));
                src[lane] = singular_inputs[pick];
            } else {
                // v_erf calls v_exp on the negated square of its input. The range is
                // about +-sqrt(88), which keeps the squared magnitude within ~88,
                // near the limit of what float exp can represent.
                src[lane] = (LaneType) rng.uniform(-9.4f, 9.4f);
            }
        }

        Data<R> dst = v_erf(R(src));

        // Verify every lane against the scalar reference.
        for (int lane = 0; lane < lanes; ++lane) {
            SCOPED_TRACE(cv::format("Random test value: %f", src[lane]));
            if (std::isnan(src[lane])) {
                EXPECT_TRUE(std::isnan(dst[lane]));
            } else if (std::isinf(src[lane])) {
                if (src[lane] < 0) {
                    EXPECT_EQ(-1, dst[lane]);
                } else {
                    EXPECT_EQ(1, dst[lane]);
                }
            } else {
                const LaneType expected = std::erf(src[lane]);
                const auto abs_error = std::abs(dst[lane] - expected);
                const auto tolerance = 1e-3f * (std::abs(expected) + FLT_MIN * 1e4f);
                EXPECT_LT(abs_error, tolerance);
            }
        }
    }

    return *this;
}
};

// Prints "SIMD<bits>: <CV__TRACE_FUNCTION>", where <bits> is 8 times the number of v_uint8 lanes.
// The `type` argument is not used by the expansion.
#define DUMP_ENTRY(type) printf("SIMD%d: %s\n", 8*VTraits<v_uint8>::vlanes(), CV__TRACE_FUNCTION);
Expand Down Expand Up @@ -2179,6 +2221,7 @@ void test_hal_intrin_float32()
.test_pack_triplets()
.test_exp_fp32()
.test_log_fp32()
.test_erf_fp32()
#if CV_SIMD_WIDTH == 32
.test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>()
.test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>()
Expand Down

0 comments on commit d30b945

Please sign in to comment.