Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
Browse files Browse the repository at this point in the history
  • Loading branch information
alalek committed Sep 25, 2019
2 parents 9f4f900 + 7ce9428 commit e2a5a6a
Show file tree
Hide file tree
Showing 50 changed files with 5,957 additions and 163 deletions.
84 changes: 84 additions & 0 deletions doc/js_tutorials/js_setup/js_setup/js_setup.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,90 @@ Building OpenCV.js from Source
@note
It requires `node` installed in your development environment.

-# [optional] To build `opencv.js` with threads optimization, append `--threads` option.

For example:
@code{.bash}
python ./platforms/js/build_js.py build_js --build_wasm --threads
@endcode

The default threads number is the logic core number of your device. You can use `cv.parallel_pthreads_set_threads_num(number)` to set threads number by yourself and use `cv.parallel_pthreads_get_threads_num()` to get the current threads number.

@note
You should build wasm version of `opencv.js` if you want to enable this optimization. And the threads optimization only works in browser, not in node.js. You need to enable the `WebAssembly threads support` feature first with your browser. For example, if you use Chrome, please enable this flag in chrome://flags.

-# [optional] To build `opencv.js` with wasm simd optimization, append `--simd` option.

For example:
@code{.bash}
python ./platforms/js/build_js.py build_js --build_wasm --simd
@endcode

The simd optimization is experimental as wasm simd is still in development.

@note
Now only emscripten LLVM upstream backend supports wasm simd, refering to https://emscripten.org/docs/porting/simd.html. So you need to setup upstream backend environment with the following command first:
@code{.bash}
./emsdk update
./emsdk install latest-upstream
./emsdk activate latest-upstream
source ./emsdk_env.sh
@endcode

@note
You should build wasm version of `opencv.js` if you want to enable this optimization. For browser, you need to enable the `WebAssembly SIMD support` feature first. For example, if you use Chrome, please enable this flag in chrome://flags. For Node.js, you need to run script with flag `--experimental-wasm-simd`.

@note
The simd version of `opencv.js` built by latest LLVM upstream may not work with the stable browser or old version of Node.js. Please use the latest version of unstable browser or Node.js to get new features, like `Chrome Dev`.

-# [optional] To build wasm intrinsics tests, append `--build_wasm_intrin_test` option.

For example:
@code{.bash}
python ./platforms/js/build_js.py build_js --build_wasm --simd --build_wasm_intrin_test
@endcode

For wasm intrinsics tests, you can use the following function to test all the cases:
@code{.js}
cv.test_hal_intrin_all()
@endcode

And the failed cases will be logged in the JavaScript debug console.

If you only want to test single data type of wasm intrinsics, you can use the following functions:
@code{.js}
cv.test_hal_intrin_uint8()
cv.test_hal_intrin_int8()
cv.test_hal_intrin_uint16()
cv.test_hal_intrin_int16()
cv.test_hal_intrin_uint32()
cv.test_hal_intrin_int32()
cv.test_hal_intrin_uint64()
cv.test_hal_intrin_int64()
cv.test_hal_intrin_float32()
cv.test_hal_intrin_float64()
@endcode

-# [optional] To build performance tests, append `--build_perf` option.

For example:
@code{.bash}
python ./platforms/js/build_js.py build_js --build_perf
@endcode

To run performance tests, launch a local web server in \<build_dir\>/bin folder. For example, node http-server which serves on `localhost:8080`.

There are some kernels now in the performance test like `cvtColor`, `resize` and `threshold`. For example, if you want to test `threshold`, please navigate the web browser to `http://localhost:8080/perf/perf_imgproc/perf_threshold.html`. You need to input the test parameter like `(1920x1080, CV_8UC1, THRESH_BINARY)`, and then click the `Run` button to run the case. And if you don't input the parameter, it will run all the cases of this kernel.

You can also run tests using Node.js.

For example, run `threshold` with parameter `(1920x1080, CV_8UC1, THRESH_BINARY)`:
@code{.sh}
cd bin/perf
npm install
node perf_threshold.js --test_param_filter="(1920x1080, CV_8UC1, THRESH_BINARY)"
@endcode

Building OpenCV.js with Docker
---------------------------------------

Expand Down
9 changes: 9 additions & 0 deletions modules/core/include/opencv2/core/cv_cpu_dispatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@
# define CV_MSA 1
#endif

#ifdef __EMSCRIPTEN__
# define CV_WASM_SIMD 1
# include <wasm_simd128.h>
#endif

#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__

#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
Expand Down Expand Up @@ -328,3 +333,7 @@ struct VZeroUpperGuard {
#ifndef CV_MSA
# define CV_MSA 0
#endif

#ifndef CV_WASM_SIMD
# define CV_WASM_SIMD 0
#endif
1 change: 1 addition & 0 deletions modules/core/include/opencv2/core/cvdef.h
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,7 @@ __CV_ENUM_FLAGS_BITWISE_XOR_EQ (EnumType, EnumType)
#endif

#define CV_CXX_MOVE_SEMANTICS 1
#define CV_CXX_MOVE(x) std::move(x)
#define CV_CXX_STD_ARRAY 1
#include <array>
#ifndef CV_OVERRIDE
Expand Down
5 changes: 4 additions & 1 deletion modules/core/include/opencv2/core/hal/intrin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
# undef CV_MSA
#endif

#if CV_SSE2 || CV_NEON || CV_VSX || CV_MSA
#if CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD
#define CV__SIMD_FORWARD 128
#include "opencv2/core/hal/intrin_forward.hpp"
#endif
Expand All @@ -190,6 +190,9 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;

#include "opencv2/core/hal/intrin_msa.hpp"

#elif CV_WASM_SIMD
#include "opencv2/core/hal/intrin_wasm.hpp"

#else

#define CV_SIMD128_CPP 1
Expand Down
56 changes: 22 additions & 34 deletions modules/core/include/opencv2/core/hal/intrin_avx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1241,6 +1241,11 @@ inline int v_signmask(const v_int32x8& a)
inline int v_signmask(const v_uint32x8& a)
{ return v_signmask(v_reinterpret_as_f32(a)); }

inline int v_signmask(const v_int64x4& a)
{ return v_signmask(v_reinterpret_as_f64(a)); }
inline int v_signmask(const v_uint64x4& a)
{ return v_signmask(v_reinterpret_as_f64(a)); }

inline int v_scan_forward(const v_int8x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
inline int v_scan_forward(const v_uint8x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
inline int v_scan_forward(const v_int16x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; }
Expand All @@ -1253,40 +1258,23 @@ inline int v_scan_forward(const v_uint64x4& a) { return trailingZeros32(v_signma
inline int v_scan_forward(const v_float64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; }

/** Checks **/
#define OPENCV_HAL_IMPL_AVX_CHECK(_Tpvec, and_op, allmask) \
inline bool v_check_all(const _Tpvec& a) \
{ \
int mask = v_signmask(v_reinterpret_as_s8(a)); \
return and_op(mask, allmask) == allmask; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
int mask = v_signmask(v_reinterpret_as_s8(a)); \
return and_op(mask, allmask) != 0; \
}

OPENCV_HAL_IMPL_AVX_CHECK(v_uint8x32, OPENCV_HAL_1ST, -1)
OPENCV_HAL_IMPL_AVX_CHECK(v_int8x32, OPENCV_HAL_1ST, -1)
OPENCV_HAL_IMPL_AVX_CHECK(v_uint16x16, OPENCV_HAL_AND, (int)0xaaaaaaaa)
OPENCV_HAL_IMPL_AVX_CHECK(v_int16x16, OPENCV_HAL_AND, (int)0xaaaaaaaa)
OPENCV_HAL_IMPL_AVX_CHECK(v_uint32x8, OPENCV_HAL_AND, (int)0x88888888)
OPENCV_HAL_IMPL_AVX_CHECK(v_int32x8, OPENCV_HAL_AND, (int)0x88888888)

#define OPENCV_HAL_IMPL_AVX_CHECK_FLT(_Tpvec, allmask) \
inline bool v_check_all(const _Tpvec& a) \
{ \
int mask = v_signmask(a); \
return mask == allmask; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
int mask = v_signmask(a); \
return mask != 0; \
}

OPENCV_HAL_IMPL_AVX_CHECK_FLT(v_float32x8, 255)
OPENCV_HAL_IMPL_AVX_CHECK_FLT(v_float64x4, 15)

#define OPENCV_HAL_IMPL_AVX_CHECK(_Tpvec, allmask) \
inline bool v_check_all(const _Tpvec& a) { return v_signmask(a) == allmask; } \
inline bool v_check_any(const _Tpvec& a) { return v_signmask(a) != 0; }
OPENCV_HAL_IMPL_AVX_CHECK(v_uint8x32, -1)
OPENCV_HAL_IMPL_AVX_CHECK(v_int8x32, -1)
OPENCV_HAL_IMPL_AVX_CHECK(v_uint32x8, 255)
OPENCV_HAL_IMPL_AVX_CHECK(v_int32x8, 255)
OPENCV_HAL_IMPL_AVX_CHECK(v_uint64x4, 15)
OPENCV_HAL_IMPL_AVX_CHECK(v_int64x4, 15)
OPENCV_HAL_IMPL_AVX_CHECK(v_float32x8, 255)
OPENCV_HAL_IMPL_AVX_CHECK(v_float64x4, 15)

#define OPENCV_HAL_IMPL_AVX_CHECK_SHORT(_Tpvec) \
inline bool v_check_all(const _Tpvec& a) { return (v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa) == 0xaaaaaaaa; } \
inline bool v_check_any(const _Tpvec& a) { return (v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa) != 0; }
OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_uint16x16)
OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_int16x16)

////////// Other math /////////

Expand Down
6 changes: 3 additions & 3 deletions modules/core/include/opencv2/core/hal/intrin_cpp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1080,7 +1080,7 @@ Returned value is a bit mask with bits set to 1 on places corresponding to negat
v_int32x4 r; // set to {-1, -1, 1, 1}
int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011
@endcode
For all types except 64-bit. */
*/
template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a)
{
int mask = 0;
Expand Down Expand Up @@ -1109,7 +1109,7 @@ template <typename _Tp, int n> inline int v_scan_forward(const v_reg<_Tp, n>& a)
/** @brief Check if all packed values are less than zero
Unsigned values will be casted to signed: `uchar 254 => char -2`.
For all types except 64-bit. */
*/
template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
{
for( int i = 0; i < n; i++ )
Expand All @@ -1121,7 +1121,7 @@ template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
/** @brief Check if any of packed values is less than zero
Unsigned values will be casted to signed: `uchar 254 => char -2`.
For all types except 64-bit. */
*/
template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
{
for( int i = 0; i < n; i++ )
Expand Down
20 changes: 14 additions & 6 deletions modules/core/include/opencv2/core/hal/intrin_neon.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1139,9 +1139,17 @@ inline bool v_check_any(const v_##_Tpvec& a) \
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7)
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15)
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint32x4, u32, 31)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint64x2, u64, 63)
#endif

inline bool v_check_all(const v_uint64x2& a)
{
uint64x2_t v0 = vshrq_n_u64(a.val, 63);
return (vgetq_lane_u64(v0, 0) & vgetq_lane_u64(v0, 1)) == 1;
}
inline bool v_check_any(const v_uint64x2& a)
{
uint64x2_t v0 = vshrq_n_u64(a.val, 63);
return (vgetq_lane_u64(v0, 0) | vgetq_lane_u64(v0, 1)) != 0;
}

inline bool v_check_all(const v_int8x16& a)
{ return v_check_all(v_reinterpret_as_u8(a)); }
Expand All @@ -1161,13 +1169,13 @@ inline bool v_check_any(const v_int32x4& a)
inline bool v_check_any(const v_float32x4& a)
{ return v_check_any(v_reinterpret_as_u32(a)); }

#if CV_SIMD128_64F
inline bool v_check_all(const v_int64x2& a)
{ return v_check_all(v_reinterpret_as_u64(a)); }
inline bool v_check_all(const v_float64x2& a)
{ return v_check_all(v_reinterpret_as_u64(a)); }
inline bool v_check_any(const v_int64x2& a)
{ return v_check_any(v_reinterpret_as_u64(a)); }
#if CV_SIMD128_64F
inline bool v_check_all(const v_float64x2& a)
{ return v_check_all(v_reinterpret_as_u64(a)); }
inline bool v_check_any(const v_float64x2& a)
{ return v_check_any(v_reinterpret_as_u64(a)); }
#endif
Expand Down
44 changes: 19 additions & 25 deletions modules/core/include/opencv2/core/hal/intrin_sse.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1591,31 +1591,25 @@ inline v_uint32x4 v_popcount(const v_int32x4& a)
inline v_uint64x2 v_popcount(const v_int64x2& a)
{ return v_popcount(v_reinterpret_as_u64(a)); }

#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, pack_op, and_op, signmask, allmask) \
inline int v_signmask(const _Tpvec& a) \
{ \
return and_op(_mm_movemask_##suffix(pack_op(a.val)), signmask); \
} \
inline bool v_check_all(const _Tpvec& a) \
{ return and_op(_mm_movemask_##suffix(a.val), allmask) == allmask; } \
inline bool v_check_any(const _Tpvec& a) \
{ return and_op(_mm_movemask_##suffix(a.val), allmask) != 0; }

#define OPENCV_HAL_PACKS(a) _mm_packs_epi16(a, a)
inline __m128i v_packq_epi32(__m128i a)
{
__m128i b = _mm_packs_epi32(a, a);
return _mm_packs_epi16(b, b);
}

OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 15, 15)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 3, 3)
#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, cast_op, allmask) \
inline int v_signmask(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)); } \
inline bool v_check_all(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) == allmask; } \
inline bool v_check_any(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) != 0; }
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, 65535)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, 65535)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, ps, _mm_castsi128_ps, 15)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, ps, _mm_castsi128_ps, 15)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint64x2, pd, _mm_castsi128_pd, 3)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int64x2, pd, _mm_castsi128_pd, 3)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, 15)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, 3)

#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(_Tpvec) \
inline int v_signmask(const _Tpvec& a) { return _mm_movemask_epi8(_mm_packs_epi16(a.val, a.val)) & 255; } \
inline bool v_check_all(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) == 0xaaaa; } \
inline bool v_check_any(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) != 0; }
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_uint16x8)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_int16x8)

inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
Expand Down
4 changes: 4 additions & 0 deletions modules/core/include/opencv2/core/hal/intrin_vsx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,8 @@ inline bool v_check_all(const v_uint16x8& a)
{ return v_check_all(v_reinterpret_as_s16(a)); }
inline bool v_check_all(const v_uint32x4& a)
{ return v_check_all(v_reinterpret_as_s32(a)); }
inline bool v_check_all(const v_uint64x2& a)
{ return v_check_all(v_reinterpret_as_s64(a)); }
inline bool v_check_all(const v_float32x4& a)
{ return v_check_all(v_reinterpret_as_s32(a)); }
inline bool v_check_all(const v_float64x2& a)
Expand All @@ -913,6 +915,8 @@ inline bool v_check_any(const v_uint16x8& a)
{ return v_check_any(v_reinterpret_as_s16(a)); }
inline bool v_check_any(const v_uint32x4& a)
{ return v_check_any(v_reinterpret_as_s32(a)); }
inline bool v_check_any(const v_uint64x2& a)
{ return v_check_any(v_reinterpret_as_s64(a)); }
inline bool v_check_any(const v_float32x4& a)
{ return v_check_any(v_reinterpret_as_s32(a)); }
inline bool v_check_any(const v_float64x2& a)
Expand Down
Loading

0 comments on commit e2a5a6a

Please sign in to comment.