Skip to content

Commit

Permalink
core(intrin): drop hasSIMD128 checks
Browse files Browse the repository at this point in the history
- use compile-time checks instead (`#if CV_SIMD128`)
- runtime `hasSIMD128()` checks are redundant once the code is guarded at compile time
  • Loading branch information
alalek committed Jun 8, 2019
1 parent 254f88f commit 1e9ad54
Show file tree
Hide file tree
Showing 14 changed files with 27 additions and 166 deletions.
18 changes: 1 addition & 17 deletions modules/calib3d/src/stereobm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,10 +204,6 @@ prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero*2 : x - OFS + ftzero);
uchar val0 = tab[0 + OFS];

#if CV_SIMD128
bool useSIMD = hasSIMD128();
#endif

for( y = 0; y < size.height-1; y += 2 )
{
const uchar* srow1 = src.ptr<uchar>(y);
Expand All @@ -221,7 +217,6 @@ prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
x = 1;

#if CV_SIMD128
if( useSIMD )
{
v_int16x8 ftz = v_setall_s16((short) ftzero);
v_int16x8 ftz2 = v_setall_s16((short)(ftzero*2));
Expand Down Expand Up @@ -268,7 +263,6 @@ prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
uchar* dptr = dst.ptr<uchar>(y);
x = 0;
#if CV_SIMD128
if( useSIMD )
{
v_uint8x16 val0_16 = v_setall_u8(val0);
for(; x <= size.width-16; x+=16 )
Expand Down Expand Up @@ -594,8 +588,6 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
mType FILTERED = (mType)((mindisp - 1) << disp_shift);

#if CV_SIMD128
bool useSIMD = hasSIMD128();
if( useSIMD )
{
CV_Assert (ndisp % 8 == 0);
}
Expand Down Expand Up @@ -637,7 +629,6 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
int lval = lptr[0];
d = 0;
#if CV_SIMD128
if( useSIMD )
{
v_uint8x16 lv = v_setall_u8((uchar)lval);

Expand Down Expand Up @@ -706,7 +697,6 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
int lval = lptr[0];
d = 0;
#if CV_SIMD128
if( useSIMD )
{
v_uint8x16 lv = v_setall_u8((uchar)lval);
for( ; d <= ndisp - 16; d += 16 )
Expand Down Expand Up @@ -769,7 +759,6 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
{
d = 0;
#if CV_SIMD128
if( useSIMD )
{
for( d = 0; d <= ndisp-8; d += 8 )
{
Expand Down Expand Up @@ -799,7 +788,6 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;
d = 0;
#if CV_SIMD128
if( useSIMD )
{
v_int32x4 d0_4 = v_int32x4(0, 1, 2, 3);
v_int32x4 dd_4 = v_setall_s32(4);
Expand Down Expand Up @@ -1003,9 +991,6 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
validDisparityRect = _validDisparityRect;
slidingSumBuf = &_slidingSumBuf;
cost = &_cost;
#if CV_SIMD128
useSIMD = hasSIMD128();
#endif
}

void operator()(const Range& range) const CV_OVERRIDE
Expand Down Expand Up @@ -1043,7 +1028,7 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
Mat cost_i = state->disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat();

#if CV_SIMD128
if( useSIMD && useShorts )
if (useShorts)
{
if( disp_i.type() == CV_16S)
findStereoCorrespondenceBM_SIMD<short>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 );
Expand Down Expand Up @@ -1083,7 +1068,6 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
size_t stripeBufSize;
bool useShorts;
Rect validDisparityRect;
bool useSIMD;
};

class StereoBMImpl CV_FINAL : public StereoBM
Expand Down
43 changes: 13 additions & 30 deletions modules/calib3d/src/stereosgbm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,6 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
int width2 = maxX2 - minX2;
const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y);
PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2;
#if CV_SIMD128
bool useSIMD = hasSIMD128();
#endif

tab += tabOfs;

Expand Down Expand Up @@ -224,7 +221,7 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
int u1 = std::max(ul, ur); u1 = std::max(u1, u);

#if CV_SIMD128
if( useSIMD )
if (true)
{
v_uint8x16 _u = v_setall_u8((uchar)u), _u0 = v_setall_u8((uchar)u0);
v_uint8x16 _u1 = v_setall_u8((uchar)u1);
Expand Down Expand Up @@ -304,8 +301,6 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
static const v_uint16x8 v_LSB = v_uint16x8(0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);

bool useSIMD = hasSIMD128();
#endif

const int ALIGN = 16;
Expand Down Expand Up @@ -450,7 +445,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);

#if CV_SIMD128
if( useSIMD )
if (true)
{
for( d = 0; d < D; d += 8 )
{
Expand Down Expand Up @@ -547,7 +542,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
CostType* Sp = S + x*D;

#if CV_SIMD128
if( useSIMD )
if (true)
{
v_int16x8 _P1 = v_setall_s16((short)P1);

Expand Down Expand Up @@ -681,7 +676,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
const CostType* Cp = C + x*D;

#if CV_SIMD128
if( useSIMD )
if (true)
{
v_int16x8 _P1 = v_setall_s16((short)P1);
v_int16x8 _delta0 = v_setall_s16((short)delta0);
Expand Down Expand Up @@ -753,7 +748,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
else
{
#if CV_SIMD128
if( useSIMD )
if (true)
{
v_int16x8 _minS = v_setall_s16(MAX_COST), _bestDisp = v_setall_s16(-1);
v_int16x8 _d8 = v_int16x8(0, 1, 2, 3, 4, 5, 6, 7), _8 = v_setall_s16(8);
Expand Down Expand Up @@ -868,7 +863,6 @@ struct CalcVerticalSums: public ParallelLoopBody
Cbuf = alignedBuf;
Sbuf = Cbuf + CSBufSize;
hsumBuf = Sbuf + CSBufSize;
useSIMD = hasSIMD128();
}

void operator()(const Range& range) const CV_OVERRIDE
Expand Down Expand Up @@ -957,7 +951,7 @@ struct CalcVerticalSums: public ParallelLoopBody
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);

#if CV_SIMD128
if( useSIMD )
if (true)
{
for( d = 0; d < D; d += 8 )
{
Expand Down Expand Up @@ -1034,7 +1028,7 @@ struct CalcVerticalSums: public ParallelLoopBody
CostType* Sp = S + x*D;

#if CV_SIMD128
if( useSIMD )
if (true)
{
v_int16x8 _P1 = v_setall_s16((short)P1);

Expand Down Expand Up @@ -1121,7 +1115,6 @@ struct CalcVerticalSums: public ParallelLoopBody
size_t LrSize;
size_t hsumBufNRows;
int ftzero;
bool useSIMD;
};

struct CalcHorizontalSums: public ParallelLoopBody
Expand Down Expand Up @@ -1149,7 +1142,6 @@ struct CalcHorizontalSums: public ParallelLoopBody
LrSize = 2 * D2;
Cbuf = alignedBuf;
Sbuf = Cbuf + CSBufSize;
useSIMD = hasSIMD128();
}

void operator()(const Range& range) const CV_OVERRIDE
Expand Down Expand Up @@ -1204,7 +1196,7 @@ struct CalcHorizontalSums: public ParallelLoopBody
CostType* Sp = S + x*D;

#if CV_SIMD128
if( useSIMD )
if (true)
{
v_int16x8 _P1 = v_setall_s16((short)P1);

Expand Down Expand Up @@ -1277,7 +1269,7 @@ struct CalcHorizontalSums: public ParallelLoopBody
minLr = MAX_COST;

#if CV_SIMD128
if( useSIMD )
if (true)
{
v_int16x8 _P1 = v_setall_s16((short)P1);

Expand Down Expand Up @@ -1424,7 +1416,6 @@ struct CalcHorizontalSums: public ParallelLoopBody
int INVALID_DISP_SCALED;
int uniquenessRatio;
int disp12MaxDiff;
bool useSIMD;
};
/*
computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf.
Expand Down Expand Up @@ -1536,10 +1527,6 @@ struct SGBM3WayMainLoop : public ParallelLoopBody
int costBufSize, hsumBufNRows;
int TAB_OFS, ftzero;

#if CV_SIMD128
bool useSIMD;
#endif

PixType* clipTab;

SGBM3WayMainLoop(Mat *_buffers, const Mat& _img1, const Mat& _img2, Mat* _dst_disp, const StereoSGBMParams& params, PixType* _clipTab, int _nstripes, int _stripe_overlap);
Expand Down Expand Up @@ -1569,10 +1556,6 @@ buffers(_buffers), img1(&_img1), img2(&_img2), dst_disp(_dst_disp), clipTab(_cli
hsumBufNRows = SH2*2 + 2;
TAB_OFS = 256*4;
ftzero = std::max(params.preFilterCap, 15) | 1;

#if CV_SIMD128
useSIMD = hasSIMD128();
#endif
}

void getBufferPointers(Mat& buffer, int width, int width1, int D, int num_ch, int SH2, int P2,
Expand Down Expand Up @@ -1673,7 +1656,7 @@ void SGBM3WayMainLoop::getRawMatchingCost(CostType* C, // target cost-volume row
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);

#if CV_SIMD128
if(useSIMD)
if (true)
{
v_int16x8 hv_reg;
for( d = 0; d < D; d+=8 )
Expand Down Expand Up @@ -1734,7 +1717,7 @@ inline void accumulateCostsLeftTop(CostType* leftBuf, CostType* leftBuf_prev, Co
CostType& leftMinCost, CostType& topMinCost, int D, int P1, int P2)
{
#if CV_SIMD128
if(hasSIMD128())
if (true)
{
v_int16x8 P1_reg = v_setall_s16(cv::saturate_cast<CostType>(P1));

Expand Down Expand Up @@ -1846,7 +1829,7 @@ inline void accumulateCostsRight(CostType* rightBuf, CostType* topBuf, CostType*
CostType& rightMinCost, int D, int P1, int P2, int& optimal_disp, CostType& min_cost)
{
#if CV_SIMD128
if(hasSIMD128())
if (true)
{
v_int16x8 P1_reg = v_setall_s16(cv::saturate_cast<CostType>(P1));

Expand Down Expand Up @@ -2012,7 +1995,7 @@ void SGBM3WayMainLoop::operator () (const Range& range) const
if(uniquenessRatio>0)
{
#if CV_SIMD128
if(useSIMD)
if (true)
{
horPassCostVolume+=x;
int thresh = (100*min_cost)/(100-uniquenessRatio);
Expand Down
9 changes: 0 additions & 9 deletions modules/core/include/opencv2/core/hal/intrin_avx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2774,15 +2774,6 @@ inline void v_pack_store(float16_t* ptr, const v_float32x8& a)

inline void v256_cleanup() { _mm256_zeroall(); }

//! @name Check SIMD256 support
//! @{
//! @brief Report whether 256-bit SIMD operations may be used.
//! @return true when CV_CPU_HAS_SUPPORT_AVX2 evaluates to a non-zero value, false otherwise.
static inline bool hasSIMD256()
{
    // CV_CPU_HAS_SUPPORT_AVX2 is defined outside this header.
    // NOTE(review): presumably the AVX2 availability check - confirm.
    if (CV_CPU_HAS_SUPPORT_AVX2)
        return true;
    return false;
}
//! @}

CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END

//! @endcond
Expand Down
10 changes: 0 additions & 10 deletions modules/core/include/opencv2/core/hal/intrin_cpp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2357,16 +2357,6 @@ inline void v_cleanup() {}

//! @}

//! @name Check SIMD support
//! @{
//! @brief Report whether 128-bit SIMD operations may be used.
//! @return Always false in this definition.
static inline bool hasSIMD128()
{
    // This variant unconditionally reports that SIMD128 is unavailable.
    const bool simd128Supported = false;
    return simd128Supported;
}

//! @}

#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
Expand Down
10 changes: 0 additions & 10 deletions modules/core/include/opencv2/core/hal/intrin_neon.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1910,16 +1910,6 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)

inline void v_cleanup() {}

//! @name Check SIMD support
//! @{
//! @brief Report whether 128-bit SIMD operations may be used.
//! @return true when CV_CPU_HAS_SUPPORT_NEON evaluates to a non-zero value, false otherwise.
static inline bool hasSIMD128()
{
    // CV_CPU_HAS_SUPPORT_NEON is defined outside this header.
    // NOTE(review): presumably the NEON availability check - confirm.
    if (CV_CPU_HAS_SUPPORT_NEON)
        return true;
    return false;
}

//! @}

CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END

//! @endcond
Expand Down
10 changes: 0 additions & 10 deletions modules/core/include/opencv2/core/hal/intrin_sse.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3043,16 +3043,6 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)

inline void v_cleanup() {}

//! @name Check SIMD support
//! @{
//! @brief Report whether 128-bit SIMD operations may be used.
//! @return true when CV_CPU_HAS_SUPPORT_SSE2 evaluates to a non-zero value, false otherwise.
static inline bool hasSIMD128()
{
    // CV_CPU_HAS_SUPPORT_SSE2 is defined outside this header.
    // NOTE(review): presumably the SSE2 availability check - confirm.
    if (CV_CPU_HAS_SUPPORT_SSE2)
        return true;
    return false;
}

//! @}

CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END

//! @endcond
Expand Down
10 changes: 0 additions & 10 deletions modules/core/include/opencv2/core/hal/intrin_vsx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1355,16 +1355,6 @@ OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_uint32x4, vec_uint4)
OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_int32x4, vec_int4)
OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_float32x4, vec_float4)

//! @name Check SIMD support
//! @{
//! @brief Report whether 128-bit SIMD operations may be used.
//! @return true when CV_CPU_HAS_SUPPORT_VSX evaluates to a non-zero value, false otherwise.
static inline bool hasSIMD128()
{
    // CV_CPU_HAS_SUPPORT_VSX is defined outside this header.
    // NOTE(review): presumably the VSX availability check - confirm.
    if (CV_CPU_HAS_SUPPORT_VSX)
        return true;
    return false;
}

//! @}

CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END

//! @endcond
Expand Down
2 changes: 0 additions & 2 deletions modules/features2d/src/fast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bo
#if CV_SIMD128
const int quarterPatternSize = patternSize/4;
v_uint8x16 delta = v_setall_u8(0x80), t = v_setall_u8((char)threshold), K16 = v_setall_u8((char)K);
bool hasSimd = hasSIMD128();
#if CV_TRY_AVX2
Ptr<opt_AVX2::FAST_t_patternSize16_AVX2> fast_t_impl_avx2;
if(CV_CPU_HAS_SUPPORT_AVX2)
Expand Down Expand Up @@ -102,7 +101,6 @@ void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bo
{
j = 3;
#if CV_SIMD128
if( hasSimd )
{
if( patternSize == 16 )
{
Expand Down
Loading

0 comments on commit 1e9ad54

Please sign in to comment.