Skip to content

Commit

Permalink
Merge pull request opencv#2459 from ilya-lavrenov:tapi_experiments
Browse files (browse the repository at this point in the history)
  • Loading branch information
Andrey Pavlenko authored and OpenCV Buildbot committed Mar 13, 2014
2 parents 6b6cfa8 + f138b61 commit 0764a23
Show file tree
Hide file tree
Showing 12 changed files with 239 additions and 140 deletions.
37 changes: 20 additions & 17 deletions modules/core/include/opencv2/core/ocl.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -169,8 +169,8 @@ class CV_EXPORTS Device
VENDOR_NVIDIA=3
};
int vendorID() const;
inline bool isAMD() const { return vendorID() == VENDOR_AMD; };
inline bool isIntel() const { return vendorID() == VENDOR_INTEL; };
inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }

int maxClockFrequency() const;
int maxComputeUnits() const;
Expand Down Expand Up @@ -286,7 +286,7 @@ class CV_EXPORTS KernelArg
{
public:
enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, PTR_ONLY = 16, NO_SIZE=256 };
KernelArg(int _flags, UMat* _m, int wscale=1, const void* _obj=0, size_t _sz=0);
KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0);
KernelArg();

static KernelArg Local() { return KernelArg(LOCAL, 0); }
Expand All @@ -296,27 +296,27 @@ class CV_EXPORTS KernelArg
{ return KernelArg(PTR_ONLY+READ_ONLY, (UMat*)&m); }
static KernelArg PtrReadWrite(const UMat& m)
{ return KernelArg(PTR_ONLY+READ_WRITE, (UMat*)&m); }
static KernelArg ReadWrite(const UMat& m, int wscale=1)
{ return KernelArg(READ_WRITE, (UMat*)&m, wscale); }
static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1)
{ return KernelArg(READ_WRITE+NO_SIZE, (UMat*)&m, wscale); }
static KernelArg ReadOnly(const UMat& m, int wscale=1)
{ return KernelArg(READ_ONLY, (UMat*)&m, wscale); }
static KernelArg WriteOnly(const UMat& m, int wscale=1)
{ return KernelArg(WRITE_ONLY, (UMat*)&m, wscale); }
static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1)
{ return KernelArg(READ_ONLY+NO_SIZE, (UMat*)&m, wscale); }
static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1)
{ return KernelArg(WRITE_ONLY+NO_SIZE, (UMat*)&m, wscale); }
static KernelArg ReadWrite(const UMat& m, int wscale=1, int iwscale=1)
{ return KernelArg(READ_WRITE, (UMat*)&m, wscale, iwscale); }
static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1, int iwscale=1)
{ return KernelArg(READ_WRITE+NO_SIZE, (UMat*)&m, wscale, iwscale); }
static KernelArg ReadOnly(const UMat& m, int wscale=1, int iwscale=1)
{ return KernelArg(READ_ONLY, (UMat*)&m, wscale, iwscale); }
static KernelArg WriteOnly(const UMat& m, int wscale=1, int iwscale=1)
{ return KernelArg(WRITE_ONLY, (UMat*)&m, wscale, iwscale); }
static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
{ return KernelArg(READ_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); }
static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
{ return KernelArg(WRITE_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); }
static KernelArg Constant(const Mat& m);
template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n)
{ return KernelArg(CONSTANT, 0, 1, (void*)arr, n); }
{ return KernelArg(CONSTANT, 0, 1, 1, (void*)arr, n); }

int flags;
UMat* m;
const void* obj;
size_t sz;
int wscale;
int wscale, iwscale;
};


Expand Down Expand Up @@ -590,6 +590,9 @@ CV_EXPORTS const char* typeToStr(int t);
CV_EXPORTS const char* memopTypeToStr(int t);
CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1);
CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray());

class CV_EXPORTS Image2D
{
Expand Down
69 changes: 36 additions & 33 deletions modules/core/src/arithm.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -933,17 +933,16 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
int cn = CV_MAT_CN(srctype);

bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

if( oclop < 0 || ((haveMask || haveScalar) && cn > 4) ||
(!doubleSupport && srcdepth == CV_64F && !bitwise))
return false;

char opts[1024];
int kercn = haveMask || haveScalar ? cn : 1;
int kercn = haveMask || haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
int scalarcn = kercn == 3 ? 4 : kercn;

sprintf(opts, "-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d",
(haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), oclop2str[oclop],
haveMask ? "MASK_" : "", haveScalar ? "UNARY_OP" : "BINARY_OP", oclop2str[oclop],
bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) :
ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn)), doubleSupport ? " -D DOUBLE_SUPPORT" : "",
bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, 1)) :
Expand All @@ -953,16 +952,15 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
kercn);

ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
if( k.empty() )
if (k.empty())
return false;

UMat src1 = _src1.getUMat(), src2;
UMat dst = _dst.getUMat(), mask = _mask.getUMat();

int cscale = cn/kercn;
ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cscale);
ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cscale) :
ocl::KernelArg::WriteOnly(dst, cscale);
ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn);
ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cn, kercn) :
ocl::KernelArg::WriteOnly(dst, cn, kercn);
ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1);

if( haveScalar )
Expand All @@ -976,7 +974,7 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
convertAndUnrollScalar(src2sc, srctype, (uchar*)buf, 1);
}

ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, buf, esz);
ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz);

if( !haveMask )
k.args(src1arg, dstarg, scalararg);
Expand All @@ -986,15 +984,15 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
else
{
src2 = _src2.getUMat();
ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cscale);
ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cn, kercn);

if( !haveMask )
k.args(src1arg, src2arg, dstarg);
else
k.args(src1arg, src2arg, maskarg, dstarg);
}

size_t globalsize[] = { src1.cols*(cn/kercn), src1.rows };
size_t globalsize[] = { src1.cols * cn / kercn, src1.rows };
return k.run(2, globalsize, 0, false);
}

Expand Down Expand Up @@ -1313,7 +1311,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
if (!doubleSupport && (depth2 == CV_64F || depth1 == CV_64F))
return false;

int kercn = haveMask || haveScalar ? cn : 1;
int kercn = haveMask || haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
int scalarcn = kercn == 3 ? 4 : kercn;

char cvtstr[4][32], opts[1024];
Expand Down Expand Up @@ -1355,11 +1353,9 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
UMat src1 = _src1.getUMat(), src2;
UMat dst = _dst.getUMat(), mask = _mask.getUMat();

int cscale = cn/kercn;

ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cscale);
ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cscale) :
ocl::KernelArg::WriteOnly(dst, cscale);
ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn);
ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cn, kercn) :
ocl::KernelArg::WriteOnly(dst, cn, kercn);
ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1);

if( haveScalar )
Expand All @@ -1370,15 +1366,15 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,

if( !src2sc.empty() )
convertAndUnrollScalar(src2sc, wtype, (uchar*)buf, 1);
ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, buf, esz);
ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz);

if( !haveMask )
{
if(n == 0)
k.args(src1arg, dstarg, scalararg);
else if(n == 1)
k.args(src1arg, dstarg, scalararg,
ocl::KernelArg(0, 0, 0, usrdata_p, usrdata_esz));
ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz));
else
CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters");
}
Expand All @@ -1388,28 +1384,28 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
else
{
src2 = _src2.getUMat();
ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cscale);
ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cn, kercn);

if( !haveMask )
{
if(n == 0)
k.args(src1arg, src2arg, dstarg);
else if(n == 1)
k.args(src1arg, src2arg, dstarg,
ocl::KernelArg(0, 0, 0, usrdata_p, usrdata_esz));
ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz));
else if(n == 3)
k.args(src1arg, src2arg, dstarg,
ocl::KernelArg(0, 0, 0, usrdata_p, usrdata_esz),
ocl::KernelArg(0, 0, 0, usrdata_p + usrdata_esz, usrdata_esz),
ocl::KernelArg(0, 0, 0, usrdata_p + usrdata_esz*2, usrdata_esz));
ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz),
ocl::KernelArg(0, 0, 0, 0, usrdata_p + usrdata_esz, usrdata_esz),
ocl::KernelArg(0, 0, 0, 0, usrdata_p + usrdata_esz*2, usrdata_esz));
else
CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters");
}
else
k.args(src1arg, src2arg, maskarg, dstarg);
}

size_t globalsize[] = { src1.cols * cscale, src1.rows };
size_t globalsize[] = { src1.cols * cn / kercn, src1.rows };
return k.run(2, globalsize, NULL, false);
}

Expand Down Expand Up @@ -2628,16 +2624,23 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in

bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), type2 = _src2.type();
if (!doubleSupport && (depth == CV_64F || _src2.depth() == CV_64F))
if ( (!doubleSupport && (depth == CV_64F || _src2.depth() == CV_64F)) ||
!_src1.sameSize(_src2) || type != type2)
return false;

int kercn = ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" };
char cvt[40];

ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D BINARY_OP -D srcT1=%s -D workT=srcT1 -D cn=1"
" -D OP_CMP -D CMP_OPERATOR=%s%s",
ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
operationMap[op],
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
format("-D BINARY_OP -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d"
" -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s%s -D srcT1_C1=%s"
" -D srcT2_C1=%s -D dstT_C1=%s",
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
ocl::typeToStr(CV_8UC(kercn)), kercn,
ocl::convertTypeStr(depth, CV_8U, kercn, cvt),
operationMap[op], doubleSupport ? " -D DOUBLE_SUPPORT" : "",
ocl::typeToStr(depth), ocl::typeToStr(depth), ocl::typeToStr(CV_8U)));
if (k.empty())
return false;

Expand All @@ -2651,9 +2654,9 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in

k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
ocl::KernelArg::ReadOnlyNoSize(src2),
ocl::KernelArg::WriteOnly(dst, cn));
ocl::KernelArg::WriteOnly(dst, cn, kercn));

size_t globalsize[2] = { dst.cols * cn, dst.rows };
size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows };
return k.run(2, globalsize, NULL, false);
}

Expand Down
25 changes: 15 additions & 10 deletions modules/core/src/convert.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -1310,7 +1310,8 @@ static BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)

static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta )
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
kercn = ocl::predictOptimalVectorWidth(_src, _dst);
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

if (!doubleSupport && depth == CV_64F)
Expand All @@ -1319,27 +1320,31 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
char cvt[2][50];
int wdepth = std::max(depth, CV_32F);
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=uchar -D srcT1=%s"
" -D workT=%s -D wdepth=%d -D convertToWT1=%s -D convertToDT=%s%s",
ocl::typeToStr(depth), ocl::typeToStr(wdepth), wdepth,
ocl::convertTypeStr(depth, wdepth, 1, cvt[0]),
ocl::convertTypeStr(wdepth, CV_8U, 1, cvt[1]),
format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s"
" -D workT=%s -D wdepth=%d -D convertToWT1=%s -D convertToDT=%s -D workT1=%s%s",
ocl::typeToStr(CV_8UC(kercn)),
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)), wdepth,
ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]),
ocl::convertTypeStr(wdepth, CV_8U, kercn, cvt[1]),
ocl::typeToStr(wdepth),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
if (k.empty())
return false;

_dst.createSameSize(_src, CV_8UC(cn));
UMat src = _src.getUMat(), dst = _dst.getUMat();
UMat src = _src.getUMat();
_dst.create(src.size(), CV_8UC(cn));
UMat dst = _dst.getUMat();

ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
dstarg = ocl::KernelArg::WriteOnly(dst, cn);
dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);

if (wdepth == CV_32F)
k.args(srcarg, dstarg, (float)alpha, (float)beta);
else if (wdepth == CV_64F)
k.args(srcarg, dstarg, alpha, beta);

size_t globalsize[2] = { src.cols * cn, src.rows };
size_t globalsize[2] = { src.cols * cn / kercn, src.rows };
return k.run(2, globalsize, NULL, false);
}

Expand Down
39 changes: 17 additions & 22 deletions modules/core/src/mathfuncs.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -62,40 +62,35 @@ static const char* oclop2str[] = { "OP_LOG", "OP_EXP", "OP_MAG", "OP_PHASE_DEGRE

static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, int oclop)
{
int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn1 = CV_MAT_CN(type1);
int type2 = _src2.type(), cn2 = CV_MAT_CN(type2);

char opts[1024];
int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
int kercn = oclop == OCL_OP_PHASE_DEGREES ||
oclop == OCL_OP_PHASE_RADIANS ? 1 : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);

bool double_support = false;
if(ocl::Device::getDefault().doubleFPConfig() > 0)
double_support = true;
if(!double_support && depth1 == CV_64F)
bool double_support = ocl::Device::getDefault().doubleFPConfig() > 0;
if (!double_support && depth == CV_64F)
return false;

sprintf(opts, "-D %s -D %s -D dstT=%s %s", _src2.empty()?"UNARY_OP":"BINARY_OP",
oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, 1) ), double_support ? "-D DOUBLE_SUPPORT" : "" );

ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
if( k.empty() )
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D %s -D %s -D dstT=%s%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP",
oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
double_support ? " -D DOUBLE_SUPPORT" : ""));
if (k.empty())
return false;

UMat src1 = _src1.getUMat();
UMat src2 = _src2.getUMat();
_dst.create(src1.size(), type1);
UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
_dst.create(src1.size(), type);
UMat dst = _dst.getUMat();

ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cn1);
ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cn2);
ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(dst, cn1);
ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);

if(_src2.empty())
if (src2.empty())
k.args(src1arg, dstarg);
else
k.args(src1arg, src2arg, dstarg);

size_t globalsize[] = { src1.cols*cn1, src1.rows};

size_t globalsize[] = { src1.cols * cn / kercn, src1.rows };
return k.run(2, globalsize, 0, false);
}

Expand Down
Loading

0 comments on commit 0764a23

Please sign in to comment.