Skip to content

Commit

Permalink
Merge pull request opencv#2461 from ilya-lavrenov:tapi_calc_hist
Browse files Browse the repository at this point in the history
  • Loading branch information
Andrey Pavlenko authored and OpenCV Buildbot committed Mar 11, 2014
2 parents 70e22b6 + 208831e commit fd0ab8e
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 6 deletions.
8 changes: 5 additions & 3 deletions modules/imgproc/src/histogram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1410,18 +1410,20 @@ static bool ocl_calcHist1(InputArray _src, OutputArray _hist, int ddepth = CV_32
{
int compunits = ocl::Device::getDefault().maxComputeUnits();
size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
Size size = _src.size();
bool use16 = size.width % 16 == 0 && _src.offset() % 16 == 0 && _src.step() % 16 == 0;

ocl::Kernel k1("calculate_histogram", ocl::imgproc::histogram_oclsrc,
format("-D BINS=%d -D HISTS_COUNT=%d -D WGS=%d", BINS, compunits, wgs));
format("-D BINS=%d -D HISTS_COUNT=%d -D WGS=%d -D cn=%d",
BINS, compunits, wgs, use16 ? 16 : 1));
if (k1.empty())
return false;

_hist.create(BINS, 1, ddepth);
UMat src = _src.getUMat(), ghist(1, BINS * compunits, CV_32SC1),
hist = ddepth == CV_32S ? _hist.getUMat() : UMat(BINS, 1, CV_32SC1);

k1.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::PtrWriteOnly(ghist),
(int)src.total());
k1.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::PtrWriteOnly(ghist), (int)src.total());

size_t globalsize = compunits * wgs;
if (!k1.run(1, &globalsize, &wgs, false))
Expand Down
36 changes: 33 additions & 3 deletions modules/imgproc/src/opencl/histogram.cl
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,21 @@
//
//

#ifndef cn
#define cn 1
#endif

#if cn == 16
#define T uchar16
#else
#define T uchar
#endif

__kernel void calculate_histogram(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * hist, int total)
{
int lid = get_local_id(0);
int id = get_global_id(0);
int id = get_global_id(0) * cn;
int gid = get_group_id(0);

__local int localhist[BINS];
Expand All @@ -50,10 +60,30 @@ __kernel void calculate_histogram(__global const uchar * src, int src_step, int
localhist[i] = 0;
barrier(CLK_LOCAL_MEM_FENCE);

for (int grain = HISTS_COUNT * WGS; id < total; id += grain)
for (int grain = HISTS_COUNT * WGS * cn; id < total; id += grain)
{
int src_index = mad24(id / src_cols, src_step, src_offset + id % src_cols);
atomic_inc(localhist + (int)src[src_index]);
#if cn == 1
atomic_inc(localhist + convert_int(src[src_index]));
#else
T value = *(__global const T *)(src + src_index);
atomic_inc(localhist + convert_int(value.s0));
atomic_inc(localhist + convert_int(value.s1));
atomic_inc(localhist + convert_int(value.s2));
atomic_inc(localhist + convert_int(value.s3));
atomic_inc(localhist + convert_int(value.s4));
atomic_inc(localhist + convert_int(value.s5));
atomic_inc(localhist + convert_int(value.s6));
atomic_inc(localhist + convert_int(value.s7));
atomic_inc(localhist + convert_int(value.s8));
atomic_inc(localhist + convert_int(value.s9));
atomic_inc(localhist + convert_int(value.sA));
atomic_inc(localhist + convert_int(value.sB));
atomic_inc(localhist + convert_int(value.sC));
atomic_inc(localhist + convert_int(value.sD));
atomic_inc(localhist + convert_int(value.sE));
atomic_inc(localhist + convert_int(value.sF));
#endif
}
barrier(CLK_LOCAL_MEM_FENCE);

Expand Down

0 comments on commit fd0ab8e

Please sign in to comment.