Skip to content

Commit f46eff4

Browse files
committed
Merge pull request opencv#10492 from pengli:dnn
2 parents 7abaae3 + 1073175 commit f46eff4

File tree

2 files changed

+101
-0
lines changed

2 files changed

+101
-0
lines changed

modules/dnn/src/layers/proposal_layer.cpp

+95
Original file line numberDiff line numberDiff line change
@@ -148,11 +148,89 @@ class ProposalLayerImpl : public ProposalLayer
148148
deltasPermute->finalize(layerInputs, layerOutputs);
149149
}
150150

151+
#ifdef HAVE_OPENCL
152+
// UMat-based forward pass of the proposal layer.
//
// inputs_    - must hold exactly 3 arrays: scores, bbox deltas and image info.
// outputs_   - outputs[0] receives the result rows (see the end of the function).
// internals_ - must hold exactly 3 arrays used as scratch buffers: prior boxes,
//              permuted scores and permuted deltas.
// Returns true unconditionally (any CV_Assert failure is raised before that).
//
// NOTE(review): outputs[0] is accessed without checking outputs.size() -
// confirm that callers always provide at least one output.
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
153+
{
154+
std::vector<UMat> inputs;
155+
std::vector<UMat> outputs;
156+
std::vector<UMat> internals;
157+
158+
inputs_.getUMatVector(inputs);
159+
outputs_.getUMatVector(outputs);
160+
internals_.getUMatVector(internals);
161+
162+
CV_Assert(inputs.size() == 3);
163+
CV_Assert(internals.size() == 3);
164+
const UMat& scores = inputs[0];
165+
const UMat& bboxDeltas = inputs[1];
166+
const UMat& imInfo = inputs[2];
167+
UMat& priorBoxes = internals[0];
168+
UMat& permuttedScores = internals[1];
169+
UMat& permuttedDeltas = internals[2];
170+
171+
// Only the first two elements of imInfo are read below.
CV_Assert(imInfo.total() >= 2);
172+
// The fake image blob uses the smallest data type (CV_8UC1) because we need
// just a shape from it. imInfo is first copied into a Mat so that its
// elements can be read with at<float>().
173+
Mat szMat;
174+
imInfo.copyTo(szMat);
175+
int rows = (int)szMat.at<float>(0);
176+
int cols = (int)szMat.at<float>(1);
177+
umat_fakeImageBlob.create(shape(1, 1, rows, cols), CV_8UC1);
178+
umat_fakeImageBlob.setTo(0);
179+
180+
// Generate prior boxes from the scores and the fake image blob; the result
// is requested in priorBoxes.
181+
std::vector<UMat> layerInputs(2), layerOutputs(1, priorBoxes);
182+
layerInputs[0] = scores;
183+
layerInputs[1] = umat_fakeImageBlob;
184+
priorBoxLayer->forward(layerInputs, layerOutputs, internals);
185+
186+
// Permute scores. Only the second half of the score channels is used
// (see getObjectScores).
187+
layerInputs.assign(1, getObjectScores(scores));
188+
layerOutputs.assign(1, permuttedScores);
189+
scoresPermute->forward(layerInputs, layerOutputs, internals);
190+
191+
// Permute deltas.
192+
layerInputs.assign(1, bboxDeltas);
193+
layerOutputs.assign(1, permuttedDeltas);
194+
deltasPermute->forward(layerInputs, layerOutputs, internals);
195+
196+
// Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
197+
// output internally because of different number of objects after NMS.
198+
layerInputs.resize(4);
199+
layerInputs[0] = permuttedDeltas;
200+
layerInputs[1] = permuttedScores;
201+
layerInputs[2] = priorBoxes;
202+
layerInputs[3] = umat_fakeImageBlob;
203+
204+
// Hand over an empty UMat so the detection layer provides its own output.
layerOutputs[0] = UMat();
205+
detectionOutputLayer->forward(layerInputs, layerOutputs, internals);
206+
207+
// DetectionOutputLayer produces 1x1xNx7 output where N might be less or
208+
// equal to keepTopAfterNMS. We fill the rest by zeros.
209+
const int numDets = layerOutputs[0].total() / 7;
210+
// NOTE(review): numDets is a signed int while keepTopAfterNMS is declared
// as uint32_t, so this comparison (and the rowRange call below) mixes
// signedness.
CV_Assert(numDets <= keepTopAfterNMS);
211+
212+
// View the detections as a numDets x 7 matrix and copy columns 3..6 of each
// row into columns 1..4 of the first numDets output rows.
// NOTE(review): presumably these four columns are the box coordinates -
// confirm against DetectionOutputLayer's output layout.
MatShape s = shape(numDets, 7);
213+
UMat src = layerOutputs[0].reshape(1, s.size(), &s[0]).colRange(3, 7);
214+
UMat dst = outputs[0].rowRange(0, numDets);
215+
src.copyTo(dst.colRange(1, 5));
216+
dst.col(0).setTo(0); // The first column holds batch ids; keep it zero as well.
217+
218+
// Zero the output rows that received no detection.
if (numDets < keepTopAfterNMS)
219+
outputs[0].rowRange(numDets, keepTopAfterNMS).setTo(0);
220+
221+
return true;
222+
}
223+
#endif
224+
151225
// Forward-pass entry point taking generic input/output/internal arrays.
// When the preferable target is DNN_TARGET_OPENCL and the
// OCL_PERFORMANCE_CHECK condition (default OpenCL device is Intel) holds,
// forward_ocl is handed to CV_OCL_RUN; otherwise, or if that path does not
// finish the call, the work is delegated to Layer::forward_fallback.
// NOTE(review): CV_OCL_RUN is expected to return from this function when
// forward_ocl succeeds - confirm against the macro definition.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
152226
{
153227
CV_TRACE_FUNCTION();
154228
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
155229

230+
// Try the OpenCL implementation first (no trailing semicolon in the original).
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
231+
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
232+
forward_ocl(inputs_arr, outputs_arr, internals_arr))
233+
156234
// Generic path used when the OpenCL branch above did not handle the call.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
157235
}
158236

@@ -226,13 +304,30 @@ class ProposalLayerImpl : public ProposalLayer
226304
return slice(m, Range::all(), Range(channels / 2, channels));
227305
}
228306

307+
#ifdef HAVE_OPENCL
308+
// UMat version of getObjectScores: returns the part of m that covers the
// second half of its channels (axis 1), keeping the full range on the other
// three axes.
// Asserts that m is 4-dimensional, that m.size[0] == 1 and that the channel
// count is even.
static UMat getObjectScores(const UMat& m)
309+
{
310+
CV_Assert(m.dims == 4);
311+
CV_Assert(m.size[0] == 1);
312+
int channels = m.size[1];
313+
// The channel count must be even so it can be split into two equal halves.
CV_Assert((channels & 1) == 0);
314+
315+
// Select channels [channels / 2, channels).
Range r = Range(channels / 2, channels);
316+
Range ranges[4] = { Range::all(), r, Range::all(), Range::all() };
317+
return m(&ranges[0]);
318+
}
319+
#endif
320+
229321
Ptr<PriorBoxLayer> priorBoxLayer;
230322
Ptr<DetectionOutputLayer> detectionOutputLayer;
231323

232324
Ptr<PermuteLayer> deltasPermute;
233325
Ptr<PermuteLayer> scoresPermute;
234326
uint32_t keepTopAfterNMS;
235327
Mat fakeImageBlob;
328+
#ifdef HAVE_OPENCL
329+
UMat umat_fakeImageBlob;
330+
#endif
236331
};
237332

238333

samples/dnn/resnet_ssd_face.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ const char* params
3030
"{ model | | model weights (res10_300x300_ssd_iter_140000.caffemodel) }"
3131
"{ camera_device | 0 | camera device number }"
3232
"{ video | | video or image for detection }"
33+
"{ opencl | false | enable OpenCL }"
3334
"{ min_confidence | 0.5 | min confidence }";
3435

3536
int main(int argc, char** argv)
@@ -62,6 +63,11 @@ int main(int argc, char** argv)
6263
exit(-1);
6364
}
6465

66+
if (parser.get<bool>("opencl"))
67+
{
68+
net.setPreferableTarget(DNN_TARGET_OPENCL);
69+
}
70+
6571
VideoCapture cap;
6672
if (parser.get<String>("video").empty())
6773
{

0 commit comments

Comments
 (0)