@@ -148,11 +148,89 @@ class ProposalLayerImpl : public ProposalLayer
148
148
deltasPermute->finalize (layerInputs, layerOutputs);
149
149
}
150
150
151
+ #ifdef HAVE_OPENCL
152
+ bool forward_ocl (InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
153
+ {
154
+ std::vector<UMat> inputs;
155
+ std::vector<UMat> outputs;
156
+ std::vector<UMat> internals;
157
+
158
+ inputs_.getUMatVector (inputs);
159
+ outputs_.getUMatVector (outputs);
160
+ internals_.getUMatVector (internals);
161
+
162
+ CV_Assert (inputs.size () == 3 );
163
+ CV_Assert (internals.size () == 3 );
164
+ const UMat& scores = inputs[0 ];
165
+ const UMat& bboxDeltas = inputs[1 ];
166
+ const UMat& imInfo = inputs[2 ];
167
+ UMat& priorBoxes = internals[0 ];
168
+ UMat& permuttedScores = internals[1 ];
169
+ UMat& permuttedDeltas = internals[2 ];
170
+
171
+ CV_Assert (imInfo.total () >= 2 );
172
+ // We've chosen the smallest data type because we need just a shape from it.
173
+ Mat szMat;
174
+ imInfo.copyTo (szMat);
175
+ int rows = (int )szMat.at <float >(0 );
176
+ int cols = (int )szMat.at <float >(1 );
177
+ umat_fakeImageBlob.create (shape (1 , 1 , rows, cols), CV_8UC1);
178
+ umat_fakeImageBlob.setTo (0 );
179
+
180
+ // Generate prior boxes.
181
+ std::vector<UMat> layerInputs (2 ), layerOutputs (1 , priorBoxes);
182
+ layerInputs[0 ] = scores;
183
+ layerInputs[1 ] = umat_fakeImageBlob;
184
+ priorBoxLayer->forward (layerInputs, layerOutputs, internals);
185
+
186
+ // Permute scores.
187
+ layerInputs.assign (1 , getObjectScores (scores));
188
+ layerOutputs.assign (1 , permuttedScores);
189
+ scoresPermute->forward (layerInputs, layerOutputs, internals);
190
+
191
+ // Permute deltas.
192
+ layerInputs.assign (1 , bboxDeltas);
193
+ layerOutputs.assign (1 , permuttedDeltas);
194
+ deltasPermute->forward (layerInputs, layerOutputs, internals);
195
+
196
+ // Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
197
+ // output internally because of different number of objects after NMS.
198
+ layerInputs.resize (4 );
199
+ layerInputs[0 ] = permuttedDeltas;
200
+ layerInputs[1 ] = permuttedScores;
201
+ layerInputs[2 ] = priorBoxes;
202
+ layerInputs[3 ] = umat_fakeImageBlob;
203
+
204
+ layerOutputs[0 ] = UMat ();
205
+ detectionOutputLayer->forward (layerInputs, layerOutputs, internals);
206
+
207
+ // DetectionOutputLayer produces 1x1xNx7 output where N might be less or
208
+ // equal to keepTopAfterNMS. We fill the rest by zeros.
209
+ const int numDets = layerOutputs[0 ].total () / 7 ;
210
+ CV_Assert (numDets <= keepTopAfterNMS);
211
+
212
+ MatShape s = shape (numDets, 7 );
213
+ UMat src = layerOutputs[0 ].reshape (1 , s.size (), &s[0 ]).colRange (3 , 7 );
214
+ UMat dst = outputs[0 ].rowRange (0 , numDets);
215
+ src.copyTo (dst.colRange (1 , 5 ));
216
+ dst.col (0 ).setTo (0 ); // First column are batch ids. Keep it zeros too.
217
+
218
+ if (numDets < keepTopAfterNMS)
219
+ outputs[0 ].rowRange (numDets, keepTopAfterNMS).setTo (0 );
220
+
221
+ return true ;
222
+ }
223
+ #endif
224
+
151
225
// Layer entry point.
//
// When the preferable target is DNN_TARGET_OPENCL and the performance check
// on the default OpenCL device (Intel) passes, forward_ocl() is offered to
// CV_OCL_RUN. Otherwise, or if that path does not take over, the generic
// Layer::forward_fallback() is used.
// NOTE(review): CV_OCL_RUN is presumably what returns early when forward_ocl()
// succeeds; the macro definition is outside this view - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, " name", name.c_str());

    CV_OCL_RUN(
        (preferableTarget == DNN_TARGET_OPENCL) && OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
        forward_ocl(inputs_arr, outputs_arr, internals_arr))

    Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
158
236
@@ -226,13 +304,30 @@ class ProposalLayerImpl : public ProposalLayer
226
304
return slice (m, Range::all (), Range (channels / 2 , channels));
227
305
}
228
306
307
+ #ifdef HAVE_OPENCL
308
+ static UMat getObjectScores (const UMat& m)
309
+ {
310
+ CV_Assert (m.dims == 4 );
311
+ CV_Assert (m.size [0 ] == 1 );
312
+ int channels = m.size [1 ];
313
+ CV_Assert ((channels & 1 ) == 0 );
314
+
315
+ Range r = Range (channels / 2 , channels);
316
+ Range ranges[4 ] = { Range::all (), r, Range::all (), Range::all () };
317
+ return m (&ranges[0 ]);
318
+ }
319
+ #endif
320
+
229
321
// Nested layer that forward_ocl() runs first to generate the prior boxes.
Ptr <PriorBoxLayer> priorBoxLayer;
230
322
// Nested layer that forward_ocl() runs last to sort predictions by score and apply NMS.
Ptr <DetectionOutputLayer> detectionOutputLayer;
231
323
232
324
// Nested layers used to permute the box deltas and the scores before detection.
Ptr <PermuteLayer> deltasPermute;
233
325
Ptr <PermuteLayer> scoresPermute;
234
326
// Upper bound on the number of detections kept; forward_ocl() asserts the
// detection count does not exceed it and zero-fills output rows up to it.
uint32_t keepTopAfterNMS;
235
327
// NOTE(review): presumably the host (Mat) counterpart of umat_fakeImageBlob; its use is outside this view - confirm.
Mat fakeImageBlob;
328
// Zero-filled CV_8UC1 blob of shape 1x1xrowsxcols that forward_ocl() recreates
// on each call; passed to the nested layers in place of the image.
+ #ifdef HAVE_OPENCL
329
+ UMat umat_fakeImageBlob;
330
+ #endif
236
331
};
237
332
238
333
0 commit comments