Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
Browse files Browse the repository at this point in the history
  • Loading branch information
alalek committed Jul 16, 2020
2 parents d17ab27 + b2ebd37 commit 9b7b22e
Show file tree
Hide file tree
Showing 15 changed files with 717 additions and 127 deletions.
4 changes: 2 additions & 2 deletions cmake/OpenCVDetectInferenceEngine.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ endif()

if(INF_ENGINE_TARGET)
if(NOT INF_ENGINE_RELEASE)
message(WARNING "InferenceEngine version has not been set, 2020.3 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
message(WARNING "InferenceEngine version has not been set, 2020.4 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
endif()
set(INF_ENGINE_RELEASE "2020030000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
set(INF_ENGINE_RELEASE "2020040000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
set_target_properties(${INF_ENGINE_TARGET} PROPERTIES
INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
)
Expand Down
95 changes: 95 additions & 0 deletions modules/dnn/perf/perf_layer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "perf_precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>

namespace opencv_test {

// Performance fixture for the DNN "Slice" layer, parameterized over
// (backend, target) combinations supplied by the test instantiation.
struct Layer_Slice : public TestBaseWithParam<tuple<Backend, Target> >
{
    // Builds a single-layer "Slice" network over a DIMS-dimensional input
    // and times Net::forward(). `begin`/`end` give the per-axis slice
    // bounds (end is exclusive), each array holding DIMS entries.
    template<int DIMS>
    void test_slice(const int* inputShape, const int* begin, const int* end)
    {
        const int backendId = get<0>(GetParam());
        const int targetId = get<1>(GetParam());

        // Fill the input with a deterministic non-constant pattern so the
        // output norm check below cannot trivially pass on zeros.
        Mat input(DIMS, inputShape, CV_32FC1, Scalar::all(0));
        float* inputData = input.ptr<float>();
        const int totalElems = (int)input.total();
        for (int idx = 0; idx < totalElems; ++idx)
            inputData[idx] = (float)(idx & 4095);

        // Reference ranges for the (disabled) correctness comparison below.
        std::vector<Range> range(DIMS);
        for (int axis = 0; axis < DIMS; ++axis)
            range[axis] = Range(begin[axis], end[axis]);

        LayerParams lp;
        lp.type = "Slice";
        lp.name = "testLayer";
        lp.set("begin", DictValue::arrayInt<int*>((int*)&begin[0], DIMS));
        lp.set("end", DictValue::arrayInt<int*>((int*)&end[0], DIMS));

        Net net;
        net.addLayerToPrev(lp.name, lp.type, lp);

        // warmup: one untimed forward pass plus a basic sanity check
        {
            net.setInput(input);
            net.setPreferableBackend(backendId);
            net.setPreferableTarget(targetId);
            Mat out = net.forward();

            EXPECT_GT(cv::norm(out, NORM_INF), 0);
#if 0
            //normAssert(out, input(range));
            cout << input(range).clone().reshape(1, 1) << endl;
            cout << out.reshape(1, 1) << endl;
#endif
        }

        TEST_CYCLE()
        {
            Mat res = net.forward();
        }

        SANITY_CHECK_NOTHING();
    }
};



// Slice pattern from the first YOLOv4-tiny route layer: keep the upper
// half (channels 32..63) of a 64-channel feature map.
PERF_TEST_P_(Layer_Slice, YOLOv4_tiny_1)
{
    const int shape[4] = {1, 64, 104, 104};
    const int from[] = {0, 32, 0, 0};
    const int to[] = {1, 64, 104, 104};
    test_slice<4>(shape, from, to);
}

// Slice pattern from the second YOLOv4-tiny route layer: upper half
// (channels 64..127) of a 128-channel feature map.
PERF_TEST_P_(Layer_Slice, YOLOv4_tiny_2)
{
    const int shape[4] = {1, 128, 52, 52};
    const int from[] = {0, 64, 0, 0};
    const int to[] = {1, 128, 52, 52};
    test_slice<4>(shape, from, to);
}

// Slice pattern from the third YOLOv4-tiny route layer: upper half
// (channels 128..255) of a 256-channel feature map.
PERF_TEST_P_(Layer_Slice, YOLOv4_tiny_3)
{
    const int shape[4] = {1, 256, 26, 26};
    const int from[] = {0, 128, 0, 0};
    const int to[] = {1, 256, 26, 26};
    test_slice<4>(shape, from, to);
}


// Spatial crop from the fast-neural-style (ECCV'16) model: trims a border
// on the two innermost (height/width) axes while keeping all channels.
PERF_TEST_P_(Layer_Slice, FastNeuralStyle_eccv16)
{
    const int shape[4] = {1, 128, 80, 100};
    const int from[] = {0, 0, 2, 2};
    const int to[] = {1, 128, 76, 96};
    test_slice<4>(shape, from, to);
}

INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false));

} // namespace
15 changes: 13 additions & 2 deletions modules/dnn/perf/perf_net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,13 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3)
{
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
throw SkipTestException("Test is disabled in OpenVINO 2020.4");
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
throw SkipTestException("Test is disabled in OpenVINO 2020.4");
#endif

Mat sample = imread(findDataFile("dnn/dog416.png"));
cvtColor(sample, sample, COLOR_BGR2RGB);
Mat inp;
Expand All @@ -209,6 +216,12 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv4)
throw SkipTestException("");
if (target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
throw SkipTestException("Test is disabled in OpenVINO 2020.4");
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
throw SkipTestException("Test is disabled in OpenVINO 2020.4");
#endif
Mat sample = imread(findDataFile("dnn/dog416.png"));
cvtColor(sample, sample, COLOR_BGR2RGB);
Mat inp;
Expand All @@ -220,8 +233,6 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv4_tiny)
{
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
if (target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/dog416.png"));
cvtColor(sample, sample, COLOR_BGR2RGB);
Mat inp;
Expand Down
3 changes: 0 additions & 3 deletions modules/dnn/src/graph_simplifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,6 @@ int Subgraph::getInputNodeId(const Ptr<ImportGraphWrapper>& net,
{
CV_Assert(inpId < node->getNumInputs());
std::string name = node->getInputName(inpId);
// If operation produces several tensors, they are specified by index
// after ':' character. In example, "input:0".
name = name.substr(0, name.rfind(':'));
const int numNodes = net->getNumNodes();
for (int i = 0; i < numNodes; ++i)
{
Expand Down
178 changes: 145 additions & 33 deletions modules/dnn/src/layers/slice_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@
#include "layers_common.hpp"
#include <opencv2/dnn/shape_utils.hpp>

#include <opencv2/core/utils/logger.hpp>

#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
Expand Down Expand Up @@ -204,58 +206,168 @@ class SliceLayerImpl : public SliceLayer
finalSliceRanges[i][j] = clamp(finalSliceRanges[i][j], inpShape[j]);
}
}

#if 0
std::cout << "DEBUG: DNN/Slice: " << outputs.size() << " inpShape=" << inpShape << std::endl;
for (int i = 0; i < outputs.size(); ++i)
{
for (int j = 0; j < finalSliceRanges[i].size(); ++j)
{
std::cout << finalSliceRanges[i][j];
}
std::cout << std::endl;
}
#endif
}

#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
#if 1
// TODO fix that (brokes YOLOv4-tiny)
return false;
#else
std::vector<UMat> inputs;
std::vector<UMat> outputs;

bool use_half = (inputs_.depth() == CV_16S);
inputs_.getUMatVector(inputs);
outputs_.getUMatVector(outputs);

if (inputs[0].dims < 4 || (total(shape(outputs[0]), 0, 2) % 4 != 0) ||
(total(shape(outputs[0]), 2) % 4 != 0))
CV_Assert(outputs.size() == finalSliceRanges.size());

const UMat& input = inputs[0];
if (input.dims > 5)
{
CV_LOG_INFO(NULL, "DNN/OpenCL/Slice: implementation doesn't support dims=" << input.dims << ". Fallback to CPU");
return false;
}

String opts;
if (use_half)
opts = "-DDtype=half -DDtype4=half4 -DDtype8=half8";
else
opts = "-DDtype=float -DDtype4=float4 -DDtype8=float8";
const UMat& inpMat = inputs[0];
size_t WSZ = 128;

const int dims = input.dims;
const int elemSize = (int)input.elemSize();
String opts0 = cv::format(
"-DDIMS=%d -DELEMSIZE=%d",
dims, elemSize
);
for (int d = 0; d < dims; d++)
{
opts0 += cv::format(" -DSRC_STEP_%d=%d", d, (int)input.step[dims - 1 - d]);
}
String kname = cv::format("slice_%d", dims);
for (size_t i = 0; i < outputs.size(); i++)
{
int groups = outputs[i].size[0];
int channels = outputs[i].size[1];
int rows = outputs[i].size[2];
int cols = outputs[i].size[3];
ocl::Kernel kernel("slice", ocl::dnn::slice_oclsrc, opts);
size_t local[] = { 128 };
size_t global[] = { (size_t)groups * channels / 4 * local[0] };
int idx = 0;
kernel.set(idx++, ocl::KernelArg::PtrReadOnly(inpMat));
kernel.set(idx++, (int)(inpMat.size[2] * inpMat.size[3]));
kernel.set(idx++, (int)(rows * cols));
kernel.set(idx++, (int)inpMat.size[3]);
kernel.set(idx++, (int)cols);
kernel.set(idx++, (int)finalSliceRanges[i][2].start);
kernel.set(idx++, (int)finalSliceRanges[i][3].start);
kernel.set(idx++, ocl::KernelArg::PtrWriteOnly(outputs[i]));
bool ret = kernel.run(1, global, local, false);
UMat& output = outputs[i];
const std::vector<Range>& range = finalSliceRanges[i];

String opts = opts0;

CV_CheckEQ(output.dims, dims, "");
for (int d = 0; d < dims; d++)
{
opts += cv::format(" -DDST_STEP_%d=%d -DDST_SZ_%d=%d -DSRC_START_%d=%d",
d, (int)output.step[dims - 1 - d],
d, (int)output.size[dims - 1 - d],
d, (int)range[dims - 1 - d].start
);
CV_CheckEQ(range[d].size(), (int)output.size[d], "");
}

int block_dims = 0;
size_t block_size = elemSize;
for (int i = dims - 1; i >= 0; --i)
{
if (input.step[i] != output.step[i])
break;
block_size *= output.size[i];
block_dims++;
}

const size_t total = output.total() * elemSize;
size_t num_blocks = total / block_size;

if ((num_blocks <= 8 && block_size >= WSZ * 4) || (block_size >= WSZ * 64))
{
// use 1D copy mode
opts += cv::format(" -DUSE_COPY_1D=1");

opts += cv::format(" -DBLOCK_DIMS=%d", block_dims);
opts += cv::format(" -DBLOCK_DIMS_CONTIGUOUS=%d", block_dims);
opts += cv::format(" -DBLOCK_SIZE=%d", (int)block_size);

opts += cv::format(" -DBLOCK_COLS=%d", (int)block_size);
}
else
{
// use 2D copy mode
int block_cols = block_size;
int block_dims_contiguous = block_dims;
size_t input_base_step = input.step[dims - 1 - block_dims_contiguous];
size_t output_base_step = output.step[dims - 1 - block_dims_contiguous];

size_t block_rows = 1;
for (int i = dims - 1 - block_dims_contiguous; i >= 0; --i)
{
if (input.step[i] * output_base_step != output.step[i] * input_base_step)
break;
block_rows *= output.size[i];
block_dims++;
}

block_size *= block_rows;

num_blocks = total / block_size;

if (block_rows > 1)
{
opts += cv::format(" -DBLOCK_DIMS=%d", block_dims);
opts += cv::format(" -DBLOCK_DIMS_CONTIGUOUS=%d", block_dims_contiguous);
opts += cv::format(" -DBLOCK_SIZE=%d", (int)block_size);

opts += cv::format(" -DBLOCK_COLS=%d", (int)block_cols);

opts += cv::format(" -DBLOCK_ROWS=%d", (int)block_rows);
opts += cv::format(" -DBLOCK_SRC_STRIDE=%d", (int)input_base_step);
}
else
{
// use 1D copy mode
opts += cv::format(" -DUSE_COPY_1D=1");

opts += cv::format(" -DBLOCK_DIMS=%d", block_dims_contiguous);
opts += cv::format(" -DBLOCK_DIMS_CONTIGUOUS=%d", block_dims_contiguous);
opts += cv::format(" -DBLOCK_SIZE=%d", (int)block_size);

opts += cv::format(" -DBLOCK_COLS=%d", (int)block_size);
}
}

const size_t MIN_WORK_ITEMS = 16;
if (block_size <= 4 * MIN_WORK_ITEMS)
WSZ = 4;
else if (block_size <= 8 * MIN_WORK_ITEMS)
WSZ = 8;
else if (block_size <= 16 * MIN_WORK_ITEMS)
WSZ = 16;
else if (block_size <= 32 * MIN_WORK_ITEMS)
WSZ = 32;
else if (block_size <= 64 * MIN_WORK_ITEMS)
WSZ = 64;

opts += cv::format(" -DWSZ=%d", (int)WSZ);

size_t local[] = { WSZ, 1 };
size_t global[] = { WSZ, num_blocks };

ocl::Kernel kernel(kname.c_str(), ocl::dnn::slice_oclsrc, opts);
if (kernel.empty())
return false;
bool ret = kernel.args(
ocl::KernelArg::PtrReadOnly(input),
ocl::KernelArg::PtrWriteOnly(output)
)
.run(2, global, local, false);
if (!ret)
return false;
}
} // for outputs.size()

return true;
#endif
}
#endif

Expand Down
8 changes: 4 additions & 4 deletions modules/dnn/src/op_inf_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
#define INF_ENGINE_RELEASE_2020_4 2020040000

#ifndef INF_ENGINE_RELEASE
#warning("IE version have not been provided via command-line. Using 2020.3 by default")
#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2020_3
#warning("IE version have not been provided via command-line. Using 2020.4 by default")
#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2020_4
#endif

#define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))
Expand All @@ -44,7 +44,7 @@
#pragma GCC diagnostic ignored "-Wsuggest-override"
#endif

#ifdef HAVE_DNN_IE_NN_BUILDER_2019
#if defined(HAVE_DNN_IE_NN_BUILDER_2019) || INF_ENGINE_VER_MAJOR_EQ(INF_ENGINE_RELEASE_2020_4)
//#define INFERENCE_ENGINE_DEPRECATED // turn off deprecation warnings from IE
//there is no way to suppress warnings from IE only at this moment, so we are forced to suppress warnings globally
#if defined(__GNUC__)
Expand All @@ -53,7 +53,7 @@
#ifdef _MSC_VER
#pragma warning(disable: 4996) // was declared deprecated
#endif
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#endif

#if defined(__GNUC__) && INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_1)
#pragma GCC visibility push(default)
Expand Down
Loading

0 comments on commit 9b7b22e

Please sign in to comment.