Add shape inference for LpPool, RoiPool, and fix MaxPool, AveragePool, and Conv (onnx#928)

* Make the coefficient non-optional for LinearClassifier

* Fix the build issue by updating the changelog and the operators-ml.md files

* Add shape inference for LpPool, RoiPool, and fix MaxPool, AveragePool, and Conv

* Fix the bug in the Conv shape inference

* Fix review comments

* Fix the Python style issues
jaliyae authored and bddppq committed May 13, 2018
1 parent 490c4c6 commit 0bd3f78
Showing 2 changed files with 215 additions and 55 deletions.
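For orientation, here is a minimal sketch of exercising the new inference paths from Python once this change is in place; the two-node model and all names are illustrative, not part of the commit.

```python
import onnx
from onnx import TensorProto, helper, shape_inference

# Tiny two-node model; the trailing Relu makes the pool output an
# intermediate value, so inference records its shape in value_info.
nodes = [
    helper.make_node("LpPool", ["X"], ["P"], kernel_shape=[2, 2]),
    helper.make_node("Relu", ["P"], ["Y"]),
]
graph = helper.make_graph(
    nodes, "lppool_example",
    [helper.make_tensor_value_info("X", TensorProto.FLOAT, (5, 3, 4, 4))],
    [helper.make_tensor_value_info("Y", TensorProto.FLOAT, None)])
inferred = shape_inference.infer_shapes(helper.make_model(graph))
# Expect P inferred as (5, 3, 3, 3): per spatial axis, 1 + (4 - 2) // 1 = 3.
print(inferred.graph.value_info)
```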
178 changes: 123 additions & 55 deletions onnx/defs/nn/defs.cc
@@ -24,10 +24,19 @@ static std::string auto_pad_doc =

namespace ONNX_NAMESPACE {

void convPoolTypeAndShapeInference(InferenceContext& ctx, bool use_dilation, bool require_kernel_shape) {
void convPoolTypeAndShapeInference(
InferenceContext& ctx,
bool use_dilation,
bool require_kernel_shape) {
propagateElemTypeFromInputToOutput(ctx, 0, 0);

if (!hasNInputShapes(ctx, 2)) {
// we need at least one input to have a shape for this inference.
if (!hasNInputShapes(ctx, 1)) {
return;
}

// if no kernel shape is required, then we need two inputs.
if (!require_kernel_shape && !hasNInputShapes(ctx, 2)) {
return;
}

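The reworked input-shape guard above distinguishes two cases; paraphrased as a small Python sketch (illustrative, not the C++ API):

```python
def can_run_inference(num_shaped_inputs, require_kernel_shape):
    # One shaped input is always required; when the kernel shape may come
    # from the weight tensor instead of an attribute (Conv), the second
    # input's shape is required too.
    if num_shaped_inputs < 1:
        return False
    if not require_kernel_shape and num_shaped_inputs < 2:
        return False
    return True

assert can_run_inference(1, True)        # pooling ops: attribute suffices
assert not can_run_inference(1, False)   # Conv without W's shape: bail out
```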
@@ -36,34 +45,31 @@ void convPoolTypeAndShapeInference(InferenceContext& ctx, bool use_dilation, bool require_kernel_shape) {
return;
}

size_t n_input_dims = (size_t) (ctx.getInputType(0)->tensor_type().shape().dim_size() - 2);
auto input_shape = ctx.getInputType(0)->tensor_type().shape();
if (input_shape.dim_size() < 2) {
return; // The input shape is not properly set.
}

// first dim is the batch axis and the next is the number of channels.
size_t n_input_dims = static_cast<size_t>(input_shape.dim_size() - 2);

// Pooling operations don't support dilation, only Conv. For
// simplicity of the code, we just treat them as having all-1s
// dilation.
std::vector<int64_t> dilations;
bool nodilations = false;
if (use_dilation && getRepeatedAttribute(ctx, "dilations", dilations)) {
if (dilations.size() != n_input_dims) {
return;
}
} else {
nodilations = true;
dilations.assign(n_input_dims, 1);
}

std::vector<int64_t> kernel_shape;
if (getRepeatedAttribute(ctx, "kernel_shape", kernel_shape)) {
if (kernel_shape.size() != static_cast<size_t>(ctx.getInputType(0)->tensor_type().shape().dim_size() - 2)) {
return;
}
} else if (require_kernel_shape) {
return;
} else {
for (int i = 2; i < ctx.getInputType(1)->tensor_type().shape().dim_size(); ++i) {
if (!ctx.getInputType(1)->tensor_type().shape().dim(i).has_dim_value()) {
return;
}
kernel_shape.push_back(ctx.getInputType(1)->tensor_type().shape().dim(i).dim_value());
}
int64_t groups = getAttribute(ctx, "group", 1);
if (groups != 1) {
return; // we don't handle the group case.
}

std::vector<int64_t> pads;
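The dilation handling above boils down to the kernel-extent arithmetic; a short sketch:

```python
def effective_kernel_size(k, dilation=1):
    # A kernel of size k with dilation d covers (k - 1) * d + 1 input positions.
    return (k - 1) * dilation + 1

assert effective_kernel_size(3) == 3     # pooling: dilation fixed at 1
assert effective_kernel_size(3, 2) == 5  # Conv with dilation 2
```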
@@ -84,32 +90,60 @@ void convPoolTypeAndShapeInference(InferenceContext& ctx, bool use_dilation, bool require_kernel_shape) {
strides.assign(n_input_dims, 1);
}

*ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape()->add_dim() =
ctx.getInputType(0)->tensor_type().shape().dim(0);
*ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape()->add_dim() =
ctx.getInputType(1)->tensor_type().shape().dim(0);
std::vector<int64_t> kernel_shape;
if (getRepeatedAttribute(ctx, "kernel_shape", kernel_shape)) {
if (kernel_shape.size() != n_input_dims) {
return;
}
} else if (require_kernel_shape) {
return;
} else {
auto second_input_shape = ctx.getInputType(1)->tensor_type().shape();
for (int i = 2; i < second_input_shape.dim_size(); ++i) {
if (!second_input_shape.dim(i).has_dim_value()) {
return;
}
kernel_shape.push_back(second_input_shape.dim(i).dim_value());
}
}

auto output_shape =
ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();

if (require_kernel_shape) {
// add the first two dimensions from the input.
*output_shape->add_dim() = input_shape.dim(0);
*output_shape->add_dim() = input_shape.dim(1);
} else {
*output_shape->add_dim() = input_shape.dim(0);
*output_shape->add_dim() =
ctx.getInputType(1)->tensor_type().shape().dim(0);
}

for (int i = 0; i < static_cast<int>(kernel_shape.size()); ++i) {
auto newdim = ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape()->add_dim();
if (!ctx.getInputType(0)->tensor_type().shape().dim(2 + i).has_dim_value()) {
int kernel_shape_size = static_cast<int>(kernel_shape.size());
for (int i = 0; i < kernel_shape_size; ++i) {
auto newdim = output_shape->add_dim();
if (!input_shape.dim(2 + i).has_dim_value()) {
continue;
}
// how big is the input, including padding
int64_t effective_input_size = ctx.getInputType(0)->tensor_type().shape().dim(2 + i).dim_value();
int64_t effective_input_size = input_shape.dim(2 + i).dim_value();
effective_input_size += pads[i];
effective_input_size += pads[i + static_cast<int>(kernel_shape.size())];
effective_input_size += pads[i + kernel_shape_size];

// accounting for dilation, how big is the kernel in this dimension
int64_t effective_kernel_size = kernel_shape[i];
effective_kernel_size = (effective_kernel_size - 1) * dilations[i] + 1;
if (!nodilations) {
// how big is the kernel in this dimension
effective_kernel_size = (effective_kernel_size - 1) * dilations[i] + 1;
}

// how many times we can move the kernel from its initial position, based on the stride
int64_t strided_kernel_positions = (effective_input_size - effective_kernel_size) / strides[i];
// how many times we can move the kernel from its initial position, based
// on the stride
int64_t strided_kernel_positions =
(effective_input_size - effective_kernel_size) / strides[i];

// add in the initial position
int64_t total_kernel_positions = 1 + strided_kernel_positions;

newdim->set_dim_value(total_kernel_positions);
newdim->set_dim_value(1 + strided_kernel_positions);
}
}

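Putting the loop above together: the inferred spatial size is one plus the number of strided kernel positions that fit in the padded input. A Python sketch mirroring the integer arithmetic (the asserts correspond to the pooling tests added below):

```python
def pool_output_dim(in_size, kernel, stride=1, pad_begin=0, pad_end=0, dilation=1):
    # Pad the input, dilate the kernel, count how many strided positions
    # fit, then add one for the kernel's initial placement.
    effective_input = in_size + pad_begin + pad_end
    effective_kernel = (kernel - 1) * dilation + 1
    return 1 + (effective_input - effective_kernel) // stride

assert pool_output_dim(4, 2) == 3                          # kernel 2 on size 4
assert pool_output_dim(4, 2, pad_begin=1, pad_end=2) == 6  # pads=[1, 1, 2, 2]
assert pool_output_dim(4, 2, stride=2, pad_begin=1, pad_end=2) == 3
```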
@@ -187,9 +221,11 @@ std::function<void(OpSchema&)> PoolOpSchemaGenerator(
"T",
{"tensor(float16)", "tensor(float)", "tensor(double)"},
"Constrain input and output types to float tensors.");
schema.TypeAndShapeInferenceFunction([](InferenceContext& ctx) { convPoolTypeAndShapeInference(ctx, false, true); });
schema.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
convPoolTypeAndShapeInference(ctx, false, true);
});
};
}
} // namespace ONNX_NAMESPACE

ONNX_OPERATOR_SCHEMA(AveragePool)
.FillUsing(PoolOpSchemaGenerator(
@@ -271,13 +307,50 @@ std::function<void(OpSchema&)> LpPoolOpSchemaGenerator(const char* name) {
"T",
{"tensor(float16)", "tensor(float)", "tensor(double)"},
"Constrain input and output types to float tensors.");
schema.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
convPoolTypeAndShapeInference(ctx, false, true);
});
};
}

ONNX_OPERATOR_SCHEMA(LpPool).FillUsing(LpPoolOpSchemaGenerator("LpPool"));

} // namespace ONNX_NAMESPACE

// For ROI pool operations.
void roiPoolTypeShapeInference(InferenceContext& ctx) {
propagateElemTypeFromInputToOutput(ctx, 0, 0);

// rois is the second input.
if (!hasNInputShapes(ctx, 2)) {
return;
}

auto input_shape = ctx.getInputType(0)->tensor_type().shape();
auto rois_shape = ctx.getInputType(1)->tensor_type().shape();

// first dim is the batch axis and the next is the number of channels.
size_t n_input_dims = static_cast<size_t>(input_shape.dim_size() - 2);

std::vector<int64_t> pooled_shape;
if (getRepeatedAttribute(ctx, "pooled_shape", pooled_shape)) {
if (pooled_shape.size() != n_input_dims) {
return;
}
} else {
return; // cannot produce output shape.
}

// (num_rois, channels, pooled_shape[0], pooled_shape[1])
auto output_shape =
ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();

*output_shape->add_dim() = rois_shape.dim(0);
*output_shape->add_dim() = input_shape.dim(1);
output_shape->add_dim()->set_dim_value(pooled_shape[0]);
output_shape->add_dim()->set_dim_value(pooled_shape[1]);
}

namespace ONNX_NAMESPACE {
std::function<void(OpSchema&)> RoiPoolOpSchemaGenerator(const char* name) {
return [=](OpSchema& schema) {
@@ -321,6 +394,8 @@ std::function<void(OpSchema&)> RoiPoolOpSchemaGenerator(const char* name) {
"T",
{"tensor(float16)", "tensor(float)", "tensor(double)"},
"Constrain input and output types to float tensors.");
schema.TypeAndShapeInferenceFunction(
[](InferenceContext& ctx) { roiPoolTypeShapeInference(ctx); });
};
}

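roiPoolTypeShapeInference above assembles the output as (num_rois, channels, pooled_shape[0], pooled_shape[1]); the same computation as a sketch, checked against the test_roipool case added below:

```python
def roipool_output_shape(x_shape, rois_shape, pooled_shape):
    # Output is (num_rois, channels, pooled_shape[0], pooled_shape[1]).
    return (rois_shape[0], x_shape[1], pooled_shape[0], pooled_shape[1])

# X is (5, 3, 4, 4), rois is (2, 5), pooled_shape is [2, 2].
assert roipool_output_shape((5, 3, 4, 4), (2, 5), [2, 2]) == (2, 3, 2, 2)
```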
@@ -406,7 +481,9 @@ computes the output.)DOC";
"number of groups input channels and output channels are divided into, default is 1.",
AttributeProto::INT,
static_cast<int64_t>(1));
schema.TypeAndShapeInferenceFunction([](InferenceContext& ctx) { convPoolTypeAndShapeInference(ctx, true, false); });
schema.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
convPoolTypeAndShapeInference(ctx, true, false);
});
};
}

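Conv calls convPoolTypeAndShapeInference with use_dilation=true and require_kernel_shape=false, so a missing kernel_shape attribute falls back to the spatial dims of the weight tensor W, and the output channel count comes from W's first dim. A sketch with illustrative shapes:

```python
def conv_inferred_dims(x_shape, w_shape, kernel_shape_attr=None):
    # W is laid out (M, C/group, k1, k2, ...); fall back to its trailing
    # (spatial) dims when the kernel_shape attribute is missing.
    kernel = list(kernel_shape_attr) if kernel_shape_attr else list(w_shape[2:])
    return (x_shape[0], w_shape[0]), kernel  # (batch, out channels), kernel

assert conv_inferred_dims((1, 3, 32, 32), (8, 3, 3, 3)) == ((1, 8), [3, 3])
```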
@@ -666,11 +743,7 @@ Output case #2: Y (test mode)
"The running variance (training) or the estimated "
"variance (testing) as a 1-dimensional tensor of size C.",
"T")
.Output(
0,
"Y",
"The output tensor of the same shape as X.",
"T")
.Output(0, "Y", "The output tensor of the same shape as X.", "T")
.Output(
1,
"mean",
@@ -704,11 +777,10 @@ Output case #2: Y (test mode)
{"tensor(float16)", "tensor(float)", "tensor(double)"},
"Constrain input and output types to float tensors.")
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
propagateShapeAndTypeFromFirstInput(ctx);
// TODO in training mode, it may be possible to infer some of
// the other outputs as well.
});

propagateShapeAndTypeFromFirstInput(ctx);
// TODO in training mode, it may be possible to infer some of
// the other outputs as well.
});

ONNX_OPERATOR_SCHEMA(InstanceNormalization)
.SinceVersion(6)
@@ -739,18 +811,14 @@ where mean and variance are computed per instance per channel.
"T")
.Input(1, "scale", "The input 1-dimensional scale tensor of size C.", "T")
.Input(2, "B", "The input 1-dimensional bias tensor of size C.", "T")
.Output(
0,
"output",
"The output tensor of the same shape as input.",
"T")
.Output(0, "output", "The output tensor of the same shape as input.", "T")
.TypeConstraint(
"T",
{"tensor(float16)", "tensor(float)", "tensor(double)"},
"Constrain input and output types to float tensors.")
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
propagateShapeAndTypeFromFirstInput(ctx);
});
propagateShapeAndTypeFromFirstInput(ctx);
});

ONNX_OPERATOR_SCHEMA(LpNormalization)
.Input(0, "input", "Input matrix", "T")
@@ -773,8 +841,8 @@ Given a matrix, apply Lp-normalization along the provided axis.
AttributeProto::INT,
static_cast<int64_t>(2))
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
propagateShapeAndTypeFromFirstInput(ctx);
});
propagateShapeAndTypeFromFirstInput(ctx);
});

ONNX_OPERATOR_SCHEMA(Dropout)
.SinceVersion(6)
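The BatchNormalization, InstanceNormalization, and LpNormalization schemas above all rely on propagateShapeAndTypeFromFirstInput, i.e. the primary output simply takes X's shape. A sketch analogous to the first example (names illustrative):

```python
import onnx
from onnx import TensorProto, helper, shape_inference

def vi(name, shape):
    return helper.make_tensor_value_info(name, TensorProto.FLOAT, shape)

nodes = [
    helper.make_node("BatchNormalization",
                     ["X", "scale", "B", "mean", "var"], ["BN"]),
    helper.make_node("Relu", ["BN"], ["Y"]),
]
graph = helper.make_graph(
    nodes, "bn_example",
    [vi("X", (2, 3, 4, 5)), vi("scale", (3,)), vi("B", (3,)),
     vi("mean", (3,)), vi("var", (3,))],
    [vi("Y", None)])
inferred = shape_inference.infer_shapes(helper.make_model(graph))
# BN is inferred with X's shape, (2, 3, 4, 5).
```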
92 changes: 92 additions & 0 deletions onnx/test/shape_inference_test.py
@@ -672,6 +672,98 @@ def test_softmax(self):
[])
self._assert_inferred(graph, [make_tensor_value_info('z', TensorProto.FLOAT, (4, 5))])

def test_maxpool(self):
graph = self._make_graph(
[("X", TensorProto.FLOAT, (5, 3, 4, 4))],
[make_node("MaxPool", ["X"], ["Y"], kernel_shape=[2, 2])],
[])
self._assert_inferred(graph, [make_tensor_value_info("Y", TensorProto.FLOAT, (5, 3, 3, 3))])

def test_maxpool_3D(self):
graph = self._make_graph(
[("X", TensorProto.FLOAT, (5, 3, 4, 4, 4))],
[make_node("MaxPool", ["X"], ["Y"], kernel_shape=[2, 2, 2])],
[])
self._assert_inferred(graph, [make_tensor_value_info("Y", TensorProto.FLOAT, (5, 3, 3, 3, 3))])

def test_maxpool_with_padding(self):
graph = self._make_graph(
[("X", TensorProto.FLOAT, (5, 3, 4, 4))],
[make_node("MaxPool", ["X"], ["Y"], kernel_shape=[2, 2], pads=[1, 1, 2, 2])],
[])
self._assert_inferred(graph, [make_tensor_value_info("Y", TensorProto.FLOAT, (5, 3, 6, 6))])

def test_maxpool_with_padding_and_stride(self):
graph = self._make_graph(
[("X", TensorProto.FLOAT, (5, 3, 4, 4))],
[make_node("MaxPool", ["X"], ["Y"], kernel_shape=[2, 2], pads=[1, 1, 2, 2], strides=[2, 2])],
[])
self._assert_inferred(graph, [make_tensor_value_info("Y", TensorProto.FLOAT, (5, 3, 3, 3))])

def test_averagepool(self):
graph = self._make_graph(
[("X", TensorProto.FLOAT, (5, 3, 4, 4))],
[make_node("AveragePool", ["X"], ["Y"], kernel_shape=[2, 2])],
[])
self._assert_inferred(graph, [make_tensor_value_info("Y", TensorProto.FLOAT, (5, 3, 3, 3))])

def test_averagepool_3D(self):
graph = self._make_graph(
[("X", TensorProto.FLOAT, (5, 3, 4, 4, 4))],
[make_node("AveragePool", ["X"], ["Y"], kernel_shape=[2, 2, 2])],
[])
self._assert_inferred(graph, [make_tensor_value_info("Y", TensorProto.FLOAT, (5, 3, 3, 3, 3))])

def test_averagepool_with_padding(self):
graph = self._make_graph(
[("X", TensorProto.FLOAT, (5, 3, 4, 4))],
[make_node("AveragePool", ["X"], ["Y"], kernel_shape=[2, 2], pads=[1, 1, 2, 2])],
[])
self._assert_inferred(graph, [make_tensor_value_info("Y", TensorProto.FLOAT, (5, 3, 6, 6))])

def test_averagepool_with_padding_and_stride(self):
graph = self._make_graph(
[("X", TensorProto.FLOAT, (5, 3, 4, 4))],
[make_node("AveragePool", ["X"], ["Y"], kernel_shape=[2, 2], pads=[1, 1, 2, 2], strides=[2, 2])],
[])
self._assert_inferred(graph, [make_tensor_value_info("Y", TensorProto.FLOAT, (5, 3, 3, 3))])

def test_lppool(self):
graph = self._make_graph(
[("X", TensorProto.FLOAT, (5, 3, 4, 4))],
[make_node("LpPool", ["X"], ["Y"], kernel_shape=[2, 2])],
[])
self._assert_inferred(graph, [make_tensor_value_info("Y", TensorProto.FLOAT, (5, 3, 3, 3))])

def test_lppool_3D(self):
graph = self._make_graph(
[("X", TensorProto.FLOAT, (5, 3, 4, 4, 4))],
[make_node("LpPool", ["X"], ["Y"], kernel_shape=[2, 2, 2])],
[])
self._assert_inferred(graph, [make_tensor_value_info("Y", TensorProto.FLOAT, (5, 3, 3, 3, 3))])

def test_lppool_with_padding(self):
graph = self._make_graph(
[("X", TensorProto.FLOAT, (5, 3, 4, 4))],
[make_node("LpPool", ["X"], ["Y"], kernel_shape=[2, 2], pads=[1, 1, 2, 2])],
[])
self._assert_inferred(graph, [make_tensor_value_info("Y", TensorProto.FLOAT, (5, 3, 6, 6))])

def test_lppool_with_padding_and_stride(self):
graph = self._make_graph(
[("X", TensorProto.FLOAT, (5, 3, 4, 4))],
[make_node("LpPool", ["X"], ["Y"], kernel_shape=[2, 2], pads=[1, 1, 2, 2], strides=[2, 2])],
[])
self._assert_inferred(graph, [make_tensor_value_info("Y", TensorProto.FLOAT, (5, 3, 3, 3))])

def test_roipool(self):
graph = self._make_graph(
[("X", TensorProto.FLOAT, (5, 3, 4, 4)),
("rois", TensorProto.INT64, (2, 5))],
[make_node("MaxRoiPool", ["X", "rois"], ["Y"], pooled_shape=[2, 2])],
[])
self._assert_inferred(graph, [make_tensor_value_info("Y", TensorProto.FLOAT, (2, 3, 2, 2))])

def test_lp_norm(self):
graph = self._make_graph(
[('x', TensorProto.FLOAT, (3, 4, 5, 6, 7))],
