forked from facebookresearch/Detectron
-
Notifications
You must be signed in to change notification settings - Fork 0
/
blob.py
181 lines (155 loc) · 6.38 KB
/
blob.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
"""Caffe2 blob helper functions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import cv2
import numpy as np
from six.moves import cPickle as pickle
from caffe2.proto import caffe2_pb2
from detectron.core.config import cfg
def get_image_blob(im, target_scale, target_max_size):
"""Convert an image into a network input.
Arguments:
im (ndarray): a color image in BGR order
Returns:
blob (ndarray): a data blob holding an image pyramid
im_scale (float): image scale (target size) / (original size)
im_info (ndarray)
"""
processed_im, im_scale = prep_im_for_blob(
im, cfg.PIXEL_MEANS, target_scale, target_max_size
)
blob = im_list_to_blob(processed_im)
# NOTE: this height and width may be larger than actual scaled input image
# due to the FPN.COARSEST_STRIDE related padding in im_list_to_blob. We are
# maintaining this behavior for now to make existing results exactly
# reproducible (in practice using the true input image height and width
# yields nearly the same results, but they are sometimes slightly different
# because predictions near the edge of the image will be pruned more
# aggressively).
height, width = blob.shape[2], blob.shape[3]
im_info = np.hstack((height, width, im_scale))[np.newaxis, :]
return blob, im_scale, im_info.astype(np.float32)
def im_list_to_blob(ims):
"""Convert a list of images into a network input. Assumes images were
prepared using prep_im_for_blob or equivalent: i.e.
- BGR channel order
- pixel means subtracted
- resized to the desired input size
- float32 numpy ndarray format
Output is a 4D HCHW tensor of the images concatenated along axis 0 with
shape.
"""
if not isinstance(ims, list):
ims = [ims]
max_shape = np.array([im.shape for im in ims]).max(axis=0)
# Pad the image so they can be divisible by a stride
if cfg.FPN.FPN_ON:
stride = float(cfg.FPN.COARSEST_STRIDE)
max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride)
max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride)
num_images = len(ims)
blob = np.zeros(
(num_images, max_shape[0], max_shape[1], 3), dtype=np.float32
)
for i in range(num_images):
im = ims[i]
blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
# Move channels (axis 3) to axis 1
# Axis order will become: (batch elem, channel, height, width)
channel_swap = (0, 3, 1, 2)
blob = blob.transpose(channel_swap)
return blob
def prep_im_for_blob(im, pixel_means, target_size, max_size):
"""Prepare an image for use as a network input blob. Specially:
- Subtract per-channel pixel mean
- Convert to float32
- Rescale to each of the specified target size (capped at max_size)
Returns a list of transformed images, one for each target size. Also returns
the scale factors that were used to compute each returned image.
"""
im = im.astype(np.float32, copy=False)
im -= pixel_means
im_shape = im.shape
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than max_size
if np.round(im_scale * im_size_max) > max_size:
im_scale = float(max_size) / float(im_size_max)
im = cv2.resize(
im,
None,
None,
fx=im_scale,
fy=im_scale,
interpolation=cv2.INTER_LINEAR
)
return im, im_scale
def zeros(shape, int32=False):
"""Return a blob of all zeros of the given shape with the correct float or
int data type.
"""
return np.zeros(shape, dtype=np.int32 if int32 else np.float32)
def ones(shape, int32=False):
"""Return a blob of all ones of the given shape with the correct float or
int data type.
"""
return np.ones(shape, dtype=np.int32 if int32 else np.float32)
def py_op_copy_blob(blob_in, blob_out):
"""Copy a numpy ndarray given as blob_in into the Caffe2 CPUTensor blob
given as blob_out. Supports float32 and int32 blob data types. This function
is intended for copying numpy data into a Caffe2 blob in PythonOps.
"""
# Some awkward voodoo required by Caffe2 to support int32 blobs
needs_int32_init = False
try:
_ = blob.data.dtype # noqa
except Exception:
needs_int32_init = blob_in.dtype == np.int32
if needs_int32_init:
# init can only take a list (failed on tuple)
blob_out.init(list(blob_in.shape), caffe2_pb2.TensorProto.INT32)
else:
blob_out.reshape(blob_in.shape)
blob_out.data[...] = blob_in
def get_loss_gradients(model, loss_blobs):
"""Generate a gradient of 1 for each loss specified in 'loss_blobs'"""
loss_gradients = {}
for b in loss_blobs:
loss_grad = model.net.ConstantFill(b, [b + '_grad'], value=1.0)
loss_gradients[str(b)] = str(loss_grad)
return loss_gradients
def serialize(obj):
"""Serialize a Python object using pickle and encode it as an array of
float32 values so that it can be feed into the workspace. See deserialize().
"""
return np.fromstring(pickle.dumps(obj), dtype=np.uint8).astype(np.float32)
def deserialize(arr):
"""Unserialize a Python object from an array of float32 values fetched from
a workspace. See serialize().
"""
return pickle.loads(arr.astype(np.uint8).tobytes())