forked from pdollar/toolbox
-
Notifications
You must be signed in to change notification settings - Fork 0
/
convnFast.m
272 lines (246 loc) · 9.21 KB
/
convnFast.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
function C = convnFast( A, B, shape )
% Fast convolution, replacement for both conv2 and convn.
%
% See conv2 or convn for more information on convolution in general.
%
% This works as a replacement for both conv2 and convn.  Basically,
% performs convolution in either the frequency or spatial domain, depending
% on which it thinks will be faster (see below). In general, if A is much
% bigger than B then spatial convolution will be faster, but if B is of
% similar size to A and both are fairly big (such as in the case of
% correlation), convolution as multiplication in the frequency domain will
% tend to be faster.
%
% The shape flag can take on 1 additional value which is 'smooth'.  This
% flag is intended for use with smoothing kernels.  The returned matrix C
% is the same size as A with boundary effects handled in a special manner.
% That is instead of A being zero padded before being convolved with B;
% near the boundaries a cropped version of the matrix B is used, and the
% result is scaled by the fraction of the weight found in the cropped
% version of B.  In this case each dimension of B must be odd, and all
% elements of B must be positive.  There are other restrictions on when
% this flag can be used, and in general it is only useful for smoothing
% kernels.  For 2D filtering it does not have much overhead, for 3D it has
% more and for higher dimensions much much more.
%
% For optimal performance some timing constants must be set to choose
% between doing convolution in the spatial and frequency domains, for more
% info see timeConv below.
%
% USAGE
%  C = convnFast( A, B, [shape] )
%
% INPUTS
%  A       - d dimensional input matrix
%  B       - d dimensional matrix to convolve with A
%  shape   - ['full'] 'valid', 'full', 'same', or 'smooth'
%
% OUTPUTS
%  C       - result of convolution
%
% EXAMPLE
%  C = convnFast( [1 2 3], [1 1] );             % C = [1 3 5 3]
%  S = convnFast( rand(20), ones(5), 'smooth' ); % same size as input
%
% See also CONV2, CONVN
%
% Piotr's Image&Video Toolbox      Version 2.61
% Copyright 2011 Piotr Dollar.  [pdollar-at-caltech.edu]
% Please email me if you find bugs, or have suggestions or questions!
% Licensed under the Lesser GPL [see external/lgpl.txt]

if( nargin<3 || isempty(shape)); shape='full'; end
if(isempty(strmatch(shape, char({'same', 'valid', 'full', 'smooth'}))))
  error( 'convnFast: unknown shape flag' ); end

% 'smooth' is computed as a 'same' convolution followed by a boundary
% correction; shapeorig keeps the caller's flag for the recursive calls
shapeorig = shape;
smoothFlag = (strcmp(shape,'smooth'));
if( smoothFlag ); shape = 'same'; end;

% get dimensions of A and B (pad the shorter size vector with ones)
ndA = ndims(A);  ndB = ndims(B);  nd = max(ndA,ndB);
sizA = size(A);  sizB = size(B);
if (ndA>ndB); sizB = [sizB ones(1,ndA-ndB)]; end
if (ndA<ndB); sizA = [sizA ones(1,ndB-ndA)]; end

% ERROR CHECK if smoothflag
if( smoothFlag )
  if( ~all( mod(sizB,2)==1 ) )
    error('If flag==''smooth'' then must have odd sized mask');
  end;
  % B(:) is required: all() on a matrix works per column and IF on the
  % resulting vector is true only when every column fails, which let
  % masks containing some non-positive entries slip through
  if( ~all( B(:)>0 ) )
    error('If flag==''smooth'' then mask must have >0 values.');
  end;
  if( any( (sizB-1)/2>sizA ) )
    error('B is more then twice as big as A, cannot use flag==''smooth''');
  end;
end

% OPTIMIZATION for 3D conv when B is actually 2D - calls (spatial) conv2
% repeatedly on 2D slices of A.  Note that may need to rearrange A and B
% first and use recursion. The benefits carry over to convnBound
% (which is faster for 2D arrays).
if( ndA==3 && ndB==3 && (sizB(1)==1 || sizB(2)==1) )
  % move the singleton dimension of B last so that B becomes 2D, recurse,
  % then undo the permutation on the result
  if (sizB(1)==1)
    A = permute( A, [2 3 1]); B = permute( B, [2 3 1]);
    C = convnFast( A, B, shapeorig );
    C = permute( C, [3 1 2] );
  elseif (sizB(2)==1)
    A = permute( A, [3 1 2]); B = permute( B, [3 1 2]);
    C = convnFast( A, B, shapeorig );
    C = permute( C, [2 3 1] );
  end
  return;
elseif( ndA==3 && ndB==2 )
  % convolve the first slice to learn the output size, then the rest
  C1 = conv2( A(:,:,1), B, shape );
  C = zeros( [size(C1), sizA(3)] ); C(:,:,1) = C1;
  for i=2:sizA(3); C(:,:,i) = conv2( A(:,:,i), B, shape ); end
  if (smoothFlag)
    for i=1:sizA(3)
      C(:,:,i) = convnBound(A(:,:,i),B,C(:,:,i),sizA(1:2),sizB(1:2));
    end
  end
  return;
end

% get predicted time of convolution in frequency and spatial domain
% constants taken from timeConv
sizfft = 2.^ceil(real(log2(sizA+sizB-1))); psizfft=prod(sizfft);
frequenPt = 3 * 1e-7 * psizfft * log(psizfft);
if (nd==2)
  spatialPt = 5e-9 * sizA(1) * sizA(2) * sizB(1) * sizB(2);
else
  spatialPt = 5e-8 * prod(sizA) * prod(sizB);
end

% perform convolution in whichever domain is predicted to be faster
if ( spatialPt < frequenPt )
  if (nd==2)
    C = conv2( A, B, shape );
  else
    C = convn( A, B, shape );
  end
else
  C = convnFreq( A, B, sizA, sizB, shape );
end;

% now correct boundary effects (if shape=='smooth')
if( ~smoothFlag ); return; end;
C = convnBound( A, B, C, sizA, sizB );
function C = convnBound( A, B, C, sizA, sizB )
% Recompute the boundary values of C in the spatial domain ('smooth').
%
% Every location of C that lies within radii=(sizB-1)/2 of a border of A
% is overwritten. The new value is the sum of the elementwise product of
% A with the part of the (flipped) mask B that overlaps A when centered
% on that location, divided by the sum of that part of B and multiplied
% by the sum of all of B.
%
% INPUTS
%  A     - input array
%  B     - mask, sizB must be odd in every dimension (checked by caller)
%  C     - array the size of A whose boundary entries are to be replaced
%  sizA  - [1xnd] size of A
%  sizB  - [1xnd] size of B (same length as sizA)
%
% OUTPUTS
%  C     - input C with its boundary entries overwritten

nd = length(sizA);
radii = (sizB-1)/2;

% flip B along every dimension (conv flips B); done with plain indexing
% instead of flipdim, which is no longer a recommended function
for d=1:nd
  flp = repmat({':'},1,nd); flp{d} = sizB(d):-1:1; B = B(flp{:});
end

% accelerated case for 1D mask B. The window around each boundary
% column/row is clipped to A on both sides, so this also works when
% sizA(d)<2*r (the unclipped form indexed past the end of A in that
% case); each boundary location is visited exactly once.
if( nd==2 && sizB(1)==1 )
  sumB=sum(B(:)); r=radii(2); n=sizA(2); O=ones(1,sizA(1));
  for c=[1:r, max(r+1,n-r+1):n]
    a1=max(1,c-r); a2=min(n,c+r);          % columns of A under the mask
    Bi=B(a1-c+r+1:a2-c+r+1);               % matching part of the mask
    C(:,c)=sum(A(:,a1:a2).*Bi(O,:),2)/sum(Bi)*sumB;
  end; return;
elseif( nd==2 && sizB(2)==1 )
  sumB=sum(B(:)); r=radii(1); n=sizA(1); O=ones(1,sizA(2));
  for c=[1:r, max(r+1,n-r+1):n]
    a1=max(1,c-r); a2=min(n,c+r);          % rows of A under the mask
    Bi=B(a1-c+r+1:a2-c+r+1);               % matching part of the mask
    C(c,:)=sum(A(a1:a2,:).*Bi(:,O),1)/sum(Bi)*sumB;
  end; return;
end

% get location that need to be updated: mark the first and last radii(d)
% entries along every dimension d
inds = {':'}; inds = inds(:,ones(1,nd));
Dind = zeros( sizA );
for d=1:nd
  inds1 = inds; inds1{ d } = 1:radii(d);
  inds2 = inds; inds2{ d } = sizA(d)-radii(d)+1:sizA(d);
  Dind(inds1{:}) = 1; Dind(inds2{:}) = 1;
end
Dind = find( Dind );
Dndx = ind2sub2( sizA, Dind );
nlocs = length(Dind);

% get cuboid dimensions for all the boundary regions: the part of A
% covered by the mask (clipped to A) and the matching part of B
sizeArep = repmat( sizA, [nlocs,1] );
radiiRep = repmat( radii, [nlocs,1] );
Astarts = max(1,Dndx-radiiRep);
Aends = min( sizeArep, Dndx+radiiRep);
Bstarts = Astarts + (1-Dndx+radiiRep);
Bends = Bstarts + (Aends-Astarts);

% now update these locations
vs = zeros( 1, nlocs );
if( nd==2 )
  for i=1:nlocs % accelerated for 2D arrays
    Apart = A( Astarts(i,1):Aends(i,1), Astarts(i,2):Aends(i,2) );
    Bpart = B( Bstarts(i,1):Bends(i,1), Bstarts(i,2):Bends(i,2) );
    v = (Apart.*Bpart); vs(i) = sum(v(:)) ./ sum(Bpart(:));
  end
elseif( nd==3 ) % accelerated for 3D arrays
  for i=1:nlocs
    Apart = A( Astarts(i,1):Aends(i,1), Astarts(i,2):Aends(i,2), ...
      Astarts(i,3):Aends(i,3) );
    Bpart = B( Bstarts(i,1):Bends(i,1), Bstarts(i,2):Bends(i,2), ...
      Bstarts(i,3):Bends(i,3) );
    za = sum(sum(sum(Apart.*Bpart))); zb=sum(sum(sum(Bpart)));
    vs(1,i) = za./zb;
  end
else % general case [slow]
  extract=cell(1,nd);
  for i=1:nlocs
    for d=1:nd; extract{d} = Astarts(i,d):Aends(i,d); end
    Apart = A( extract{:} );
    for d=1:nd; extract{d} = Bstarts(i,d):Bends(i,d); end
    Bpart = B( extract{:} );
    v = (Apart.*Bpart); vs(i) = sum(v(:)) ./ sum(Bpart(:));
  end
end
% rescale by the total weight of B
C( Dind ) = vs * sum(B(:));
function C = convnFreq( A, B, sizA, sizB, shape )
% Convolution of A and B computed as a product in the frequency domain.
%
% INPUTS
%  A, B        - arrays to convolve
%  sizA, sizB  - their sizes, as vectors of equal length
%  shape       - 'full', 'same' or 'valid'
%
% OUTPUTS
%  C           - convolution result, resized according to shape

% both transforms are zero padded to the size of the full convolution
fullSiz = sizA + sizB - 1;
C = ifftn( fftn(A,fullSiz) .* fftn(B,fullSiz) );

% real inputs must give a real output (drop the imaginary residue)
if( isreal(A) && isreal(B) )
  C = real(C);
end

% 'full' needs no further processing
if( strcmp(shape,'full') ); return; end

% otherwise hand the target size to arrayToDims (helper defined outside
% this file, presumably crops C to that size -- confirm)
if( strcmp(shape,'valid') )
  C = arrayToDims( C, max(0,sizA-sizB+1 ) );
elseif( strcmp(shape,'same') )
  C = arrayToDims( C, sizA );
else
  error('unknown shape');
end
function K = timeConv( type, mintime ) %#ok<DEFNU>
% Function used to calculate constants for prediction of convolution in the
% frequency and spatial domains.  Method taken from normxcorr2.m
% May need to reset K's if placing this on a new machine, however, their
% ratio should be about the same.
%
% USAGE
%  K = timeConv( [type], [mintime] )
%
% INPUTS
%  type     - [3] which operation to time: 1=conv2, 2=convn, 3=fft
%  mintime  - [4] minimum cpu time in seconds to spend in the timing loop
%
% OUTPUTS
%  K        - estimated time constant for the selected operation

if( nargin<1 || isempty(type) ); type = 3; end
if( nargin<2 || isempty(mintime) ); mintime = 4; end

switch type
  case 1 % conv2 [[empirically K = 5e-9]]
    % convolution time = K*prod(size(a))*prod(size(b))
    siza = 30; sizb = 200;
    a = ones(siza); b = ones(sizb);
    % repeat the operation until at least mintime of cpu time has passed
    t1 = cputime; t2 = t1; k = 0;
    while (t2-t1)<mintime;
      disc = conv2(a,b); k = k + 1; t2 = cputime; %#ok<NASGU>
    end
    K = (t2-t1)/k/siza^2/sizb^2;

  case 2 % convn [[empirically K = 5e-8]]
    % convolution time = K*prod(size(a))*prod(size(b))
    siza = [10 10 10]; sizb = [30 30 10];
    a = ones(siza); b = ones(sizb);
    t1 = cputime; t2 = t1; k = 0;
    while (t2-t1)<mintime;
      disc = convn(a,b); k = k + 1; t2 = cputime; %#ok<NASGU>
    end
    K = (t2-t1)/k/prod(siza)/prod(sizb);

  case 3 % fft (one dimensional) [[empirically K = 1e-7]]
    % fft time = K * n log(n) [if n is power of 2]
    % Works fastest for powers of 2.  (so always zero pad until have
    % size of power of 2?).  2 dimensional fft has to apply single
    % dimensional fft to each column, and then single dimensional fft
    % to each resulting row.  time = K * (mn)log(mn).  Likewise for
    % higher dimensions.  convnFreq requires 3 such ffts.
    n = 2^nextpow2(2^15);
    vec = complex(rand(n,1),rand(n,1));
    t1 = cputime; t2 = t1; k = 0;
    while (t2-t1) < mintime;
      disc = fft(vec); k = k + 1; t2 = cputime; %#ok<NASGU>
    end
    K = (t2-t1) / k / n / log(n);

  otherwise
    % without this K would be left unassigned for an unknown selector
    error('timeConv: unknown type');
end