Commit 938902b
Sped up construction of the BCast helper class for the very common case
where both shapes are the same, by using much more straightforward code
to achieve the same ultimate initialization of the instance variables.

Added a benchmark for this to bcast_test.cc.  Speeds up the same_shape
case by 65% (67 ns to 23 ns for a two-dimensional shape) without any
real effect on the different-shapes case.

Run on machine with (40 X 2801 MHz CPUs); 2016/01/28-11:12:26
CPU: Intel Ivybridge with HyperThreading (20 cores) dL1:32KB dL2:256KB dL3:25MB
Benchmark                          Base (ns)  New (ns) Improvement
------------------------------------------------------------------
BM_BCastSetup/0                          122       122     +0.0%
BM_BCastSetup/1                           67        23    +65.7%
Change: 113374076
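For concreteness, this is what the new fast path ends up storing for the two-dimensional benchmark shape. It is an illustrative sketch of mine, not code from the commit; the function name is made up, and the commented values follow directly from the new code in bcast.cc below.

#include "tensorflow/core/util/bcast.h"

namespace tensorflow {
// Hypothetical illustration of the sx == sy fast path for shape {1000, 100}.
void FastPathExample() {
  BCast b({1000, 100}, {1000, 100});
  // The constructor now fills in, without any reversing or per-dimension
  // state tracking:
  //   output_    = {1000, 100}            (the common shape)
  //   x_reshape_ = y_reshape_ = {100000}  (total element count)
  //   x_bcast_   = y_bcast_   = {1}       (no broadcasting needed)
  //   result_    = {100000}
  //   grad_x_reduce_idx_ and grad_y_reduce_idx_ stay empty.
}
}  // namespace tensorflow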
A. Unique TensorFlower authored and Vijay Vasudevan committed Jan 30, 2016
1 parent 5dc0ab7 commit 938902b
Showing 3 changed files with 145 additions and 103 deletions.
220 changes: 119 additions & 101 deletions tensorflow/core/util/bcast.cc
@@ -22,114 +22,132 @@ namespace tensorflow {
 void BCast::Reverse(Vec* shape) { std::reverse(shape->begin(), shape->end()); }
 
 BCast::BCast(const Vec& sx, const Vec& sy) {
-  // Reverse the shape of x and y for convenience.
-  // After the reverse, 0-th is the inner-most dimension.
-  Vec x = sx;
-  Reverse(&x);
-  Vec y = sy;
-  Reverse(&y);
-
-  // 1-extend and align x and y so that they are the same size.
-  if (x.size() > y.size()) {
-    y.resize(x.size(), 1);
-  } else {
-    x.resize(y.size(), 1);
-  }
-
-  // Going through each dimension starting from the inner-most
-  // dimension, compares dimension of x and y. They are compatible if
-  // they are equal or either is 1.
-  enum State {
-    UNKNOWN,
-    SAME,
-    X_ONE,
-    Y_ONE,
-  };
-  State prev = UNKNOWN;
-  const int64 n = x.size();
-  for (int i = 0; i < n; ++i) {
-    // Output shape.
-    State curr = UNKNOWN;
-    const int64 x_i = x[i];  // i-th dimension of x.
-    CHECK_GE(x_i, 0);
-    const int64 y_i = y[i];  // i-th dimension of y.
-    CHECK_GE(y_i, 0);
-    int64 o_i;   // i-th dimension of the output.
-    int64 bx_i;  // i-th broadcast for x.
-    int64 by_i;  // i-th broadcast for y.
-    // Invariant:
-    //   o_i = x_i * bx_i = y_i * by_i
-    if (x_i == y_i) {
-      // No broadcast.
-      o_i = x_i;
-      bx_i = 1;
-      by_i = 1;
-      curr = SAME;
-    } else if (x_i == 1) {
-      // x broadcast to y on this dimension.
-      o_i = y_i;
-      bx_i = y_i;
-      by_i = 1;
-      grad_x_reduce_idx_.push_back(n - 1 - i);
-      curr = X_ONE;
-    } else if (y_i == 1) {
-      // y broadcast to x on this dimension.
-      o_i = x_i;
-      bx_i = 1;
-      by_i = x_i;
-      grad_y_reduce_idx_.push_back(n - 1 - i);
-      curr = Y_ONE;
-    } else {
-      valid_ = false;
-      return;
-    }
-    output_.push_back(o_i);
-    // Reshape/broadcast.
-    // Invariant:
-    //   result[i] == x_reshape[i] * x_bcast[i] == y_reshape_[i] * y_bcast_[i]
-    if (curr == SAME && x_i == 1) {
-      // Both side are 1s.
-      grad_x_reduce_idx_.push_back(n - 1 - i);
-      grad_y_reduce_idx_.push_back(n - 1 - i);
-      continue;
-    } else if (prev == curr) {
-      // It is a run of the same cases (no broadcast, x broadcast to
-      // y, y broadcast to x). We can reshape the input so that fewer
-      // dimensions are involved in the intermediate computation.
-      result_.back() *= o_i;
-      x_reshape_.back() *= x_i;
-      x_bcast_.back() *= bx_i;
-      y_reshape_.back() *= y_i;
-      y_bcast_.back() *= by_i;
-    } else {
-      result_.push_back(o_i);
-      x_reshape_.push_back(x_i);
-      x_bcast_.push_back(bx_i);
-      y_reshape_.push_back(y_i);
-      y_bcast_.push_back(by_i);
-    }
-    prev = curr;
-  }
-
-  if (result_.empty()) {
-    // Can happen when both x and y are effectively scalar.
-    result_.push_back(1);
-    x_reshape_.push_back(1);
-    x_bcast_.push_back(1);
-    y_reshape_.push_back(1);
-    y_bcast_.push_back(1);
-  }
-
-  // Reverse all vectors since x and y were reversed at very
-  // beginning.
-  Reverse(&x_reshape_);
-  Reverse(&x_bcast_);
-  Reverse(&y_reshape_);
-  Reverse(&y_bcast_);
-  Reverse(&result_);
-  Reverse(&output_);
-  Reverse(&grad_x_reduce_idx_);
-  Reverse(&grad_y_reduce_idx_);
+  if (sx == sy) {
+    // Fast path for common case of identical shapes for sx and sy
+    int64 elements = 1;
+    const int n = sx.size();
+    output_.resize(n);
+    for (int i = 0; i < n; i++) {
+      int64 dim = sx[i];
+      elements *= dim;
+      output_[i] = dim;
+    }
+    x_reshape_.push_back(elements);
+    y_reshape_.push_back(elements);
+    x_bcast_.push_back(1);
+    y_bcast_.push_back(1);
+    result_.push_back(elements);
+    // grad_x_reduce_ and grad_y_reduce_ are left as empty
+  } else {
+    // Reverse the shape of x and y for convenience.
+    // After the reverse, 0-th is the inner-most dimension.
+    Vec x = sx;
+    Reverse(&x);
+    Vec y = sy;
+    Reverse(&y);
+
+    // 1-extend and align x and y so that they are the same size.
+    if (x.size() > y.size()) {
+      y.resize(x.size(), 1);
+    } else {
+      x.resize(y.size(), 1);
+    }
+
+    // Going through each dimension starting from the inner-most
+    // dimension, compares dimension of x and y. They are compatible if
+    // they are equal or either is 1.
+    enum State {
+      UNKNOWN,
+      SAME,
+      X_ONE,
+      Y_ONE,
+    };
+    State prev = UNKNOWN;
+    const int64 n = x.size();
+    for (int i = 0; i < n; ++i) {
+      // Output shape.
+      State curr = UNKNOWN;
+      const int64 x_i = x[i];  // i-th dimension of x.
+      CHECK_GE(x_i, 0);
+      const int64 y_i = y[i];  // i-th dimension of y.
+      CHECK_GE(y_i, 0);
+      int64 o_i;   // i-th dimension of the output.
+      int64 bx_i;  // i-th broadcast for x.
+      int64 by_i;  // i-th broadcast for y.
+      // Invariant:
+      //   o_i = x_i * bx_i = y_i * by_i
+      if (x_i == y_i) {
+        // No broadcast.
+        o_i = x_i;
+        bx_i = 1;
+        by_i = 1;
+        curr = SAME;
+      } else if (x_i == 1) {
+        // x broadcast to y on this dimension.
+        o_i = y_i;
+        bx_i = y_i;
+        by_i = 1;
+        grad_x_reduce_idx_.push_back(n - 1 - i);
+        curr = X_ONE;
+      } else if (y_i == 1) {
+        // y broadcast to x on this dimension.
+        o_i = x_i;
+        bx_i = 1;
+        by_i = x_i;
+        grad_y_reduce_idx_.push_back(n - 1 - i);
+        curr = Y_ONE;
+      } else {
+        valid_ = false;
+        return;
+      }
+      output_.push_back(o_i);
+      // Reshape/broadcast.
+      // Invariant:
+      //   result[i] == x_reshape[i] * x_bcast[i] == y_reshape_[i] * y_bcast_[i]
+      if (curr == SAME && x_i == 1) {
+        // Both side are 1s.
+        grad_x_reduce_idx_.push_back(n - 1 - i);
+        grad_y_reduce_idx_.push_back(n - 1 - i);
+        continue;
+      } else if (prev == curr) {
+        // It is a run of the same cases (no broadcast, x broadcast to
+        // y, y broadcast to x). We can reshape the input so that fewer
+        // dimensions are involved in the intermediate computation.
+        result_.back() *= o_i;
+        x_reshape_.back() *= x_i;
+        x_bcast_.back() *= bx_i;
+        y_reshape_.back() *= y_i;
+        y_bcast_.back() *= by_i;
+      } else {
+        result_.push_back(o_i);
+        x_reshape_.push_back(x_i);
+        x_bcast_.push_back(bx_i);
+        y_reshape_.push_back(y_i);
+        y_bcast_.push_back(by_i);
+      }
+      prev = curr;
+    }
+
+    if (result_.empty()) {
+      // Can happen when both x and y are effectively scalar.
+      result_.push_back(1);
+      x_reshape_.push_back(1);
+      x_bcast_.push_back(1);
+      y_reshape_.push_back(1);
+      y_bcast_.push_back(1);
+    }
+
+    // Reverse all vectors since x and y were reversed at very
+    // beginning.
+    Reverse(&x_reshape_);
+    Reverse(&x_bcast_);
+    Reverse(&y_reshape_);
+    Reverse(&y_bcast_);
+    Reverse(&result_);
+    Reverse(&output_);
+    Reverse(&grad_x_reduce_idx_);
+    Reverse(&grad_y_reduce_idx_);
+  }
 }
 
 }  // end namespace tensorflow
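To make the general (different-shapes) path concrete, here is a worked example of my own, not part of the commit; the accessor names (IsValid, x_reshape, and so on) are assumed from the portion of bcast.h elided below, and the commented values follow the loop above.

#include "tensorflow/core/util/bcast.h"

namespace tensorflow {
// Hypothetical walkthrough of BCast for x shape {3, 1} and y shape {1, 4}.
void GeneralPathExample() {
  BCast b({3, 1}, {1, 4});
  // b.IsValid()           => true (each dimension pair is equal or has a 1)
  // b.x_reshape()         => {3, 1}   b.x_bcast() => {1, 4}
  // b.y_reshape()         => {1, 4}   b.y_bcast() => {3, 1}
  // b.result_shape()      => {3, 4}   (reshape[i] * bcast[i] per dimension)
  // b.output_shape()      => {3, 4}
  // b.grad_x_reduce_idx() => {1}  (x was broadcast along dimension 1)
  // b.grad_y_reduce_idx() => {0}  (y was broadcast along dimension 0)
}
}  // namespace tensorflow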
3 changes: 1 addition & 2 deletions tensorflow/core/util/bcast.h
@@ -67,7 +67,7 @@ namespace tensorflow {
 // TODO(zhifengc): Adds support for n-ary (n >= 2).
 class BCast {
  public:
-  // A vector of int32 representing the shape of tensor. The 0-th
+  // A vector of int64 representing the shape of tensor. The 0-th
   // element is the outer-most dimension and the last element is the
   // inner-most dimension. Note that we do not use TensorShape since
   // it's more convenient to manipulate Vec directly for this module.
@@ -104,7 +104,6 @@ class BCast {
   Vec grad_y_reduce_idx_;
 
   static void Reverse(Vec* shape);
-  static bool HasZero(const Vec& shape);
 
   TF_DISALLOW_COPY_AND_ASSIGN(BCast);
 };
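As a sketch of how a consumer can materialize the reshape/broadcast pairs that BCast plans, here is my own example against the raw Eigen Tensor API, assuming rank-2 float inputs; this is not code from the commit, and actual TensorFlow kernels wrap this differently.

#include <unsupported/Eigen/CXX11/Tensor>

// For shapes {3, 1} vs {1, 4}, BCast yields x_bcast = {1, 4} and
// y_bcast = {3, 1}, turning both operands into {3, 4}.
void ApplyBCastSketch() {
  Eigen::Tensor<float, 2> x(3, 1), y(1, 4);
  x.setConstant(1.0f);
  y.setConstant(2.0f);
  Eigen::array<Eigen::Index, 2> x_bcast = {1, 4};
  Eigen::array<Eigen::Index, 2> y_bcast = {3, 1};
  Eigen::Tensor<float, 2> sum = x.broadcast(x_bcast) + y.broadcast(y_bcast);
  // sum has shape {3, 4}; every element is 3.0f.
}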
25 changes: 25 additions & 0 deletions tensorflow/core/util/bcast_test.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
 namespace {
@@ -57,6 +58,15 @@ TEST(BCastTest, Basic_SameShape) {
           "[][]");
 }

+TEST(BCastTest, Basic_SameShapeWithZeroDim) {
+  // Effectively no broadcast needed.
+  EXPECT_EQ(BCast({11, 7, 0, 3, 2}, {11, 7, 0, 3, 2}),
+            "[0][1][0][1]"
+            "[0]"
+            "[11,7,0,3,2]"
+            "[][]");
+}

 TEST(BCastTest, Basic_Scalar_Scalar) {
   // Effectively it's a scalar and a scalar.
   // [1, 1] [1]
@@ -237,5 +247,20 @@ TEST(BCastTest, TestZeroDimensionShape) {
           "[0,1,3][]");
 }

+static void BM_BCastSetup(int iters, int same_shape) {
+  if (same_shape) {
+    testing::SetLabel("same_shapes");
+    while (--iters > 0) {
+      class BCast b({1000, 100}, {1000, 100});
+    }
+  } else {
+    testing::SetLabel("different_shapes");
+    while (--iters > 0) {
+      class BCast b({3, 1, 5}, {2, 0, 3, 0, 5});
+    }
+  }
+}
+BENCHMARK(BM_BCastSetup)->Arg(0)->Arg(1);

} // namespace
} // namespace tensorflow
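Note that Arg(0) drives the different_shapes branch and Arg(1) the same_shapes branch, which is how BM_BCastSetup/0 and BM_BCastSetup/1 in the benchmark numbers above map onto the two cases.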
