Skip to content

Commit

Permalink
Try to fix MultinomialSampler (PaddlePaddle#102)
Browse files: browse the repository at this point in the history
* Also refine the unit test to run multiple iterations, so that it cannot pass merely because of a lucky random number.
  • Loading branch information
reyoung authored and emailweixu committed Sep 23, 2016
1 parent 8e957df commit 7eb29f2
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 31 deletions.
5 changes: 3 additions & 2 deletions paddle/gserver/layers/MultinomialSampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace paddle {

MultinomialSampler::MultinomialSampler(const real* prob, int size)
: rand_(0.0, size) {
intervals_.reserve(size + 1);
intervals_.resize(size + 1);
double sum = 0;
for (int i = 0; i < size; ++i) {
sum += prob[i];
Expand Down Expand Up @@ -50,12 +50,13 @@ MultinomialSampler::MultinomialSampler(const real* prob, int size)
int bigPos = nextBigPos(0);

auto fillIntervals = [&]() {
while (bigPos < size && smallPos < size) {
while (bigPos < size) {
while (intervals_[bigPos].thresh > 1 && smallPos < size) {
intervals_[smallPos].otherId = bigPos;
intervals_[bigPos].thresh -= 1 - intervals_[smallPos].thresh;
smallPos = nextSmallPos(smallPos + 1);
}
if (smallPos >= size) break;
bigPos = nextBigPos(bigPos + 1);
// If intervals_[bigPos].thresh < 1, it becomes a small interval
}
Expand Down
62 changes: 33 additions & 29 deletions paddle/gserver/tests/test_MultinomialSampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,39 +41,42 @@ class MultinomialSamplerTester : public MultinomialSampler {
TEST(MultinomialSampler, gen) {
int numGrids = 1024 * 1024;
int size = 1024 * 4;

default_random_engine reng;
uniform_int_distribution<int> rand(1, numGrids / size * 1.8);
vector<real> prob;
int sum = 0;
for (int i = 0; i < size; ++i) {
prob.push_back(rand(reng));
sum += prob.back();
}
CHECK_LE(sum, numGrids);
prob.back() += numGrids - sum;

vector<int> counts(size);
MultinomialSamplerTester sampler(&prob[0], size);
counts.assign(size, 0);
{
double s = (double)size / (double)numGrids;
REGISTER_TIMER("MultinomialSampler");
for (double i = 0; i < numGrids; ++i) {
int ret = sampler.testGen([i, s]() { return s * i; });
if (ret < 0 || ret >= size) {
EXPECT_GE(ret, 0);
EXPECT_LT(ret, size);
break;
for (size_t iter=0; iter < 256; ++iter) {
uniform_int_distribution<int> rand(1, numGrids / size * 1.8);
vector<real> prob;
int sum = 0;
for (int i = 0; i < size; ++i) {
prob.push_back(rand(reng));
sum += prob.back();
}

CHECK_LE(sum, numGrids);
prob.back() += numGrids - sum;

vector<int> counts(size);
MultinomialSamplerTester sampler(&prob[0], size);
counts.assign(size, 0);
{
double s = (double)size / (double)numGrids;
REGISTER_TIMER("MultinomialSampler");
for (double i = 0; i < numGrids; ++i) {
int ret = sampler.testGen([i, s]() { return s * i; });
if (ret < 0 || ret >= size) {
EXPECT_GE(ret, 0);
EXPECT_LT(ret, size);
break;
}
++counts[ret];
}
++counts[ret];
}
}
for (int i = 0; i < size; ++i) {
if (prob[i] != counts[i]) {
EXPECT_EQ(prob[i], counts[i]);
LOG(INFO) << "i=" << i;
break;
for (int i = 0; i < size; ++i) {
if (prob[i] != counts[i]) {
EXPECT_EQ(prob[i], counts[i]);
LOG(INFO) << iter;
break;
}
}
}
}
Expand Down Expand Up @@ -135,6 +138,7 @@ void benchmarkRandom() {
LOG(INFO) << "sum1=" << sum1;
}


int main(int argc, char** argv) {
initMain(argc, argv);
testing::InitGoogleTest(&argc, argv);
Expand Down

0 comments on commit 7eb29f2

Please sign in to comment.