tcmalloc: add test for CpuCache overflow/underflow refill count
NFC

PiperOrigin-RevId: 561874969
Change-Id: Ibfae8a223512fa56e7de92c20cd5bb08cc841e8f
dvyukov authored and copybara-github committed Sep 1, 2023
1 parent 30039dc commit d6f05e0
Showing 2 changed files with 66 additions and 27 deletions.
62 changes: 35 additions & 27 deletions tcmalloc/cpu_cache.h
@@ -974,6 +974,40 @@ inline bool CpuCache<Forwarder>::UseBackingShardedTransferCache(
         forwarder_.UseGenericShardedCache();
}

// Calculate number of objects to return/request from transfer cache.
inline size_t TargetOverflowRefillCount(size_t capacity, size_t batch_length,
                                        size_t successive) {
  // Calculate number of objects to return/request from transfer cache.
  // Generally we prefer to transfer a single batch, because transfer cache
  // handles it efficiently. Except for 2 special cases:
  size_t target = batch_length;
  // "capacity + 1" because on overflow we already have one object from caller,
  // so we can return a whole batch even if capacity is one less. Similarly,
  // on underflow we need to return one object to caller, so we can request
  // a whole batch even if capacity is one less.
  if ((capacity + 1) < batch_length) {
    // If we don't have a full batch, return/request just half. We are missing
    // transfer cache anyway, and cost of insertion into central freelist is
    // ~O(number of objects).
    target = std::max<size_t>(1, (capacity + 1) / 2);
    ASSERT(target <= batch_length);
  } else if (successive > 0 && capacity >= 3 * batch_length) {
    // If the freelist is large and we are hitting a series of overflows or
    // underflows, return/request several batches at once. On the first
    // overflow we return 1 batch, on the second -- 2, on the third -- 4 and
    // so on, up to half of the batches we have. We do this to save on the
    // cost of hitting the malloc/free slow path, reduce instruction cache
    // pollution, avoid cache misses when accessing transfer/central caches,
    // etc.
    size_t num_batches =
        std::min<size_t>(1 << std::min<uint32_t>(successive, 10),
                         ((capacity / batch_length) + 1) / 2);
    target = num_batches * batch_length;
  }
  ASSERT(target <= capacity + 1);
  ASSERT(target != 0);
  return target;
}

template <class Forwarder>
inline size_t CpuCache<Forwarder>::UpdateCapacity(int cpu, size_t size_class,
                                                  size_t batch_length,
@@ -1036,33 +1070,7 @@ inline size_t CpuCache<Forwarder>::UpdateCapacity(int cpu, size_t size_class,
    Grow(cpu, size_class, increase, to_return);
    capacity = freelist_.Capacity(cpu, size_class);
  }
  // Calculate number of objects to return/request from transfer cache.
  // Generally we prefer to transfer a single batch, because transfer cache
  // handles it efficiently. Except for 2 special cases:
  size_t target = batch_length;
  // "capacity + 1" because on overflow we already have one object from caller,
  // so we can return a whole batch even if capacity is one less. Similarly,
  // on underflow we need to return one object to caller, so we can request
  // a whole batch even if capacity is one less.
  if ((capacity + 1) < batch_length) {
    // If we don't have a full batch, return/request just half. We are missing
    // transfer cache anyway, and cost of insertion into central freelist is
    // ~O(number of objects).
    target = std::max<size_t>(1, (capacity + 1) / 2);
  } else if (successive > 0 && capacity >= 3 * batch_length) {
    // If the freelist is large and we are hitting a series of overflows or
    // underflows, return/request several batches at once. On the first
    // overflow we return 1 batch, on the second -- 2, on the third -- 4 and
    // so on, up to half of the batches we have. We do this to save on the
    // cost of hitting the malloc/free slow path, reduce instruction cache
    // pollution, avoid cache misses when accessing transfer/central caches,
    // etc.
    size_t num_batches =
        std::min<size_t>(1 << std::min<uint32_t>(successive, 10),
                         ((capacity / batch_length) + 1) / 2);
    target = num_batches * batch_length;
  }
  ASSERT(target != 0);
  return target;
  return TargetOverflowRefillCount(capacity, batch_length, successive);
}

template <class Forwarder>
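To see how the refill target ramps up under repeated overflows or underflows, the policy above can be exercised outside tcmalloc with a small standalone program. This is only an illustrative restatement of TargetOverflowRefillCount with the ASSERTs dropped; the TargetSketch name, the main() driver, and the example parameters (capacity 100, batch length 8) are ours, not part of the commit.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Illustrative copy of the refill policy sketched above; not the tcmalloc API.
size_t TargetSketch(size_t capacity, size_t batch_length, size_t successive) {
  size_t target = batch_length;
  if (capacity + 1 < batch_length) {
    // Less than a full batch fits: move roughly half of what we have.
    target = std::max<size_t>(1, (capacity + 1) / 2);
  } else if (successive > 0 && capacity >= 3 * batch_length) {
    // Repeated misses on a large freelist: 1, 2, 4, ... batches, capped at
    // about half of the batches the capacity can hold.
    size_t num_batches =
        std::min<size_t>(size_t{1} << std::min<uint32_t>(successive, 10),
                         (capacity / batch_length + 1) / 2);
    target = num_batches * batch_length;
  }
  return target;
}

int main() {
  // With capacity 100 and batch length 8, targets ramp 8 -> 16 -> 32 -> 48
  // and then stay capped at (100 / 8 + 1) / 2 = 6 batches, i.e. 48 objects.
  for (size_t successive = 0; successive < 5; ++successive) {
    std::printf("successive=%zu target=%zu\n", successive,
                TargetSketch(100, 8, successive));
  }
  return 0;
}

As the comments above describe, the overflow and underflow slow paths use this count to decide how many objects to return to, or request from, the transfer cache.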
31 changes: 31 additions & 0 deletions tcmalloc/cpu_cache_test.cc
@@ -1467,6 +1467,37 @@ TEST(CpuCacheTest, DISABLED_ChangingSizes) {
  EXPECT_EQ(env.num_cpus() * last_cache_size, capacity);
}

TEST(CpuCacheTest, TargetOverflowRefillCount) {
  auto F = cpu_cache_internal::TargetOverflowRefillCount;
  // Args are: capacity, batch_length, successive.
  EXPECT_EQ(F(0, 8, 0), 1);
  EXPECT_EQ(F(0, 8, 10), 1);
  EXPECT_EQ(F(1, 8, 0), 1);
  EXPECT_EQ(F(1, 8, 1), 1);
  EXPECT_EQ(F(1, 8, 2), 1);
  EXPECT_EQ(F(1, 8, 3), 1);
  EXPECT_EQ(F(1, 8, 4), 1);
  EXPECT_EQ(F(2, 8, 0), 1);
  EXPECT_EQ(F(3, 8, 0), 2);
  EXPECT_EQ(F(4, 8, 0), 2);
  EXPECT_EQ(F(5, 8, 0), 3);
  EXPECT_EQ(F(6, 8, 0), 3);
  EXPECT_EQ(F(7, 8, 0), 8);
  EXPECT_EQ(F(8, 8, 0), 8);
  EXPECT_EQ(F(9, 8, 0), 8);
  EXPECT_EQ(F(100, 8, 0), 8);
  EXPECT_EQ(F(23, 8, 1), 8);
  EXPECT_EQ(F(24, 8, 1), 16);
  EXPECT_EQ(F(100, 8, 1), 16);
  EXPECT_EQ(F(24, 8, 2), 16);
  EXPECT_EQ(F(32, 8, 2), 16);
  EXPECT_EQ(F(40, 8, 2), 24);
  EXPECT_EQ(F(100, 8, 2), 32);
  EXPECT_EQ(F(48, 8, 3), 24);
  EXPECT_EQ(F(56, 8, 3), 32);
  EXPECT_EQ(F(100, 8, 3), 48);
}

} // namespace
} // namespace tcmalloc_internal
} // namespace tcmalloc
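As a spot check on two of the expected values above (our own derivation, following the arithmetic in TargetOverflowRefillCount): F(5, 8, 0) takes the small-capacity branch because 5 + 1 < 8, so the target is std::max(1, (5 + 1) / 2) = 3; F(40, 8, 2) takes the successive-miss branch because 40 >= 3 * 8, giving std::min(1 << 2, (40 / 8 + 1) / 2) = std::min(4, 3) = 3 batches, i.e. 3 * 8 = 24 objects.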
