tcmalloc: add test for CpuCache overflow/underflow refill count
NFC

PiperOrigin-RevId: 561874969
Change-Id: Ibfae8a223512fa56e7de92c20cd5bb08cc841e8f
dvyukov authored and copybara-github committed Sep 1, 2023
1 parent 30039dc commit d6f05e0
Showing 2 changed files with 66 additions and 27 deletions.
62 changes: 35 additions & 27 deletions tcmalloc/cpu_cache.h
@@ -974,6 +974,40 @@ inline bool CpuCache<Forwarder>::UseBackingShardedTransferCache(
         forwarder_.UseGenericShardedCache();
}

// Calculate number of objects to return/request from transfer cache.
inline size_t TargetOverflowRefillCount(size_t capacity, size_t batch_length,
                                        size_t successive) {
  // Calculate number of objects to return/request from transfer cache.
  // Generally we prefer to transfer a single batch, because transfer cache
  // handles it efficiently. Except for 2 special cases:
  size_t target = batch_length;
  // "capacity + 1" because on overflow we already have one object from caller,
  // so we can return a whole batch even if capacity is one less. Similarly,
  // on underflow we need to return one object to caller, so we can request
  // a whole batch even if capacity is one less.
  if ((capacity + 1) < batch_length) {
    // If we don't have a full batch, return/request just half. We are missing
    // transfer cache anyway, and cost of insertion into central freelist is
    // ~O(number of objects).
    target = std::max<size_t>(1, (capacity + 1) / 2);
    ASSERT(target <= batch_length);
  } else if (successive > 0 && capacity >= 3 * batch_length) {
    // If the freelist is large and we are hitting a series of overflows or
    // underflows, return/request several batches at once. On the first
    // overflow we return 1 batch, on the second -- 2, on the third -- 4 and
    // so on, up to half of the batches we have. We do this to save on the
    // cost of hitting the malloc/free slow path, reduce instruction cache
    // pollution, avoid cache misses when accessing transfer/central caches,
    // etc.
    size_t num_batches =
        std::min<size_t>(1 << std::min<uint32_t>(successive, 10),
                         ((capacity / batch_length) + 1) / 2);
    target = num_batches * batch_length;
  }
  ASSERT(target <= capacity + 1);
  ASSERT(target != 0);
  return target;
}

template <class Forwarder>
inline size_t CpuCache<Forwarder>::UpdateCapacity(int cpu, size_t size_class,
                                                  size_t batch_length,
@@ -1036,33 +1070,7 @@ inline size_t CpuCache<Forwarder>::UpdateCapacity(int cpu, size_t size_class,
    Grow(cpu, size_class, increase, to_return);
    capacity = freelist_.Capacity(cpu, size_class);
  }
  // Calculate number of objects to return/request from transfer cache.
  // Generally we prefer to transfer a single batch, because transfer cache
  // handles it efficiently. Except for 2 special cases:
  size_t target = batch_length;
  // "capacity + 1" because on overflow we already have one object from caller,
  // so we can return a whole batch even if capacity is one less. Similarly,
  // on underflow we need to return one object to caller, so we can request
  // a whole batch even if capacity is one less.
  if ((capacity + 1) < batch_length) {
    // If we don't have a full batch, return/request just half. We are missing
    // transfer cache anyway, and cost of insertion into central freelist is
    // ~O(number of objects).
    target = std::max<size_t>(1, (capacity + 1) / 2);
  } else if (successive > 0 && capacity >= 3 * batch_length) {
    // If the freelist is large and we are hitting a series of overflows or
    // underflows, return/request several batches at once. On the first
    // overflow we return 1 batch, on the second -- 2, on the third -- 4 and
    // so on, up to half of the batches we have. We do this to save on the
    // cost of hitting the malloc/free slow path, reduce instruction cache
    // pollution, avoid cache misses when accessing transfer/central caches,
    // etc.
    size_t num_batches =
        std::min<size_t>(1 << std::min<uint32_t>(successive, 10),
                         ((capacity / batch_length) + 1) / 2);
    target = num_batches * batch_length;
  }
  ASSERT(target != 0);
  return target;
  return TargetOverflowRefillCount(capacity, batch_length, successive);
}

template <class Forwarder>
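To see how the refill target ramps up under repeated overflows or underflows, the policy above can be exercised outside tcmalloc with a small standalone program. This is only an illustrative restatement of TargetOverflowRefillCount with the ASSERTs dropped; the TargetSketch name, the main() driver, and the example parameters (capacity 100, batch length 8) are ours, not part of the commit.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Illustrative copy of the refill policy sketched above; not the tcmalloc API.
size_t TargetSketch(size_t capacity, size_t batch_length, size_t successive) {
  size_t target = batch_length;
  if (capacity + 1 < batch_length) {
    // Less than a full batch fits: move roughly half of what we have.
    target = std::max<size_t>(1, (capacity + 1) / 2);
  } else if (successive > 0 && capacity >= 3 * batch_length) {
    // Repeated misses on a large freelist: 1, 2, 4, ... batches, capped at
    // about half of the batches the capacity can hold.
    size_t num_batches =
        std::min<size_t>(size_t{1} << std::min<uint32_t>(successive, 10),
                         (capacity / batch_length + 1) / 2);
    target = num_batches * batch_length;
  }
  return target;
}

int main() {
  // With capacity 100 and batch length 8, targets ramp 8 -> 16 -> 32 -> 48
  // and then stay capped at (100 / 8 + 1) / 2 = 6 batches, i.e. 48 objects.
  for (size_t successive = 0; successive < 5; ++successive) {
    std::printf("successive=%zu target=%zu\n", successive,
                TargetSketch(100, 8, successive));
  }
  return 0;
}

As the comments above describe, the overflow and underflow slow paths use this count to decide how many objects to return to, or request from, the transfer cache.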
31 changes: 31 additions & 0 deletions tcmalloc/cpu_cache_test.cc
@@ -1467,6 +1467,37 @@ TEST(CpuCacheTest, DISABLED_ChangingSizes) {
  EXPECT_EQ(env.num_cpus() * last_cache_size, capacity);
}

TEST(CpuCacheTest, TargetOverflowRefillCount) {
  auto F = cpu_cache_internal::TargetOverflowRefillCount;
  // Args are: capacity, batch_length, successive.
  EXPECT_EQ(F(0, 8, 0), 1);
  EXPECT_EQ(F(0, 8, 10), 1);
  EXPECT_EQ(F(1, 8, 0), 1);
  EXPECT_EQ(F(1, 8, 1), 1);
  EXPECT_EQ(F(1, 8, 2), 1);
  EXPECT_EQ(F(1, 8, 3), 1);
  EXPECT_EQ(F(1, 8, 4), 1);
  EXPECT_EQ(F(2, 8, 0), 1);
  EXPECT_EQ(F(3, 8, 0), 2);
  EXPECT_EQ(F(4, 8, 0), 2);
  EXPECT_EQ(F(5, 8, 0), 3);
  EXPECT_EQ(F(6, 8, 0), 3);
  EXPECT_EQ(F(7, 8, 0), 8);
  EXPECT_EQ(F(8, 8, 0), 8);
  EXPECT_EQ(F(9, 8, 0), 8);
  EXPECT_EQ(F(100, 8, 0), 8);
  EXPECT_EQ(F(23, 8, 1), 8);
  EXPECT_EQ(F(24, 8, 1), 16);
  EXPECT_EQ(F(100, 8, 1), 16);
  EXPECT_EQ(F(24, 8, 2), 16);
  EXPECT_EQ(F(32, 8, 2), 16);
  EXPECT_EQ(F(40, 8, 2), 24);
  EXPECT_EQ(F(100, 8, 2), 32);
  EXPECT_EQ(F(48, 8, 3), 24);
  EXPECT_EQ(F(56, 8, 3), 32);
  EXPECT_EQ(F(100, 8, 3), 48);
}

} // namespace
} // namespace tcmalloc_internal
} // namespace tcmalloc
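As a spot check on two of the expected values above (our own derivation, following the arithmetic in TargetOverflowRefillCount): F(5, 8, 0) takes the small-capacity branch because 5 + 1 < 8, so the target is std::max(1, (5 + 1) / 2) = 3; F(40, 8, 2) takes the successive-miss branch because 40 >= 3 * 8, giving std::min(1 << 2, (40 / 8 + 1) / 2) = std::min(4, 3) = 3 batches, i.e. 3 * 8 = 24 objects.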
