Skip to content

Commit

Permalink
first half of new slab automover
Browse files Browse the repository at this point in the history
If any slab class has more than two pages' worth of free chunks, attempt to
free one page back to a global pool.

Introduce a new slab page move destination, "0", which is a global page
pool. Pages can be re-assigned out of that pool during allocation.

Combined with item rescuing from the previous patch, we can safely shuffle
pages back to the reassignment pool as chunks free up naturally. This should
be a safe default going forward. Users should be able to decide to free or
move pages based on eviction pressure as well. This is coming up in another
commit.

This also fixes a calculation of the NOEXP LRU size, and completely removes
the old slab automover thread. Slab automove decisions will now be part of the
lru maintainer thread.
  • Loading branch information
dormando committed Nov 19, 2015
1 parent d5185f9 commit d6e9646
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 125 deletions.
28 changes: 13 additions & 15 deletions items.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ int item_is_flushed(item *it) {

static unsigned int noexp_lru_size(int slabs_clsid) {
int id = CLEAR_LRU(slabs_clsid);
id |= NOEXP_LRU;
unsigned int ret;
pthread_mutex_lock(&lru_locks[id]);
ret = sizes[id];
Expand Down Expand Up @@ -478,20 +479,6 @@ char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, u
return buffer;
}

/* Adds per-class eviction counts into the caller's array.
 *
 * For every slab class n in [0, MAX_NUMBER_OF_SLAB_CLASSES), the evicted
 * counter of each of the four LRU ids (n | lru_type_map[0..3]) is added to
 * evicted[n]. Each counter is read while holding that LRU's lru_locks entry.
 *
 * The values are accumulated with +=, never assigned, so the caller must
 * pass an array of at least MAX_NUMBER_OF_SLAB_CLASSES entries that has
 * already been zeroed (or that holds totals it wants to add to).
 */
void item_stats_evictions(uint64_t *evicted) {
    int cls;
    int sub;

    for (cls = 0; cls < MAX_NUMBER_OF_SLAB_CLASSES; cls++) {
        uint64_t *slot = &evicted[cls];

        for (sub = 0; sub < 4; sub++) {
            const int lru_id = cls | lru_type_map[sub];

            pthread_mutex_lock(&lru_locks[lru_id]);
            *slot += itemstats[lru_id].evicted;
            pthread_mutex_unlock(&lru_locks[lru_id]);
        }
    }
}

void item_stats_totals(ADD_STAT add_stats, void *c) {
itemstats_t totals;
memset(&totals, 0, sizeof(itemstats_t));
Expand Down Expand Up @@ -907,11 +894,22 @@ static int lru_maintainer_juggle(const int slabs_clsid) {
int did_moves = 0;
bool mem_limit_reached = false;
unsigned int total_chunks = 0;
unsigned int chunks_perslab = 0;
unsigned int chunks_free = 0;
/* TODO: if free_chunks below high watermark, increase aggressiveness */
slabs_available_chunks(slabs_clsid, &mem_limit_reached, &total_chunks);
chunks_free = slabs_available_chunks(slabs_clsid, &mem_limit_reached,
&total_chunks, &chunks_perslab);
if (settings.expirezero_does_not_evict)
total_chunks -= noexp_lru_size(slabs_clsid);

/* If slab automove is enabled on any level, and we have more than 2 pages
* worth of chunks free in this class, ask (gently) to reassign a page
* from this class back into the global pool (0)
*/
if (settings.slab_automove > 0 && chunks_free > (chunks_perslab * 2)) {
slabs_reassign(slabs_clsid, SLAB_GLOBAL_PAGE_POOL);
}

/* Juggle HOT/WARM up to N times */
for (i = 0; i < 1000; i++) {
int do_more = 0;
Expand Down
1 change: 0 additions & 1 deletion items.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv);
item *do_item_touch(const char *key, const size_t nkey, uint32_t exptime, const uint32_t hv);
void item_stats_reset(void);
extern pthread_mutex_t lru_locks[POWER_LARGEST];
void item_stats_evictions(uint64_t *evicted);

enum crawler_result_type {
CRAWLER_OK=0, CRAWLER_RUNNING, CRAWLER_BADCLASS, CRAWLER_NOTSTARTED
Expand Down
1 change: 1 addition & 0 deletions memcached.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
/* Slab sizing definitions. */
#define POWER_SMALLEST 1
#define POWER_LARGEST 256 /* actual cap is 255 */
#define SLAB_GLOBAL_PAGE_POOL 0 /* magic slab class for storing pages for reassignment */
#define CHUNK_ALIGN_BYTES 8
/* slab class max is a 6-bit number, -1. */
#define MAX_NUMBER_OF_SLAB_CLASSES (63 + 1)
Expand Down
140 changes: 33 additions & 107 deletions slabs.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,20 +194,34 @@ static void split_slab_page_into_freelist(char *ptr, const unsigned int id) {
}
}

/* Takes one page out of the global page pool (slab class
 * SLAB_GLOBAL_PAGE_POOL) and returns it, or returns NULL if the pool is
 * empty.
 *
 * The page removed is the one at the end of slab_list, which is also where
 * slab_rebalance_finish() appends pages, so the pool hands pages back in
 * last-in, first-out order.
 *
 * NOTE(review): no lock is taken here; presumably callers already hold
 * slabs_lock -- confirm.
 */
static void *get_page_from_global_pool(void) {
    slabclass_t *pool = &slabclass[SLAB_GLOBAL_PAGE_POOL];

    if (pool->slabs >= 1) {
        pool->slabs--;
        return pool->slab_list[pool->slabs];
    }
    return NULL;
}

static int do_slabs_newslab(const unsigned int id) {
slabclass_t *p = &slabclass[id];
slabclass_t *g = &slabclass[SLAB_GLOBAL_PAGE_POOL];
int len = settings.slab_reassign ? settings.item_size_max
: p->size * p->perslab;
char *ptr;

if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0)) {
if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0
&& g->slabs == 0)) {
mem_limit_reached = true;
MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
return 0;
}

if ((grow_slab_list(id) == 0) ||
((ptr = memory_allocate((size_t)len)) == 0)) {
(((ptr = get_page_from_global_pool()) == NULL) &&
((ptr = memory_allocate((size_t)len)) == 0))) {

MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
return 0;
Expand Down Expand Up @@ -307,6 +321,11 @@ bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c) {
APPEND_STAT("curr_items", "%u", stats.curr_items);
APPEND_STAT("total_items", "%u", stats.total_items);
STATS_UNLOCK();
if (settings.slab_automove > 0) {
pthread_mutex_lock(&slabs_lock);
APPEND_STAT("slab_global_page_pool", "%u", slabclass[SLAB_GLOBAL_PAGE_POOL].slabs);
pthread_mutex_unlock(&slabs_lock);
}
item_stats_totals(add_stats, c);
} else if (nz_strcmp(nkey, stat_type, "items") == 0) {
item_stats(add_stats, c);
Expand Down Expand Up @@ -446,7 +465,7 @@ void slabs_adjust_mem_requested(unsigned int id, size_t old, size_t ntotal)
}

unsigned int slabs_available_chunks(const unsigned int id, bool *mem_flag,
unsigned int *total_chunks) {
unsigned int *total_chunks, unsigned int *chunks_perslab) {
unsigned int ret;
slabclass_t *p;

Expand All @@ -457,6 +476,8 @@ unsigned int slabs_available_chunks(const unsigned int id, bool *mem_flag,
*mem_flag = mem_limit_reached;
if (total_chunks != NULL)
*total_chunks = p->slabs * p->perslab;
if (chunks_perslab != NULL)
*chunks_perslab = p->perslab;
pthread_mutex_unlock(&slabs_lock);
return ret;
}
Expand All @@ -476,7 +497,7 @@ static int slab_rebalance_start(void) {

if (slab_rebal.s_clsid < POWER_SMALLEST ||
slab_rebal.s_clsid > power_largest ||
slab_rebal.d_clsid < POWER_SMALLEST ||
slab_rebal.d_clsid < SLAB_GLOBAL_PAGE_POOL ||
slab_rebal.d_clsid > power_largest ||
slab_rebal.s_clsid == slab_rebal.d_clsid)
no_go = -2;
Expand Down Expand Up @@ -720,7 +741,7 @@ static void slab_rebalance_finish(void) {
pthread_mutex_lock(&slabs_lock);

s_cls = &slabclass[slab_rebal.s_clsid];
d_cls = &slabclass[slab_rebal.d_clsid];
d_cls = &slabclass[slab_rebal.d_clsid];

/* At this point the stolen slab is completely clear.
* We always kill the "first"/"oldest" slab page in the slab_list, so
Expand All @@ -734,8 +755,11 @@ static void slab_rebalance_finish(void) {
memset(slab_rebal.slab_start, 0, (size_t)settings.item_size_max);

d_cls->slab_list[d_cls->slabs++] = slab_rebal.slab_start;
split_slab_page_into_freelist(slab_rebal.slab_start,
slab_rebal.d_clsid);
/* Don't need to split the page into chunks if we're just storing it */
if (slab_rebal.d_clsid > SLAB_GLOBAL_PAGE_POOL) {
split_slab_page_into_freelist(slab_rebal.slab_start,
slab_rebal.d_clsid);
}

slab_rebal.done = 0;
slab_rebal.s_clsid = 0;
Expand All @@ -758,97 +782,6 @@ static void slab_rebalance_finish(void) {
}
}

/* Decides whether a slab page should be moved between two classes.
 *
 * Returns 1 and fills *src / *dst when both a source and a destination class
 * were chosen; returns 0 otherwise (including when called again before
 * next_run, which is set 10 ticks of current_time ahead on every real run).
 *
 * Source:      the first class that has had no new evictions and more than
 *              2 pages for at least 3 consecutive runs.
 * Destination: the class with the largest eviction delta, once it has been
 *              the largest for at least 3 consecutive runs.
 *
 * State is kept across calls in function-static variables, so this is meant
 * to be driven from a single thread.
 *
 * Move to its own thread (created/destroyed as needed) once automover is more
 * complex.
 */
static int slab_automove_decision(int *src, int *dst) {
    static uint64_t evicted_old[MAX_NUMBER_OF_SLAB_CLASSES];
    static unsigned int slab_zeroes[MAX_NUMBER_OF_SLAB_CLASSES];
    static unsigned int slab_winner = 0;
    static unsigned int slab_wins   = 0;
    /* Must start at zero: item_stats_evictions() is handed this array to
     * fill, and the deltas below are only meaningful if no stale stack
     * contents are mixed into the totals. */
    uint64_t evicted_new[MAX_NUMBER_OF_SLAB_CLASSES] = {0};
    uint64_t evicted_diff = 0;
    uint64_t evicted_max  = 0;
    unsigned int highest_slab = 0;
    unsigned int total_pages[MAX_NUMBER_OF_SLAB_CLASSES];
    int i;
    int source = 0;
    int dest = 0;
    static rel_time_t next_run;

    /* Run less frequently than the slabmove tester. */
    if (current_time >= next_run) {
        next_run = current_time + 10;
    } else {
        return 0;
    }

    item_stats_evictions(evicted_new);
    /* Snapshot page counts under the slabs lock so the scan below can run
     * without holding it. */
    pthread_mutex_lock(&slabs_lock);
    for (i = POWER_SMALLEST; i < power_largest; i++) {
        total_pages[i] = slabclass[i].slabs;
    }
    pthread_mutex_unlock(&slabs_lock);

    /* Find a candidate source; something with zero evicts 3+ times */
    for (i = POWER_SMALLEST; i < power_largest; i++) {
        evicted_diff = evicted_new[i] - evicted_old[i];
        if (evicted_diff == 0 && total_pages[i] > 2) {
            slab_zeroes[i]++;
            if (source == 0 && slab_zeroes[i] >= 3)
                source = i;
        } else {
            /* Any eviction (or too few pages) resets the idle streak. */
            slab_zeroes[i] = 0;
            if (evicted_diff > evicted_max) {
                evicted_max = evicted_diff;
                highest_slab = i;
            }
        }
        evicted_old[i] = evicted_new[i];
    }

    /* Pick a valid destination */
    if (slab_winner != 0 && slab_winner == highest_slab) {
        slab_wins++;
        if (slab_wins >= 3)
            dest = slab_winner;
    } else {
        /* New leader: restart its winning streak. */
        slab_wins = 1;
        slab_winner = highest_slab;
    }

    if (source && dest) {
        *src = source;
        *dst = dest;
        return 1;
    }
    return 0;
}

/* Slab automove thread body.
 *
 * Loops for as long as do_run_slab_thread is non-zero. While
 * settings.slab_automove is 1 it asks slab_automove_decision() for a
 * source/destination pair once per second and forwards any decision to
 * slabs_reassign(). Otherwise it just sleeps 5 seconds per iteration.
 *
 * Does not use spinlocks since it is not timing sensitive. Burn less CPU and
 * go to sleep if locks are contended.
 *
 * arg is unused; always returns NULL.
 */
static void *slab_maintenance_thread(void *arg) {
    int from_cls, to_cls;

    while (do_run_slab_thread) {
        if (settings.slab_automove != 1) {
            /* Don't wake as often if we're not enabled.
             * This is lazier than setting up a condition right now. */
            sleep(5);
            continue;
        }

        if (slab_automove_decision(&from_cls, &to_cls) == 1) {
            /* Blind to the return codes. It will retry on its own */
            slabs_reassign(from_cls, to_cls);
        }
        sleep(1);
    }
    return NULL;
}

/* Slab mover thread.
* Sits waiting for a condition to jump off and shovel some memory about
*/
Expand Down Expand Up @@ -918,8 +851,8 @@ static enum reassign_result_type do_slabs_reassign(int src, int dst) {
/* TODO: If we end up back at -1, return a new error type */
}

if (src < POWER_SMALLEST || src > power_largest ||
dst < POWER_SMALLEST || dst > power_largest)
if (src < POWER_SMALLEST || src > power_largest ||
dst < SLAB_GLOBAL_PAGE_POOL || dst > power_largest)
return REASSIGN_BADCLASS;

if (slabclass[src].slabs < 2)
Expand Down Expand Up @@ -953,7 +886,6 @@ void slabs_rebalancer_resume(void) {
pthread_mutex_unlock(&slabs_rebalance_lock);
}

static pthread_t maintenance_tid;
static pthread_t rebalance_tid;

int start_slab_maintenance_thread(void) {
Expand All @@ -974,11 +906,6 @@ int start_slab_maintenance_thread(void) {
}
pthread_mutex_init(&slabs_rebalance_lock, NULL);

if ((ret = pthread_create(&maintenance_tid, NULL,
slab_maintenance_thread, NULL)) != 0) {
fprintf(stderr, "Can't create slab maint thread: %s\n", strerror(ret));
return -1;
}
if ((ret = pthread_create(&rebalance_tid, NULL,
slab_rebalance_thread, NULL)) != 0) {
fprintf(stderr, "Can't create rebal thread: %s\n", strerror(ret));
Expand All @@ -997,6 +924,5 @@ void stop_slab_maintenance_thread(void) {
pthread_mutex_unlock(&slabs_rebalance_lock);

/* Wait for the maintenance thread to stop */
pthread_join(maintenance_tid, NULL);
pthread_join(rebalance_tid, NULL);
}
2 changes: 1 addition & 1 deletion slabs.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c);
void slabs_stats(ADD_STAT add_stats, void *c);

/* Hints as to freespace in slab class */
unsigned int slabs_available_chunks(unsigned int id, bool *mem_flag, unsigned int *total_chunks);
unsigned int slabs_available_chunks(unsigned int id, bool *mem_flag, unsigned int *total_chunks, unsigned int *chunks_perslab);

int start_slab_maintenance_thread(void);
void stop_slab_maintenance_thread(void);
Expand Down
26 changes: 25 additions & 1 deletion t/slabs-reassign2.t
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use strict;
use warnings;
use Test::More tests => 5;
use Test::More tests => 9;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;
Expand Down Expand Up @@ -62,3 +62,27 @@ cmp_ok($hits, '>', 4000, 'were able to fetch back 2/3rds of 8k keys');
my $stats_done = mem_stats($sock);
cmp_ok($stats_done->{slab_reassign_rescues}, '>', 0, 'some reassign rescues happened');
cmp_ok($stats_done->{slab_reassign_evictions}, '>', 0, 'some reassing evictions happened');

# Send flush_all and expect the server to acknowledge it with "OK".
# Presumably this frees the stored chunks so pages can be returned to the
# global page pool -- confirm against the server's flush semantics.
print $sock "flush_all\r\n";
is(scalar <$sock>, "OK\r\n", "did flush_all");
# Poll the stats once per second, for at most 20 tries, and stop as soon as
# the slab_global_page_pool stat reads exactly 61.
my $tries;
for ($tries = 20; $tries > 0; $tries--) {
sleep 1;
my $stats = mem_stats($sock);
if ($stats->{slab_global_page_pool} == 61) {
last;
}
}
# $tries is only 0 here if the loop ran out without ever seeing 61.
cmp_ok($tries, '>', 0, 'reclaimed 61 pages before timeout');

# Set into an entirely new class. Overload a bit to try to cause problems.
# Each value is 4096 bytes and four times $keycount sets are sent, all with
# noreply so no responses have to be read back.
$value = "B"x4096;
for (1 .. $keycount * 4) {
print $sock "set jfoo$_ 0 0 4096 noreply\r\n$value\r\n";
}

# Scoped block so this $stats does not leak into the rest of the script.
# Checks the final item count and that the pool stat has dropped back to 0.
{
my $stats = mem_stats($sock);
is($stats->{curr_items}, 14490, "stored 14490 4k items");
is($stats->{slab_global_page_pool}, 0, "drained the global page pool");
}

0 comments on commit d6e9646

Please sign in to comment.