diff --git a/items.c b/items.c index 950ba60997..98bebb0478 100644 --- a/items.c +++ b/items.c @@ -113,6 +113,7 @@ int item_is_flushed(item *it) { static unsigned int noexp_lru_size(int slabs_clsid) { int id = CLEAR_LRU(slabs_clsid); + id |= NOEXP_LRU; unsigned int ret; pthread_mutex_lock(&lru_locks[id]); ret = sizes[id]; @@ -478,20 +479,6 @@ char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, u return buffer; } -void item_stats_evictions(uint64_t *evicted) { - int n; - for (n = 0; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) { - int i; - int x; - for (x = 0; x < 4; x++) { - i = n | lru_type_map[x]; - pthread_mutex_lock(&lru_locks[i]); - evicted[n] += itemstats[i].evicted; - pthread_mutex_unlock(&lru_locks[i]); - } - } -} - void item_stats_totals(ADD_STAT add_stats, void *c) { itemstats_t totals; memset(&totals, 0, sizeof(itemstats_t)); @@ -907,11 +894,22 @@ static int lru_maintainer_juggle(const int slabs_clsid) { int did_moves = 0; bool mem_limit_reached = false; unsigned int total_chunks = 0; + unsigned int chunks_perslab = 0; + unsigned int chunks_free = 0; /* TODO: if free_chunks below high watermark, increase aggressiveness */ - slabs_available_chunks(slabs_clsid, &mem_limit_reached, &total_chunks); + chunks_free = slabs_available_chunks(slabs_clsid, &mem_limit_reached, + &total_chunks, &chunks_perslab); if (settings.expirezero_does_not_evict) total_chunks -= noexp_lru_size(slabs_clsid); + /* If slab automove is enabled on any level, and we have more than 2 pages + * worth of chunks free in this class, ask (gently) to reassign a page + * from this class back into the global pool (0) + */ + if (settings.slab_automove > 0 && chunks_free > (chunks_perslab * 2)) { + slabs_reassign(slabs_clsid, SLAB_GLOBAL_PAGE_POOL); + } + /* Juggle HOT/WARM up to N times */ for (i = 0; i < 1000; i++) { int do_more = 0; diff --git a/items.h b/items.h index f47de8f973..4e492b44e6 100644 --- a/items.h +++ b/items.h @@ -27,7 +27,6 @@ item *do_item_get(const char 
*key, const size_t nkey, const uint32_t hv); item *do_item_touch(const char *key, const size_t nkey, uint32_t exptime, const uint32_t hv); void item_stats_reset(void); extern pthread_mutex_t lru_locks[POWER_LARGEST]; -void item_stats_evictions(uint64_t *evicted); enum crawler_result_type { CRAWLER_OK=0, CRAWLER_RUNNING, CRAWLER_BADCLASS, CRAWLER_NOTSTARTED diff --git a/memcached.h b/memcached.h index 05eeb049b2..c5c348ac9d 100644 --- a/memcached.h +++ b/memcached.h @@ -78,6 +78,7 @@ /* Slab sizing definitions. */ #define POWER_SMALLEST 1 #define POWER_LARGEST 256 /* actual cap is 255 */ +#define SLAB_GLOBAL_PAGE_POOL 0 /* magic slab class for storing pages for reassignment */ #define CHUNK_ALIGN_BYTES 8 /* slab class max is a 6-bit number, -1. */ #define MAX_NUMBER_OF_SLAB_CLASSES (63 + 1) diff --git a/slabs.c b/slabs.c index 3518098541..1c8837634b 100644 --- a/slabs.c +++ b/slabs.c @@ -194,20 +194,34 @@ static void split_slab_page_into_freelist(char *ptr, const unsigned int id) { } } +/* Pop a page off the global pool: returns the most recently stored page (LIFO), or NULL if empty */ +static void *get_page_from_global_pool(void) { + slabclass_t *p = &slabclass[SLAB_GLOBAL_PAGE_POOL]; + if (p->slabs < 1) { + return NULL; + } + char *ret = p->slab_list[p->slabs - 1]; + p->slabs--; + return ret; +} + static int do_slabs_newslab(const unsigned int id) { slabclass_t *p = &slabclass[id]; + slabclass_t *g = &slabclass[SLAB_GLOBAL_PAGE_POOL]; int len = settings.slab_reassign ?
settings.item_size_max : p->size * p->perslab; char *ptr; - if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0)) { + if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0 + && g->slabs == 0)) { mem_limit_reached = true; MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id); return 0; } if ((grow_slab_list(id) == 0) || - ((ptr = memory_allocate((size_t)len)) == 0)) { + (((ptr = get_page_from_global_pool()) == NULL) && + ((ptr = memory_allocate((size_t)len)) == 0))) { MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id); return 0; @@ -307,6 +321,11 @@ bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c) { APPEND_STAT("curr_items", "%u", stats.curr_items); APPEND_STAT("total_items", "%u", stats.total_items); STATS_UNLOCK(); + if (settings.slab_automove > 0) { + pthread_mutex_lock(&slabs_lock); + APPEND_STAT("slab_global_page_pool", "%u", slabclass[SLAB_GLOBAL_PAGE_POOL].slabs); + pthread_mutex_unlock(&slabs_lock); + } item_stats_totals(add_stats, c); } else if (nz_strcmp(nkey, stat_type, "items") == 0) { item_stats(add_stats, c); @@ -446,7 +465,7 @@ void slabs_adjust_mem_requested(unsigned int id, size_t old, size_t ntotal) } unsigned int slabs_available_chunks(const unsigned int id, bool *mem_flag, - unsigned int *total_chunks) { + unsigned int *total_chunks, unsigned int *chunks_perslab) { unsigned int ret; slabclass_t *p; @@ -457,6 +476,8 @@ unsigned int slabs_available_chunks(const unsigned int id, bool *mem_flag, *mem_flag = mem_limit_reached; if (total_chunks != NULL) *total_chunks = p->slabs * p->perslab; + if (chunks_perslab != NULL) + *chunks_perslab = p->perslab; pthread_mutex_unlock(&slabs_lock); return ret; } @@ -476,7 +497,7 @@ static int slab_rebalance_start(void) { if (slab_rebal.s_clsid < POWER_SMALLEST || slab_rebal.s_clsid > power_largest || - slab_rebal.d_clsid < POWER_SMALLEST || + slab_rebal.d_clsid < SLAB_GLOBAL_PAGE_POOL || slab_rebal.d_clsid > power_largest || slab_rebal.s_clsid == slab_rebal.d_clsid) no_go = 
-2; @@ -720,7 +741,7 @@ static void slab_rebalance_finish(void) { pthread_mutex_lock(&slabs_lock); s_cls = &slabclass[slab_rebal.s_clsid]; - d_cls = &slabclass[slab_rebal.d_clsid]; + d_cls = &slabclass[slab_rebal.d_clsid]; /* At this point the stolen slab is completely clear. * We always kill the "first"/"oldest" slab page in the slab_list, so @@ -734,8 +755,11 @@ static void slab_rebalance_finish(void) { memset(slab_rebal.slab_start, 0, (size_t)settings.item_size_max); d_cls->slab_list[d_cls->slabs++] = slab_rebal.slab_start; - split_slab_page_into_freelist(slab_rebal.slab_start, - slab_rebal.d_clsid); + /* Don't need to split the page into chunks if we're just storing it */ + if (slab_rebal.d_clsid > SLAB_GLOBAL_PAGE_POOL) { + split_slab_page_into_freelist(slab_rebal.slab_start, + slab_rebal.d_clsid); + } slab_rebal.done = 0; slab_rebal.s_clsid = 0; @@ -758,97 +782,6 @@ static void slab_rebalance_finish(void) { } } -/* Return 1 means a decision was reached. - * Move to its own thread (created/destroyed as needed) once automover is more - * complex. - */ -static int slab_automove_decision(int *src, int *dst) { - static uint64_t evicted_old[MAX_NUMBER_OF_SLAB_CLASSES]; - static unsigned int slab_zeroes[MAX_NUMBER_OF_SLAB_CLASSES]; - static unsigned int slab_winner = 0; - static unsigned int slab_wins = 0; - uint64_t evicted_new[MAX_NUMBER_OF_SLAB_CLASSES]; - uint64_t evicted_diff = 0; - uint64_t evicted_max = 0; - unsigned int highest_slab = 0; - unsigned int total_pages[MAX_NUMBER_OF_SLAB_CLASSES]; - int i; - int source = 0; - int dest = 0; - static rel_time_t next_run; - - /* Run less frequently than the slabmove tester. 
*/ - if (current_time >= next_run) { - next_run = current_time + 10; - } else { - return 0; - } - - item_stats_evictions(evicted_new); - pthread_mutex_lock(&slabs_lock); - for (i = POWER_SMALLEST; i < power_largest; i++) { - total_pages[i] = slabclass[i].slabs; - } - pthread_mutex_unlock(&slabs_lock); - - /* Find a candidate source; something with zero evicts 3+ times */ - for (i = POWER_SMALLEST; i < power_largest; i++) { - evicted_diff = evicted_new[i] - evicted_old[i]; - if (evicted_diff == 0 && total_pages[i] > 2) { - slab_zeroes[i]++; - if (source == 0 && slab_zeroes[i] >= 3) - source = i; - } else { - slab_zeroes[i] = 0; - if (evicted_diff > evicted_max) { - evicted_max = evicted_diff; - highest_slab = i; - } - } - evicted_old[i] = evicted_new[i]; - } - - /* Pick a valid destination */ - if (slab_winner != 0 && slab_winner == highest_slab) { - slab_wins++; - if (slab_wins >= 3) - dest = slab_winner; - } else { - slab_wins = 1; - slab_winner = highest_slab; - } - - if (source && dest) { - *src = source; - *dst = dest; - return 1; - } - return 0; -} - -/* Slab rebalancer thread. - * Does not use spinlocks since it is not timing sensitive. Burn less CPU and - * go to sleep if locks are contended - */ -static void *slab_maintenance_thread(void *arg) { - int src, dest; - - while (do_run_slab_thread) { - if (settings.slab_automove == 1) { - if (slab_automove_decision(&src, &dest) == 1) { - /* Blind to the return codes. It will retry on its own */ - slabs_reassign(src, dest); - } - sleep(1); - } else { - /* Don't wake as often if we're not enabled. - * This is lazier than setting up a condition right now. */ - sleep(5); - } - } - return NULL; -} - /* Slab mover thread. 
* Sits waiting for a condition to jump off and shovel some memory about */ @@ -918,8 +851,8 @@ static enum reassign_result_type do_slabs_reassign(int src, int dst) { /* TODO: If we end up back at -1, return a new error type */ } - if (src < POWER_SMALLEST || src > power_largest || - dst < POWER_SMALLEST || dst > power_largest) + if (src < POWER_SMALLEST || src > power_largest || + dst < SLAB_GLOBAL_PAGE_POOL || dst > power_largest) return REASSIGN_BADCLASS; if (slabclass[src].slabs < 2) @@ -953,7 +886,6 @@ void slabs_rebalancer_resume(void) { pthread_mutex_unlock(&slabs_rebalance_lock); } -static pthread_t maintenance_tid; static pthread_t rebalance_tid; int start_slab_maintenance_thread(void) { @@ -974,11 +906,6 @@ int start_slab_maintenance_thread(void) { } pthread_mutex_init(&slabs_rebalance_lock, NULL); - if ((ret = pthread_create(&maintenance_tid, NULL, - slab_maintenance_thread, NULL)) != 0) { - fprintf(stderr, "Can't create slab maint thread: %s\n", strerror(ret)); - return -1; - } if ((ret = pthread_create(&rebalance_tid, NULL, slab_rebalance_thread, NULL)) != 0) { fprintf(stderr, "Can't create rebal thread: %s\n", strerror(ret)); @@ -997,6 +924,5 @@ void stop_slab_maintenance_thread(void) { pthread_mutex_unlock(&slabs_rebalance_lock); /* Wait for the maintenance thread to stop */ - pthread_join(maintenance_tid, NULL); pthread_join(rebalance_tid, NULL); } diff --git a/slabs.h b/slabs.h index 1eac5c8066..fb29cfa74c 100644 --- a/slabs.h +++ b/slabs.h @@ -34,7 +34,7 @@ bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c); void slabs_stats(ADD_STAT add_stats, void *c); /* Hints as to freespace in slab class */ -unsigned int slabs_available_chunks(unsigned int id, bool *mem_flag, unsigned int *total_chunks); +unsigned int slabs_available_chunks(unsigned int id, bool *mem_flag, unsigned int *total_chunks, unsigned int *chunks_perslab); int start_slab_maintenance_thread(void); void stop_slab_maintenance_thread(void); diff --git 
a/t/slabs-reassign2.t b/t/slabs-reassign2.t index 8de4a05e51..91351704ec 100644 --- a/t/slabs-reassign2.t +++ b/t/slabs-reassign2.t @@ -2,7 +2,7 @@ use strict; use warnings; -use Test::More tests => 5; +use Test::More tests => 9; use FindBin qw($Bin); use lib "$Bin/lib"; use MemcachedTest; @@ -62,3 +62,27 @@ cmp_ok($hits, '>', 4000, 'were able to fetch back 2/3rds of 8k keys'); my $stats_done = mem_stats($sock); cmp_ok($stats_done->{slab_reassign_rescues}, '>', 0, 'some reassign rescues happened'); cmp_ok($stats_done->{slab_reassign_evictions}, '>', 0, 'some reassing evictions happened'); + +print $sock "flush_all\r\n"; +is(scalar <$sock>, "OK\r\n", "did flush_all"); +my $tries; +for ($tries = 20; $tries > 0; $tries--) { + sleep 1; + my $stats = mem_stats($sock); + if ($stats->{slab_global_page_pool} == 61) { + last; + } +} +cmp_ok($tries, '>', 0, 'reclaimed 61 pages before timeout'); + +# Set into an entirely new class. Overload a bit to try to cause problems. +$value = "B"x4096; +for (1 .. $keycount * 4) { + print $sock "set jfoo$_ 0 0 4096 noreply\r\n$value\r\n"; +} + +{ + my $stats = mem_stats($sock); + is($stats->{curr_items}, 14490, "stored 14490 4k items"); + is($stats->{slab_global_page_pool}, 0, "drained the global page pool"); +}