Skip to content

Commit

Permalink
alloc loop now attempts an item_lock
Browse files Browse the repository at this point in the history
Fixes a few issues with a restructuring... I think -M was broken before,
should be fixed now. It had a refcount leak.

Now walks up to five items from the bottom in case of the bottomost items
being item_locked, or refcount locked. Helps avoid excessive OOM errors for
some oddball cases. Those happen more often if you're hammering on a handful
of pages in a very large class size (100k+)

The hash item lock ensures that if we're holding that lock, no other thread
can be incrementing the refcount lock at that time. It will mean more in
future patches.

slab rebalancer gets a similar update.
  • Loading branch information
dormando committed Sep 3, 2012
1 parent ad27588 commit 2db1bf4
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 84 deletions.
119 changes: 67 additions & 52 deletions items.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ static size_t item_make_header(const uint8_t nkey, const int flags, const int nb
}

/*@null@*/
item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
item *do_item_alloc(char *key, const size_t nkey, const int flags,
const rel_time_t exptime, const int nbytes,
const uint32_t cur_hv) {
uint8_t nsuffix;
item *it = NULL;
char suffix[40];
Expand All @@ -100,13 +102,39 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim

mutex_lock(&cache_lock);
/* do a quick check if we have any expired items in the tail.. */
int tries = 5;
int tried_alloc = 0;
item *search;
rel_time_t oldest_live = settings.oldest_live;

search = tails[id];
if (search != NULL && (refcount_incr(&search->refcount) == 2)) {
/* We walk up *only* for locked items. Never searching for expired.
* Waste of CPU for almost all deployments */
for (; tries > 0 && search != NULL; tries--, search=search->prev) {
uint32_t hv = hash(ITEM_key(search), search->nkey, 0);
/* Attempt to hash item lock the "search" item. If locked, no
* other callers can incr the refcount
*/
if (hv != cur_hv && item_trylock(hv) != 0)
continue;
/* Now see if the item is refcount locked */
if (refcount_incr(&search->refcount) != 2) {
refcount_decr(&search->refcount);
/* Old rare bug could cause a refcount leak. We haven't seen
* it in years, but we leave this code in to prevent failures
* just in case */
if (search->time + TAIL_REPAIR_TIME < current_time) {
itemstats[id].tailrepairs++;
search->refcount = 1;
do_item_unlink_nolock(search, hv);
}
item_unlock(hv);
continue;
}

/* Expired or flushed */
if ((search->exptime != 0 && search->exptime < current_time)
|| (search->time <= oldest_live && oldest_live <= current_time)) { // dead by flush
|| (search->time <= oldest_live && oldest_live <= current_time)) {
STATS_LOCK();
stats.reclaimed++;
STATS_UNLOCK();
Expand All @@ -119,68 +147,55 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
}
it = search;
slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
do_item_unlink_nolock(it, hv);
/* Initialize the item block: */
it->slabs_clsid = 0;
} else if ((it = slabs_alloc(ntotal, id)) == NULL) {
tried_alloc = 1;
if (settings.evict_to_free == 0) {
itemstats[id].outofmemory++;
mutex_unlock(&cache_lock);
return NULL;
}
itemstats[id].evicted++;
itemstats[id].evicted_time = current_time - search->time;
if (search->exptime != 0)
itemstats[id].evicted_nonzero++;
if ((search->it_flags & ITEM_FETCHED) == 0) {
} else {
itemstats[id].evicted++;
itemstats[id].evicted_time = current_time - search->time;
if (search->exptime != 0)
itemstats[id].evicted_nonzero++;
if ((search->it_flags & ITEM_FETCHED) == 0) {
STATS_LOCK();
stats.evicted_unfetched++;
STATS_UNLOCK();
itemstats[id].evicted_unfetched++;
}
STATS_LOCK();
stats.evicted_unfetched++;
stats.evictions++;
STATS_UNLOCK();
itemstats[id].evicted_unfetched++;
it = search;
slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
do_item_unlink_nolock(it, hv);
/* Initialize the item block: */
it->slabs_clsid = 0;

/* If we've just evicted an item, and the automover is set to
* angry bird mode, attempt to rip memory into this slab class.
* TODO: Move valid object detection into a function, and on a
* "successful" memory pull, look behind and see if the next alloc
* would be an eviction. Then kick off the slab mover before the
* eviction happens.
*/
if (settings.slab_automove == 2)
slabs_reassign(-1, id);
}
STATS_LOCK();
stats.evictions++;
STATS_UNLOCK();
it = search;
slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
/* Initialize the item block: */
it->slabs_clsid = 0;

/* If we've just evicted an item, and the automover is set to
* angry bird mode, attempt to rip memory into this slab class.
* TODO: Move valid object detection into a function, and on a
* "successful" memory pull, look behind and see if the next alloc
* would be an eviction. Then kick off the slab mover before the
* eviction happens.
*/
if (settings.slab_automove == 2)
slabs_reassign(-1, id);
} else {
refcount_decr(&search->refcount);
}
} else {
/* If the LRU is empty or locked, attempt to allocate memory */
it = slabs_alloc(ntotal, id);
if (search != NULL)
refcount_decr(&search->refcount);

refcount_decr(&search->refcount);
item_unlock(hv);
break;
}

if (!tried_alloc && (tries == 0 || search == NULL))
it = slabs_alloc(ntotal, id);

if (it == NULL) {
itemstats[id].outofmemory++;
/* Last ditch effort. There was a very rare bug which caused
* refcount leaks. We leave this just in case they ever happen again.
* We can reasonably assume no item can stay locked for more than
* three hours, so if we find one in the tail which is that old,
* free it anyway.
*/
if (search != NULL &&
search->refcount != 2 &&
search->time + TAIL_REPAIR_TIME < current_time) {
itemstats[id].tailrepairs++;
search->refcount = 1;
do_item_unlink_nolock(search, hash(ITEM_key(search), search->nkey, 0));
}
mutex_unlock(&cache_lock);
return NULL;
}
Expand Down
2 changes: 1 addition & 1 deletion items.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
uint64_t get_cas_id(void);

/*@null@*/
item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes);
item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes, const uint32_t cur_hv);
void item_free(item *it);
bool item_size_ok(const size_t nkey, const int flags, const int nbytes);

Expand Down
4 changes: 2 additions & 2 deletions memcached.c
Original file line number Diff line number Diff line change
Expand Up @@ -2325,7 +2325,7 @@ enum store_item_type do_store_item(item *it, int comm, conn *c, const uint32_t h

flags = (int) strtol(ITEM_suffix(old_it), (char **) NULL, 10);

new_it = item_alloc(key, it->nkey, flags, old_it->exptime, it->nbytes + old_it->nbytes - 2 /* CRLF */);
new_it = do_item_alloc(key, it->nkey, flags, old_it->exptime, it->nbytes + old_it->nbytes - 2 /* CRLF */, hv);

if (new_it == NULL) {
/* SERVER_ERROR out of memory */
Expand Down Expand Up @@ -3101,7 +3101,7 @@ enum delta_result_type do_add_delta(conn *c, const char *key, const size_t nkey,
res = strlen(buf);
if (res + 2 > it->nbytes || it->refcount != 1) { /* need to realloc */
item *new_it;
new_it = item_alloc(ITEM_key(it), it->nkey, atoi(ITEM_suffix(it) + 1), it->exptime, res + 2 );
new_it = do_item_alloc(ITEM_key(it), it->nkey, atoi(ITEM_suffix(it) + 1), it->exptime, res + 2, hv);
if (new_it == 0) {
do_item_remove(it);
return EOM;
Expand Down
1 change: 1 addition & 0 deletions memcached.h
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,7 @@ void item_unlink(item *it);
void item_update(item *it);

void item_lock(uint32_t hv);
int item_trylock(uint32_t hv);
void item_unlock(uint32_t hv);
unsigned short refcount_incr(unsigned short *refcount);
unsigned short refcount_decr(unsigned short *refcount);
Expand Down
63 changes: 35 additions & 28 deletions slabs.c
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ static int slab_rebalance_start(void) {
}

enum move_status {
MOVE_PASS=0, MOVE_DONE, MOVE_BUSY
MOVE_PASS=0, MOVE_DONE, MOVE_BUSY, MOVE_LOCKED
};

/* refcount == 0 is safe since nobody can incr while cache_lock is held.
Expand All @@ -522,36 +522,42 @@ static int slab_rebalance_move(void) {
item *it = slab_rebal.slab_pos;
status = MOVE_PASS;
if (it->slabs_clsid != 255) {
refcount = refcount_incr(&it->refcount);
if (refcount == 1) { /* item is unlinked, unused */
if (it->it_flags & ITEM_SLABBED) {
/* remove from slab freelist */
if (s_cls->slots == it) {
s_cls->slots = it->next;
uint32_t hv = hash(ITEM_key(it), it->nkey, 0);
if (item_trylock(hv) != 0) {
status = MOVE_LOCKED;
} else {
refcount = refcount_incr(&it->refcount);
if (refcount == 1) { /* item is unlinked, unused */
if (it->it_flags & ITEM_SLABBED) {
/* remove from slab freelist */
if (s_cls->slots == it) {
s_cls->slots = it->next;
}
if (it->next) it->next->prev = it->prev;
if (it->prev) it->prev->next = it->next;
s_cls->sl_curr--;
status = MOVE_DONE;
} else {
status = MOVE_BUSY;
}
} else if (refcount == 2) { /* item is linked but not busy */
if ((it->it_flags & ITEM_LINKED) != 0) {
do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
status = MOVE_DONE;
} else {
/* refcount == 1 + !ITEM_LINKED means the item is being
* uploaded to, or was just unlinked but hasn't been freed
* yet. Let it bleed off on its own and try again later */
status = MOVE_BUSY;
}
if (it->next) it->next->prev = it->prev;
if (it->prev) it->prev->next = it->next;
s_cls->sl_curr--;
status = MOVE_DONE;
} else {
status = MOVE_BUSY;
}
} else if (refcount == 2) { /* item is linked but not busy */
if ((it->it_flags & ITEM_LINKED) != 0) {
do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
status = MOVE_DONE;
} else {
/* refcount == 1 + !ITEM_LINKED means the item is being
* uploaded to, or was just unlinked but hasn't been freed
* yet. Let it bleed off on its own and try again later */
if (settings.verbose > 2) {
fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
}
status = MOVE_BUSY;
}
} else {
if (settings.verbose > 2) {
fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
}
status = MOVE_BUSY;
item_unlock(hv);
}
}

Expand All @@ -562,9 +568,10 @@ static int slab_rebalance_move(void) {
it->slabs_clsid = 255;
break;
case MOVE_BUSY:
refcount_decr(&it->refcount);
case MOVE_LOCKED:
slab_rebal.busy_items++;
was_busy++;
refcount_decr(&it->refcount);
break;
case MOVE_PASS:
break;
Expand Down
6 changes: 5 additions & 1 deletion thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ void item_lock(uint32_t hv) {
mutex_lock(&item_locks[hv & item_lock_mask]);
}

int item_trylock(uint32_t hv) {
return pthread_mutex_trylock(&item_locks[hv & item_lock_mask]);
}

void item_unlock(uint32_t hv) {
mutex_unlock(&item_locks[hv & item_lock_mask]);
}
Expand Down Expand Up @@ -381,7 +385,7 @@ int is_listen_thread() {
item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes) {
item *it;
/* do_item_alloc handles its own locks */
it = do_item_alloc(key, nkey, flags, exptime, nbytes);
it = do_item_alloc(key, nkey, flags, exptime, nbytes, 0);
return it;
}

Expand Down

0 comments on commit 2db1bf4

Please sign in to comment.