Skip to content

Commit

Permalink
libceph: support crush tunables
Browse files Browse the repository at this point in the history
The server side recently added support for tuning some magic
crush variables. Decode these variables if they are present, or use the
default values if they are not present.

Corresponds to ceph.git commit 89af369c25f274fe62ef730e5e8aad0c54f1e5a5.

Signed-off-by: caleb miles <[email protected]>
Reviewed-by: Sage Weil <[email protected]>
Reviewed-by: Alex Elder <[email protected]>
Reviewed-by: Yehuda Sadeh <[email protected]>
  • Loading branch information
Sage Weil committed Jul 31, 2012
1 parent 1fe60e5 commit 546f04e
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 7 deletions.
5 changes: 4 additions & 1 deletion include/linux/ceph/ceph_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,15 @@
#define CEPH_FEATURE_MONNAMES (1<<5)
#define CEPH_FEATURE_RECONNECT_SEQ (1<<6)
#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
/* bits 8-17 defined by user-space; not supported yet here */
#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)

/*
* Features supported.
*/
#define CEPH_FEATURES_SUPPORTED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR)
(CEPH_FEATURE_NOSRCADDR | \
CEPH_FEATURE_CRUSH_TUNABLES)

#define CEPH_FEATURES_REQUIRED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR)
Expand Down
8 changes: 8 additions & 0 deletions include/linux/crush/crush.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,14 @@ struct crush_map {
__s32 max_buckets;
__u32 max_rules;
__s32 max_devices;

/* choose local retries before re-descent */
__u32 choose_local_tries;
/* choose local attempts using a fallback permutation before
* re-descent */
__u32 choose_local_fallback_tries;
/* choose attempts before giving up */
__u32 choose_total_tries;
};


Expand Down
13 changes: 7 additions & 6 deletions net/ceph/crush/mapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,6 @@ static int crush_choose(const struct crush_map *map,
int item = 0;
int itemtype;
int collide, reject;
const unsigned int orig_tries = 5; /* attempts before we fall back to search */

dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
bucket->id, x, outpos, numrep);
Expand Down Expand Up @@ -351,8 +350,9 @@ static int crush_choose(const struct crush_map *map,
reject = 1;
goto reject;
}
if (flocal >= (in->size>>1) &&
flocal > orig_tries)
if (map->choose_local_fallback_tries > 0 &&
flocal >= (in->size>>1) &&
flocal > map->choose_local_fallback_tries)
item = bucket_perm_choose(in, x, r);
else
item = crush_bucket_choose(in, x, r);
Expand Down Expand Up @@ -422,13 +422,14 @@ static int crush_choose(const struct crush_map *map,
ftotal++;
flocal++;

if (collide && flocal < 3)
if (collide && flocal <= map->choose_local_tries)
/* retry locally a few times */
retry_bucket = 1;
else if (flocal <= in->size + orig_tries)
else if (map->choose_local_fallback_tries > 0 &&
flocal <= in->size + map->choose_local_fallback_tries)
/* exhaustive bucket search */
retry_bucket = 1;
else if (ftotal < 20)
else if (ftotal <= map->choose_total_tries)
/* then retry descent */
retry_descent = 1;
else
Expand Down
39 changes: 39 additions & 0 deletions net/ceph/osdmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,21 @@ static int crush_decode_straw_bucket(void **p, void *end,
return -EINVAL;
}

static int skip_name_map(void **p, void *end)
{
int len;
ceph_decode_32_safe(p, end, len ,bad);
while (len--) {
int strlen;
*p += sizeof(u32);
ceph_decode_32_safe(p, end, strlen, bad);
*p += strlen;
}
return 0;
bad:
return -EINVAL;
}

static struct crush_map *crush_decode(void *pbyval, void *end)
{
struct crush_map *c;
Expand All @@ -143,13 +158,19 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
void **p = &pbyval;
void *start = pbyval;
u32 magic;
u32 num_name_maps;

dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));

c = kzalloc(sizeof(*c), GFP_NOFS);
if (c == NULL)
return ERR_PTR(-ENOMEM);

/* set tunables to default values */
c->choose_local_tries = 2;
c->choose_local_fallback_tries = 5;
c->choose_total_tries = 19;

ceph_decode_need(p, end, 4*sizeof(u32), bad);
magic = ceph_decode_32(p);
if (magic != CRUSH_MAGIC) {
Expand Down Expand Up @@ -297,7 +318,25 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
}

/* ignore trailing name maps. */
for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) {
err = skip_name_map(p, end);
if (err < 0)
goto done;
}

/* tunables */
ceph_decode_need(p, end, 3*sizeof(u32), done);
c->choose_local_tries = ceph_decode_32(p);
c->choose_local_fallback_tries = ceph_decode_32(p);
c->choose_total_tries = ceph_decode_32(p);
dout("crush decode tunable choose_local_tries = %d",
c->choose_local_tries);
dout("crush decode tunable choose_local_fallback_tries = %d",
c->choose_local_fallback_tries);
dout("crush decode tunable choose_total_tries = %d",
c->choose_total_tries);

done:
dout("crush_decode success\n");
return c;

Expand Down

0 comments on commit 546f04e

Please sign in to comment.