Skip to content

Commit

Permalink
Merge tag 'ceph-for-4.13-rc4' of git://github.com/ceph/ceph-client
Browse files Browse the repository at this point in the history
Pull ceph fixes from Ilya Dryomov:
 "A bunch of fixes and follow-ups for -rc1 Luminous patches: issues with
  ->reencode_message() and last minute RADOS semantic changes in
  v12.1.2"

* tag 'ceph-for-4.13-rc4' of git://github.com/ceph/ceph-client:
  libceph: make RECOVERY_DELETES feature create a new interval
  libceph: upmap semantic changes
  crush: assume weight_set != null imples weight_set_size > 0
  libceph: fallback for when there isn't a pool-specific choose_arg
  libceph: don't call ->reencode_message() more than once per message
  libceph: make encode_request_*() work with r_mempool requests
  • Loading branch information
torvalds committed Aug 4, 2017
2 parents a64c40e + ae78dd8 commit c63716a
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 38 deletions.
1 change: 1 addition & 0 deletions include/linux/ceph/osd_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ struct ceph_osd_request_target {
int size;
int min_size;
bool sort_bitwise;
bool recovery_deletes;

unsigned int flags; /* CEPH_OSD_FLAG_* */
bool paused;
Expand Down
2 changes: 2 additions & 0 deletions include/linux/ceph/osdmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
u32 new_pg_num,
bool old_sort_bitwise,
bool new_sort_bitwise,
bool old_recovery_deletes,
bool new_recovery_deletes,
const struct ceph_pg *pgid);
bool ceph_osds_changed(const struct ceph_osds *old_acting,
const struct ceph_osds *new_acting,
Expand Down
4 changes: 4 additions & 0 deletions include/linux/ceph/rados.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ extern const char *ceph_osd_state_name(int s);
#define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */
#define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */
#define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */
#define CEPH_OSDMAP_REQUIRE_JEWEL (1<<16) /* require jewel for booting osds */
#define CEPH_OSDMAP_REQUIRE_KRAKEN (1<<17) /* require kraken for booting osds */
#define CEPH_OSDMAP_REQUIRE_LUMINOUS (1<<18) /* require l for booting osds */
#define CEPH_OSDMAP_RECOVERY_DELETES (1<<19) /* deletes performed during recovery instead of peering */

/*
* The error code to return when an OSD can't handle a write
Expand Down
2 changes: 1 addition & 1 deletion include/linux/crush/crush.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ struct crush_choose_arg {
struct crush_choose_arg_map {
#ifdef __KERNEL__
struct rb_node node;
u64 choose_args_index;
s64 choose_args_index;
#endif
struct crush_choose_arg *args; /*!< replacement for each bucket
in the crushmap */
Expand Down
2 changes: 1 addition & 1 deletion net/ceph/crush/mapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ static __u32 *get_choose_arg_weights(const struct crush_bucket_straw2 *bucket,
const struct crush_choose_arg *arg,
int position)
{
if (!arg || !arg->weight_set || arg->weight_set_size == 0)
if (!arg || !arg->weight_set)
return bucket->item_weights;

if (position >= arg->weight_set_size)
Expand Down
6 changes: 3 additions & 3 deletions net/ceph/messenger.c
Original file line number Diff line number Diff line change
Expand Up @@ -1287,10 +1287,10 @@ static void prepare_write_message(struct ceph_connection *con)
if (m->needs_out_seq) {
m->hdr.seq = cpu_to_le64(++con->out_seq);
m->needs_out_seq = false;
}

if (con->ops->reencode_message)
con->ops->reencode_message(m);
if (con->ops->reencode_message)
con->ops->reencode_message(m);
}

dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
m, con->out_seq, le16_to_cpu(m->hdr.type),
Expand Down
14 changes: 11 additions & 3 deletions net/ceph/osd_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -1337,6 +1337,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
bool legacy_change;
bool split = false;
bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
bool recovery_deletes = ceph_osdmap_flag(osdc,
CEPH_OSDMAP_RECOVERY_DELETES);
enum calc_target_result ct_res;
int ret;

Expand Down Expand Up @@ -1399,6 +1401,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
pi->pg_num,
t->sort_bitwise,
sort_bitwise,
t->recovery_deletes,
recovery_deletes,
&last_pgid))
force_resend = true;

Expand All @@ -1421,6 +1425,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
t->pg_num = pi->pg_num;
t->pg_num_mask = pi->pg_num_mask;
t->sort_bitwise = sort_bitwise;
t->recovery_deletes = recovery_deletes;

t->osd = acting.primary;
}
Expand Down Expand Up @@ -1918,10 +1923,12 @@ static void encode_request_partial(struct ceph_osd_request *req,
}

ceph_encode_32(&p, req->r_attempts); /* retry_attempt */
BUG_ON(p != end - 8); /* space for features */
BUG_ON(p > end - 8); /* space for features */

msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */
/* front_len is finalized in encode_request_finish() */
msg->front.iov_len = p - msg->front.iov_base;
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
msg->hdr.data_len = cpu_to_le32(data_len);
/*
* The header "data_off" is a hint to the receiver allowing it
Expand All @@ -1937,11 +1944,12 @@ static void encode_request_partial(struct ceph_osd_request *req,
static void encode_request_finish(struct ceph_msg *msg)
{
void *p = msg->front.iov_base;
void *const partial_end = p + msg->front.iov_len;
void *const end = p + msg->front_alloc_len;

if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) {
/* luminous OSD -- encode features and be done */
p = end - 8;
p = partial_end;
ceph_encode_64(&p, msg->con->peer_features);
} else {
struct {
Expand Down Expand Up @@ -1984,7 +1992,7 @@ static void encode_request_finish(struct ceph_msg *msg)
oid_len = p - oid;

tail = p;
tail_len = (end - p) - 8;
tail_len = partial_end - p;

p = msg->front.iov_base;
ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc));
Expand Down
60 changes: 30 additions & 30 deletions net/ceph/osdmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,10 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c)
ret = decode_choose_arg(p, end, arg);
if (ret)
goto fail;

if (arg->ids_size &&
arg->ids_size != c->buckets[bucket_index]->size)
goto e_inval;
}

insert_choose_arg_map(&c->choose_args, arg_map);
Expand Down Expand Up @@ -2078,14 +2082,17 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
u32 new_pg_num,
bool old_sort_bitwise,
bool new_sort_bitwise,
bool old_recovery_deletes,
bool new_recovery_deletes,
const struct ceph_pg *pgid)
{
return !osds_equal(old_acting, new_acting) ||
!osds_equal(old_up, new_up) ||
old_size != new_size ||
old_min_size != new_min_size ||
ceph_pg_is_split(pgid, old_pg_num, new_pg_num) ||
old_sort_bitwise != new_sort_bitwise;
old_sort_bitwise != new_sort_bitwise ||
old_recovery_deletes != new_recovery_deletes;
}

static int calc_pg_rank(int osd, const struct ceph_osds *acting)
Expand Down Expand Up @@ -2301,10 +2308,17 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
}
}

/*
* Magic value used for a "default" fallback choose_args, used if the
* crush_choose_arg_map passed to do_crush() does not exist. If this
* also doesn't exist, fall back to canonical weights.
*/
#define CEPH_DEFAULT_CHOOSE_ARGS -1

static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
int *result, int result_max,
const __u32 *weight, int weight_max,
u64 choose_args_index)
s64 choose_args_index)
{
struct crush_choose_arg_map *arg_map;
int r;
Expand All @@ -2313,6 +2327,9 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x,

arg_map = lookup_choose_arg_map(&map->crush->choose_args,
choose_args_index);
if (!arg_map)
arg_map = lookup_choose_arg_map(&map->crush->choose_args,
CEPH_DEFAULT_CHOOSE_ARGS);

mutex_lock(&map->crush_workspace_mutex);
r = crush_do_rule(map->crush, ruleno, x, result, result_max,
Expand Down Expand Up @@ -2423,40 +2440,23 @@ static void apply_upmap(struct ceph_osdmap *osdmap,
for (i = 0; i < pg->pg_upmap.len; i++)
raw->osds[i] = pg->pg_upmap.osds[i];
raw->size = pg->pg_upmap.len;
return;
/* check and apply pg_upmap_items, if any */
}

pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
if (pg) {
/*
* Note: this approach does not allow a bidirectional swap,
* e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
*/
for (i = 0; i < pg->pg_upmap_items.len; i++) {
int from = pg->pg_upmap_items.from_to[i][0];
int to = pg->pg_upmap_items.from_to[i][1];
int pos = -1;
bool exists = false;

/* make sure replacement doesn't already appear */
for (j = 0; j < raw->size; j++) {
int osd = raw->osds[j];

if (osd == to) {
exists = true;
for (i = 0; i < raw->size; i++) {
for (j = 0; j < pg->pg_upmap_items.len; j++) {
int from = pg->pg_upmap_items.from_to[j][0];
int to = pg->pg_upmap_items.from_to[j][1];

if (from == raw->osds[i]) {
if (!(to != CRUSH_ITEM_NONE &&
to < osdmap->max_osd &&
osdmap->osd_weight[to] == 0))
raw->osds[i] = to;
break;
}
/* ignore mapping if target is marked out */
if (osd == from && pos < 0 &&
!(to != CRUSH_ITEM_NONE &&
to < osdmap->max_osd &&
osdmap->osd_weight[to] == 0)) {
pos = j;
}
}
if (!exists && pos >= 0) {
raw->osds[pos] = to;
return;
}
}
}
Expand Down

0 comments on commit c63716a

Please sign in to comment.