Skip to content

Commit

Permalink
libceph: don't time out osd requests that haven't been received
Browse files Browse the repository at this point in the history
Keep track of when an outgoing message is ACKed (i.e., when the server has
fully received it and, presumably, queued it for processing).  Time out an
OSD request only if the server has ACKed it and too much time has passed
since that ACK; a request that has not been ACKed yet is never timed out.

This prevents timeouts and connection thrashing when the OSDs are simply
busy and are throttling the requests they read off the network.

Reviewed-by: Yehuda Sadeh <[email protected]>
Signed-off-by: Sage Weil <[email protected]>
  • Loading branch information
liewegas committed Jul 26, 2011
1 parent 8f04d42 commit 4cf9d54
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 7 deletions.
1 change: 1 addition & 0 deletions include/linux/ceph/messenger.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ struct ceph_msg {
bool more_to_follow;
bool needs_out_seq;
int front_max;
unsigned long ack_stamp; /* tx: when we were acked */

struct ceph_msgpool *pool;
};
Expand Down
12 changes: 5 additions & 7 deletions net/ceph/messenger.c
Original file line number Diff line number Diff line change
Expand Up @@ -486,13 +486,10 @@ static void prepare_write_message(struct ceph_connection *con)
m = list_first_entry(&con->out_queue,
struct ceph_msg, list_head);
con->out_msg = m;
if (test_bit(LOSSYTX, &con->state)) {
list_del_init(&m->list_head);
} else {
/* put message on sent list */
ceph_msg_get(m);
list_move_tail(&m->list_head, &con->out_sent);
}

/* put message on sent list */
ceph_msg_get(m);
list_move_tail(&m->list_head, &con->out_sent);

/*
* only assign outgoing seq # if we haven't sent this message
Expand Down Expand Up @@ -1399,6 +1396,7 @@ static void process_ack(struct ceph_connection *con)
break;
dout("got ack for seq %llu type %d at %p\n", seq,
le16_to_cpu(m->hdr.type), m);
m->ack_stamp = jiffies;
ceph_msg_remove(m);
}
prepare_read_tag(con);
Expand Down
6 changes: 6 additions & 0 deletions net/ceph/osd_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -1085,9 +1085,15 @@ static void handle_timeout(struct work_struct *work)
req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
r_req_lru_item);

/* hasn't been long enough since we sent it? */
if (time_before(jiffies, req->r_stamp + timeout))
break;

/* hasn't been long enough since it was acked? */
if (req->r_request->ack_stamp == 0 ||
time_before(jiffies, req->r_request->ack_stamp + timeout))
break;

BUG_ON(req == last_req && req->r_stamp == last_stamp);
last_req = req;
last_stamp = req->r_stamp;
Expand Down

0 comments on commit 4cf9d54

Please sign in to comment.