Skip to content

Commit

Permalink
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Browse files Browse the repository at this point in the history
Pull block fixes from Jens Axboe:
 "This contains a set of fixes for xen-blkback by way of Konrad, and a
  performance regression fix for blk-mq for shared tags.

  The latter could account for as much as a 50x reduction in
  performance, with the test case from the user with 500 name spaces. A
  more realistic setup on my end with 32 drives showed a 3.5x drop. The
  fix has been thoroughly tested before being committed"

* 'for-linus' of git://git.kernel.dk/linux-block:
  blk-mq: fix performance regression with shared tags
  xen-blkback: don't leak stack data via response ring
  xen/blkback: don't use xen_blkif_get() in xen-blkback kthread
  xen/blkback: don't free be structure too early
  xen/blkback: fix disconnect while I/Os in flight
  • Loading branch information
torvalds committed Jun 22, 2017
2 parents 48b6bbe + 8e8320c commit 8d829b9
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 65 deletions.
58 changes: 46 additions & 12 deletions block/blk-mq-sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q,
__blk_mq_sched_assign_ioc(q, rq, bio, ioc);
}

/*
* Mark a hardware queue as needing a restart. For shared queues, maintain
* a count of how many hardware queues are marked for restart.
*/
static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
return;

if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
struct request_queue *q = hctx->queue;

if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_inc(&q->shared_hctx_restart);
} else
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}

static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
return false;

if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
struct request_queue *q = hctx->queue;

if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_dec(&q->shared_hctx_restart);
} else
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

if (blk_mq_hctx_has_pending(hctx)) {
blk_mq_run_hw_queue(hctx, true);
return true;
}

return false;
}

struct request *blk_mq_sched_get_request(struct request_queue *q,
struct bio *bio,
unsigned int op,
Expand Down Expand Up @@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
return true;
}

static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
if (blk_mq_hctx_has_pending(hctx)) {
blk_mq_run_hw_queue(hctx, true);
return true;
}
}
return false;
}

/**
* list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
* @pos: loop cursor.
Expand Down Expand Up @@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
unsigned int i, j;

if (set->flags & BLK_MQ_F_TAG_SHARED) {
/*
* If this is 0, then we know that no hardware queues
* have RESTART marked. We're done.
*/
if (!atomic_read(&queue->shared_hctx_restart))
return;

rcu_read_lock();
list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
tag_set_list) {
Expand Down
9 changes: 0 additions & 9 deletions block/blk-mq-sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
return false;
}

/*
* Mark a hardware queue as needing a restart.
*/
static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}

static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
{
return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
Expand Down
16 changes: 13 additions & 3 deletions block/blk-mq.c
Original file line number Diff line number Diff line change
Expand Up @@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q,
}
}

/*
* Caller needs to ensure that we're either frozen/quiesced, or that
* the queue isn't live yet.
*/
static void queue_set_hctx_shared(struct request_queue *q, bool shared)
{
struct blk_mq_hw_ctx *hctx;
int i;

queue_for_each_hw_ctx(q, hctx, i) {
if (shared)
if (shared) {
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_inc(&q->shared_hctx_restart);
hctx->flags |= BLK_MQ_F_TAG_SHARED;
else
} else {
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_dec(&q->shared_hctx_restart);
hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
}
}
}

static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
bool shared)
{
struct request_queue *q;

Expand Down
26 changes: 12 additions & 14 deletions drivers/block/xen-blkback/blkback.c
Original file line number Diff line number Diff line change
Expand Up @@ -609,8 +609,6 @@ int xen_blkif_schedule(void *arg)
unsigned long timeout;
int ret;

xen_blkif_get(blkif);

set_freezable();
while (!kthread_should_stop()) {
if (try_to_freeze())
Expand Down Expand Up @@ -665,7 +663,6 @@ int xen_blkif_schedule(void *arg)
print_stats(ring);

ring->xenblkd = NULL;
xen_blkif_put(blkif);

return 0;
}
Expand Down Expand Up @@ -1436,34 +1433,35 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
static void make_response(struct xen_blkif_ring *ring, u64 id,
unsigned short op, int st)
{
struct blkif_response resp;
struct blkif_response *resp;
unsigned long flags;
union blkif_back_rings *blk_rings;
int notify;

resp.id = id;
resp.operation = op;
resp.status = st;

spin_lock_irqsave(&ring->blk_ring_lock, flags);
blk_rings = &ring->blk_rings;
/* Place on the response ring for the relevant domain. */
switch (ring->blkif->blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
&resp, sizeof(resp));
resp = RING_GET_RESPONSE(&blk_rings->native,
blk_rings->native.rsp_prod_pvt);
break;
case BLKIF_PROTOCOL_X86_32:
memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
&resp, sizeof(resp));
resp = RING_GET_RESPONSE(&blk_rings->x86_32,
blk_rings->x86_32.rsp_prod_pvt);
break;
case BLKIF_PROTOCOL_X86_64:
memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
&resp, sizeof(resp));
resp = RING_GET_RESPONSE(&blk_rings->x86_64,
blk_rings->x86_64.rsp_prod_pvt);
break;
default:
BUG();
}

resp->id = id;
resp->operation = op;
resp->status = st;

blk_rings->common.rsp_prod_pvt++;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
Expand Down
26 changes: 6 additions & 20 deletions drivers/block/xen-blkback/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,8 @@ extern unsigned int xenblk_max_queues;
struct blkif_common_request {
char dummy;
};
struct blkif_common_response {
char dummy;
};

/* i386 protocol version */

struct blkif_x86_32_request_rw {
uint8_t nr_segments; /* number of segments */
Expand Down Expand Up @@ -129,14 +128,6 @@ struct blkif_x86_32_request {
} u;
} __attribute__((__packed__));

/* i386 protocol version */
#pragma pack(push, 4)
struct blkif_x86_32_response {
uint64_t id; /* copied from request */
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
};
#pragma pack(pop)
/* x86_64 protocol version */

struct blkif_x86_64_request_rw {
Expand Down Expand Up @@ -193,18 +184,12 @@ struct blkif_x86_64_request {
} u;
} __attribute__((__packed__));

struct blkif_x86_64_response {
uint64_t __attribute__((__aligned__(8))) id;
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
};

DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
struct blkif_common_response);
struct blkif_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
struct blkif_x86_32_response);
struct blkif_response __packed);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
struct blkif_x86_64_response);
struct blkif_response);

union blkif_back_rings {
struct blkif_back_ring native;
Expand Down Expand Up @@ -281,6 +266,7 @@ struct xen_blkif_ring {

wait_queue_head_t wq;
atomic_t inflight;
bool active;
/* One thread per blkif ring. */
struct task_struct *xenblkd;
unsigned int waiting_reqs;
Expand Down
15 changes: 8 additions & 7 deletions drivers/block/xen-blkback/xenbus.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
init_waitqueue_head(&ring->shutdown_wq);
ring->blkif = blkif;
ring->st_print = jiffies;
xen_blkif_get(blkif);
ring->active = true;
}

return 0;
Expand Down Expand Up @@ -249,10 +249,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
struct xen_blkif_ring *ring = &blkif->rings[r];
unsigned int i = 0;

if (!ring->active)
continue;

if (ring->xenblkd) {
kthread_stop(ring->xenblkd);
wake_up(&ring->shutdown_wq);
ring->xenblkd = NULL;
}

/* The above kthread_stop() guarantees that at this point we
Expand Down Expand Up @@ -296,7 +298,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
BUG_ON(ring->free_pages_num != 0);
BUG_ON(ring->persistent_gnt_c != 0);
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
xen_blkif_put(blkif);
ring->active = false;
}
blkif->nr_ring_pages = 0;
/*
Expand All @@ -312,9 +314,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)

static void xen_blkif_free(struct xen_blkif *blkif)
{

xen_blkif_disconnect(blkif);
WARN_ON(xen_blkif_disconnect(blkif));
xen_vbd_free(&blkif->vbd);
kfree(blkif->be->mode);
kfree(blkif->be);

/* Make sure everything is drained before shutting down */
kmem_cache_free(xen_blkif_cachep, blkif);
Expand Down Expand Up @@ -511,8 +514,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
xen_blkif_put(be->blkif);
}

kfree(be->mode);
kfree(be);
return 0;
}

Expand Down
2 changes: 2 additions & 0 deletions include/linux/blkdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,8 @@ struct request_queue {
int nr_rqs[2]; /* # allocated [a]sync rqs */
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */

atomic_t shared_hctx_restart;

struct blk_queue_stats *stats;
struct rq_wb *rq_wb;

Expand Down

0 comments on commit 8d829b9

Please sign in to comment.