Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block layer fixes from Jens Axboe:
 "A final set of fixes for 4.3.

  It is (again) bigger than I would have liked, but it's all been
  through the testing mill and has been carefully reviewed by multiple
  parties.  Each fix is either a regression fix for this cycle, or is
  marked stable.  You can scold me at KS.  The pull request contains:

   - Three simple fixes for NVMe, fixing regressions since 4.3.  From
     Arnd, Christoph, and Keith.

   - A single xen-blkfront fix from Cathy, fixing a NULL dereference if
     an error is returned through the state change callback.

   - Fixup for some bad/sloppy code in nbd that got introduced earlier
     in this cycle.  From Markus Pargmann.

   - A blk-mq tagset use-after-free fix from Junichi.

   - A backing device lifetime fix from Tejun, fixing a crash.

   - And finally, a set of regression/stable fixes for cgroup writeback
     from Tejun"

* 'for-linus' of git://git.kernel.dk/linux-block:
  writeback: remove broken rbtree_postorder_for_each_entry_safe() usage in cgwb_bdi_destroy()
  NVMe: Fix memory leak on retried commands
  block: don't release bdi while request_queue has live references
  nvme: use an integer value to Linux errno values
  blk-mq: fix use-after-free in blk_mq_free_tag_set()
  nvme: fix 32-bit build warning
  writeback: fix incorrect calculation of available memory for memcg domains
  writeback: memcg dirty_throttle_control should be initialized with wb->memcg_completions
  writeback: bdi_writeback iteration must not skip dying ones
  writeback: fix bdi_writeback iteration in wakeup_dirtytime_writeback()
  writeback: laptop_mode_timer_fn() needs rcu_read_lock() around bdi_writeback iteration
  nbd: Add locking for tasks
  xen-blkfront: check for null drvdata in blkback_changed (XenbusStateClosing)
torvalds committed Oct 23, 2015
2 parents ef594c4 + e27c5b9 commit ea1ee5f
Showing 14 changed files with 167 additions and 144 deletions.
2 changes: 1 addition & 1 deletion block/blk-core.c
@@ -576,7 +576,7 @@ void blk_cleanup_queue(struct request_queue *q)
q->queue_lock = &q->__queue_lock;
spin_unlock_irq(lock);

bdi_destroy(&q->backing_dev_info);
bdi_unregister(&q->backing_dev_info);

/* @q is and will stay empty, shutdown and put */
blk_put_queue(q);
1 change: 1 addition & 0 deletions block/blk-mq-tag.c
@@ -641,6 +641,7 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
{
bt_free(&tags->bitmap_tags);
bt_free(&tags->breserved_tags);
free_cpumask_var(tags->cpumask);
kfree(tags);
}

4 changes: 1 addition & 3 deletions block/blk-mq.c
@@ -2296,10 +2296,8 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
int i;

for (i = 0; i < set->nr_hw_queues; i++) {
if (set->tags[i]) {
if (set->tags[i])
blk_mq_free_rq_map(set, set->tags[i], i);
free_cpumask_var(set->tags[i]->cpumask);
}
}

kfree(set->tags);
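
The blk-mq-tag.c and blk-mq.c hunks above are one fix: blk_mq_free_rq_map() eventually frees the tags structure, so the old free_cpumask_var(set->tags[i]->cpumask) that followed it touched freed memory. Moving the cpumask free into blk_mq_free_tags() keeps the order "members first, container last". A rough userspace model of that ownership rule, with made-up names rather than the kernel code:

/*
 * Userspace model of the ownership fix above, not the kernel code: the bug
 * was freeing a member (the cpumask) after a helper had already freed the
 * containing structure.  Pushing the member free into the helper makes the
 * helper own everything it allocated.
 */
#include <stdlib.h>

struct tags {
    unsigned long *cpumask;     /* separately allocated member */
    unsigned int nr_tags;
};

/* Mirrors blk_mq_free_tags() after the fix: the helper owns the members. */
static void tags_free(struct tags *tags)
{
    free(tags->cpumask);        /* member freed while @tags is still valid */
    free(tags);
}

static struct tags *tags_alloc(unsigned int nr_tags)
{
    struct tags *tags = calloc(1, sizeof(*tags));

    if (!tags)
        return NULL;
    tags->cpumask = calloc(1, sizeof(*tags->cpumask));
    if (!tags->cpumask) {
        free(tags);
        return NULL;
    }
    tags->nr_tags = nr_tags;
    return tags;
}

int main(void)
{
    struct tags *tags = tags_alloc(64);

    if (!tags)
        return 1;
    /*
     * The buggy ordering was effectively:
     *     tags_free(tags);        // container already gone
     *     free(tags->cpumask);    // use-after-free
     * With the helper owning the member, the caller frees exactly once.
     */
    tags_free(tags);
    return 0;
}
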
1 change: 1 addition & 0 deletions block/blk-sysfs.c
@@ -540,6 +540,7 @@ static void blk_release_queue(struct kobject *kobj)
struct request_queue *q =
container_of(kobj, struct request_queue, kobj);

bdi_exit(&q->backing_dev_info);
blkcg_exit_queue(q);

if (q->elevator) {
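
Together with the blk-core.c change above, this is the "don't release bdi while request_queue has live references" fix: blk_cleanup_queue() now only unregisters the backing_dev_info, and the final teardown moves to blk_release_queue(), which runs when the last reference to the queue is dropped. A rough userspace sketch of that two-stage teardown, with made-up names and a plain refcount standing in for the queue's kobject:

/*
 * Rough userspace model of the two-stage teardown, not the kernel API:
 * "unregister" happens when the device goes away, the final "exit" only
 * when the last reference to the owning queue is dropped, so the embedded
 * bdi stays valid for as long as anyone still holds the queue.
 */
#include <stdlib.h>

struct bdi {
    int registered;
};

struct queue {
    int refcount;
    struct bdi bdi;             /* embedded: lives exactly as long as @queue */
};

static void bdi_unregister_model(struct bdi *bdi)
{
    bdi->registered = 0;        /* mirrors bdi_unregister() in blk_cleanup_queue() */
}

static void bdi_exit_model(struct bdi *bdi)
{
    /* final teardown, mirrors bdi_exit() in blk_release_queue() */
    (void)bdi;
}

static void queue_put(struct queue *q)
{
    if (--q->refcount == 0) {
        bdi_exit_model(&q->bdi);    /* only once nothing can reach @q */
        free(q);
    }
}

int main(void)
{
    struct queue *q = calloc(1, sizeof(*q));

    if (!q)
        return 1;
    q->refcount = 2;            /* creator plus one outstanding user */
    q->bdi.registered = 1;

    /* device removal: stop the bdi, drop the creator's reference */
    bdi_unregister_model(&q->bdi);
    queue_put(q);

    /* the bdi is still usable here by the remaining queue user */
    queue_put(q);               /* last reference: bdi_exit + free */
    return 0;
}
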
36 changes: 30 additions & 6 deletions drivers/block/nbd.c
@@ -60,6 +60,7 @@ struct nbd_device {
bool disconnect; /* a disconnect has been requested by user */

struct timer_list timeout_timer;
spinlock_t tasks_lock;
struct task_struct *task_recv;
struct task_struct *task_send;

@@ -140,21 +141,23 @@ static void sock_shutdown(struct nbd_device *nbd)
static void nbd_xmit_timeout(unsigned long arg)
{
struct nbd_device *nbd = (struct nbd_device *)arg;
struct task_struct *task;
unsigned long flags;

if (list_empty(&nbd->queue_head))
return;

nbd->disconnect = true;

task = READ_ONCE(nbd->task_recv);
if (task)
force_sig(SIGKILL, task);
spin_lock_irqsave(&nbd->tasks_lock, flags);

if (nbd->task_recv)
force_sig(SIGKILL, nbd->task_recv);

task = READ_ONCE(nbd->task_send);
if (task)
if (nbd->task_send)
force_sig(SIGKILL, nbd->task_send);

spin_unlock_irqrestore(&nbd->tasks_lock, flags);

dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
}

@@ -403,17 +406,24 @@ static int nbd_thread_recv(struct nbd_device *nbd)
{
struct request *req;
int ret;
unsigned long flags;

BUG_ON(nbd->magic != NBD_MAGIC);

sk_set_memalloc(nbd->sock->sk);

spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_recv = current;
spin_unlock_irqrestore(&nbd->tasks_lock, flags);

ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
if (ret) {
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");

spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_recv = NULL;
spin_unlock_irqrestore(&nbd->tasks_lock, flags);

return ret;
}

@@ -429,7 +439,9 @@ static int nbd_thread_recv(struct nbd_device *nbd)

device_remove_file(disk_to_dev(nbd->disk), &pid_attr);

spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_recv = NULL;
spin_unlock_irqrestore(&nbd->tasks_lock, flags);

if (signal_pending(current)) {
siginfo_t info;
@@ -534,8 +546,11 @@ static int nbd_thread_send(void *data)
{
struct nbd_device *nbd = data;
struct request *req;
unsigned long flags;

spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_send = current;
spin_unlock_irqrestore(&nbd->tasks_lock, flags);

set_user_nice(current, MIN_NICE);
while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
@@ -572,7 +587,15 @@ static int nbd_thread_send(void *data)
nbd_handle_req(nbd, req);
}

spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_send = NULL;
spin_unlock_irqrestore(&nbd->tasks_lock, flags);

/* Clear maybe pending signals */
if (signal_pending(current)) {
siginfo_t info;
dequeue_signal_lock(current, &current->blocked, &info);
}

return 0;
}
@@ -1052,6 +1075,7 @@ static int __init nbd_init(void)
nbd_dev[i].magic = NBD_MAGIC;
INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
spin_lock_init(&nbd_dev[i].queue_lock);
spin_lock_init(&nbd_dev[i].tasks_lock);
INIT_LIST_HEAD(&nbd_dev[i].queue_head);
mutex_init(&nbd_dev[i].tx_lock);
init_timer(&nbd_dev[i].timeout_timer);
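
The nbd hunks put every read and write of task_recv/task_send under the new tasks_lock, so the timeout handler can no longer force_sig() a task pointer that the receiver or sender thread is clearing at the same moment. Below is a small userspace model of the pattern, assuming nothing beyond POSIX threads: a mutex stands in for the irqsave spinlock (the real lock is taken from timer context, hence spin_lock_irqsave in the driver) and a printf stands in for force_sig().

/*
 * Userspace model of the tasks_lock pattern, not the nbd code itself: the
 * worker publishes and clears its handle under the same lock the timeout
 * handler takes before signalling.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t tasks_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t *task_recv;        /* NULL while no receiver is running */

static void *receiver(void *arg)
{
    pthread_t self = pthread_self();

    (void)arg;
    pthread_mutex_lock(&tasks_lock);
    task_recv = &self;              /* publish under the lock */
    pthread_mutex_unlock(&tasks_lock);

    /* ... the receive loop would run here ... */

    pthread_mutex_lock(&tasks_lock);
    task_recv = NULL;               /* clear under the same lock */
    pthread_mutex_unlock(&tasks_lock);
    return NULL;
}

/* Mirrors nbd_xmit_timeout(): only act on a task that is still published. */
static void timeout_handler(void)
{
    pthread_mutex_lock(&tasks_lock);
    if (task_recv)
        printf("would signal the receiver\n");
    pthread_mutex_unlock(&tasks_lock);
}

int main(void)
{
    pthread_t t;

    if (pthread_create(&t, NULL, receiver, NULL))
        return 1;
    timeout_handler();
    pthread_join(&t, NULL);
    return 0;
}
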
24 changes: 15 additions & 9 deletions drivers/block/nvme-core.c
@@ -603,27 +603,31 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
struct nvme_iod *iod = ctx;
struct request *req = iod_get_private(iod);
struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);

u16 status = le16_to_cpup(&cqe->status) >> 1;
bool requeue = false;
int error = 0;

if (unlikely(status)) {
if (!(status & NVME_SC_DNR || blk_noretry_request(req))
&& (jiffies - req->start_time) < req->timeout) {
unsigned long flags;

requeue = true;
blk_mq_requeue_request(req);
spin_lock_irqsave(req->q->queue_lock, flags);
if (!blk_queue_stopped(req->q))
blk_mq_kick_requeue_list(req->q);
spin_unlock_irqrestore(req->q->queue_lock, flags);
return;
goto release_iod;
}

if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
if (cmd_rq->ctx == CMD_CTX_CANCELLED)
status = -EINTR;
error = -EINTR;
else
error = status;
} else {
status = nvme_error_status(status);
error = nvme_error_status(status);
}
}

@@ -635,8 +639,9 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
if (cmd_rq->aborted)
dev_warn(nvmeq->dev->dev,
"completing aborted command with status:%04x\n",
status);
error);

release_iod:
if (iod->nents) {
dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
@@ -649,7 +654,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
}
nvme_free_iod(nvmeq->dev, iod);

blk_mq_complete_request(req, status);
if (likely(!requeue))
blk_mq_complete_request(req, error);
}

/* length is in bytes. gfp flags indicates whether we may sleep. */
@@ -1804,7 +1810,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)

length = (io.nblocks + 1) << ns->lba_shift;
meta_len = (io.nblocks + 1) * ns->ms;
metadata = (void __user *)(unsigned long)io.metadata;
metadata = (void __user *)(uintptr_t)io.metadata;
write = io.opcode & 1;

if (ns->ext) {
@@ -1844,7 +1850,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
c.rw.metadata = cpu_to_le64(meta_dma);

status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
(void __user *)io.addr, length, NULL, 0);
(void __user *)(uintptr_t)io.addr, length, NULL, 0);
unmap:
if (meta) {
if (status == NVME_SC_SUCCESS && !write) {
@@ -1886,7 +1892,7 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
timeout = msecs_to_jiffies(cmd.timeout_ms);

status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
NULL, (void __user *)cmd.addr, cmd.data_len,
NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
&cmd.result, timeout);
if (status >= 0) {
if (put_user(cmd.result, &ucmd->result))
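
Besides separating the NVMe wire status (u16 status) from the Linux errno (int error) and releasing the iod on the requeue path, the nvme hunks change the user-address casts to go through uintptr_t: the ioctl structures carry addresses as 64-bit integers, and casting a u64 straight to a pointer warns on 32-bit builds. A minimal sketch of the cast in plain userspace C, with the __user annotation dropped:

/*
 * Sketch of the cast pattern above, not the driver code: going through
 * uintptr_t makes the integer-to-pointer conversion explicit and quiet on
 * both 32- and 64-bit targets.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t addr_from_abi = 0x1000;            /* stands in for io.addr / cmd.addr */
    void *p = (void *)(uintptr_t)addr_from_abi; /* no cast-to-pointer-of-different-size warning */

    printf("user buffer at %p\n", p);
    return 0;
}
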
3 changes: 2 additions & 1 deletion drivers/block/xen-blkfront.c
@@ -1956,7 +1956,8 @@ static void blkback_changed(struct xenbus_device *dev,
break;
/* Missed the backend's Closing state -- fallthrough */
case XenbusStateClosing:
blkfront_closing(info);
if (info)
blkfront_closing(info);
break;
}
}
35 changes: 24 additions & 11 deletions fs/fs-writeback.c
@@ -778,19 +778,24 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
struct wb_writeback_work *base_work,
bool skip_if_busy)
{
int next_memcg_id = 0;
struct bdi_writeback *wb;
struct wb_iter iter;
struct bdi_writeback *last_wb = NULL;
struct bdi_writeback *wb = list_entry_rcu(&bdi->wb_list,
struct bdi_writeback, bdi_node);

might_sleep();
restart:
rcu_read_lock();
bdi_for_each_wb(wb, bdi, &iter, next_memcg_id) {
list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done);
struct wb_writeback_work fallback_work;
struct wb_writeback_work *work;
long nr_pages;

if (last_wb) {
wb_put(last_wb);
last_wb = NULL;
}

/* SYNC_ALL writes out I_DIRTY_TIME too */
if (!wb_has_dirty_io(wb) &&
(base_work->sync_mode == WB_SYNC_NONE ||
@@ -819,12 +824,22 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,

wb_queue_work(wb, work);

next_memcg_id = wb->memcg_css->id + 1;
/*
* Pin @wb so that it stays on @bdi->wb_list. This allows
* continuing iteration from @wb after dropping and
* regrabbing rcu read lock.
*/
wb_get(wb);
last_wb = wb;

rcu_read_unlock();
wb_wait_for_completion(bdi, &fallback_work_done);
goto restart;
}
rcu_read_unlock();

if (last_wb)
wb_put(last_wb);
}

#else /* CONFIG_CGROUP_WRITEBACK */
Expand Down Expand Up @@ -1857,12 +1872,11 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
rcu_read_lock();
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
struct bdi_writeback *wb;
struct wb_iter iter;

if (!bdi_has_dirty_io(bdi))
continue;

bdi_for_each_wb(wb, bdi, &iter, 0)
list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
wb_start_writeback(wb, wb_split_bdi_pages(wb, nr_pages),
false, reason);
}
@@ -1894,11 +1908,10 @@ static void wakeup_dirtytime_writeback(struct work_struct *w)
rcu_read_lock();
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
struct bdi_writeback *wb;
struct wb_iter iter;

bdi_for_each_wb(wb, bdi, &iter, 0)
if (!list_empty(&bdi->wb.b_dirty_time))
wb_wakeup(&bdi->wb);
list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
if (!list_empty(&wb->b_dirty_time))
wb_wakeup(wb);
}
rcu_read_unlock();
schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
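
The fs-writeback.c changes move the cgroup-writeback iterators onto the new bdi->wb_list, and bdi_split_work_to_wbs() now pins the current wb with wb_get() before dropping the RCU read lock to wait, then resumes the walk from it. A simplified userspace model of that pin-and-continue pattern, assuming only POSIX threads, with a mutex in place of rcu_read_lock(), a plain refcount in place of wb_get()/wb_put(), and every element taking the slow path:

/*
 * Simplified userspace model of the pin-and-continue iteration above, not
 * the kernel code: the element being processed is pinned before the lock is
 * dropped, so the walk can safely resume from it afterwards.
 */
#include <pthread.h>
#include <stdio.h>

struct node {
    struct node *next;
    int refs;
    int id;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void node_get(struct node *n) { n->refs++; }
static void node_put(struct node *n) { n->refs--; /* last put would free it */ }

static void split_work(struct node *head)
{
    struct node *last = NULL;       /* mirrors last_wb */
    struct node *n;

restart:
    pthread_mutex_lock(&list_lock);
    n = last ? last->next : head;   /* resume after the pinned element */
    if (last) {
        node_put(last);             /* mirrors wb_put(last_wb) */
        last = NULL;
    }
    for (; n; n = n->next) {
        printf("queue writeback work for node %d\n", n->id);

        node_get(n);                /* pin @n so it stays reachable */
        last = n;
        pthread_mutex_unlock(&list_lock);

        /* ... sleep here, as in wb_wait_for_completion() ... */

        goto restart;
    }
    pthread_mutex_unlock(&list_lock);
    if (last)
        node_put(last);
}

int main(void)
{
    struct node c = { NULL, 0, 3 };
    struct node b = { &c, 0, 2 };
    struct node a = { &b, 0, 1 };

    split_work(&a);
    return 0;
}
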
3 changes: 3 additions & 0 deletions include/linux/backing-dev-defs.h
@@ -116,6 +116,8 @@ struct bdi_writeback {
struct list_head work_list;
struct delayed_work dwork; /* work item used for writeback */

struct list_head bdi_node; /* anchored at bdi->wb_list */

#ifdef CONFIG_CGROUP_WRITEBACK
struct percpu_ref refcnt; /* used only for !root wb's */
struct fprop_local_percpu memcg_completions;
@@ -150,6 +152,7 @@ struct backing_dev_info {
atomic_long_t tot_write_bandwidth;

struct bdi_writeback wb; /* the root writeback info for this bdi */
struct list_head wb_list; /* list of all wbs */
#ifdef CONFIG_CGROUP_WRITEBACK
struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
struct rb_root cgwb_congested_tree; /* their congested states */