Skip to content

Commit

Permalink
Merge branch 'block-5.7' into for-5.8/block
Browse files Browse the repository at this point in the history
Pull in block-5.7 fixes for 5.8. Mostly to resolve a conflict with
the blk-iocost changes, but we also need the base of the bdi
use-after-free as well as we build on top of it.

* block-5.7:
  nvme: fix possible hang when ns scanning fails during error recovery
  nvme-pci: fix "slimmer CQ head update"
  bdi: add a ->dev_name field to struct backing_dev_info
  bdi: use bdi_dev_name() to get device name
  bdi: move bdi_dev_name out of line
  vboxsf: don't use the source name in the bdi name
  iocost: protect iocg->abs_vdebt with iocg->waitq.lock
  block: remove the bd_openers checks in blk_drop_partitions
  nvme: prevent double free in nvme_alloc_ns() error handling
  null_blk: Cleanup zoned device initialization
  null_blk: Fix zoned command handling
  block: remove unused header
  blk-iocost: Fix error on iocost_ioc_vrate_adj
  bdev: Reduce time holding bd_mutex in sync in blkdev_close()
  buffer: remove useless comment and WB_REASON_FREE_MORE_MEM, reason.

Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
axboe committed May 9, 2020
2 parents 8b075e5 + 59c7c3c commit 873f1c8
Show file tree
Hide file tree
Showing 20 changed files with 209 additions and 131 deletions.
6 changes: 4 additions & 2 deletions block/bfq-iosched.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@
#include <linux/ioprio.h>
#include <linux/sbitmap.h>
#include <linux/delay.h>
#include <linux/backing-dev.h>

#include "blk.h"
#include "blk-mq.h"
Expand Down Expand Up @@ -4976,8 +4977,9 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
switch (ioprio_class) {
default:
dev_err(bfqq->bfqd->queue->backing_dev_info->dev,
"bfq: bad prio class %d\n", ioprio_class);
pr_err("bdi %s: bfq: bad prio class %d\n",
bdi_dev_name(bfqq->bfqd->queue->backing_dev_info),
ioprio_class);
/* fall through */
case IOPRIO_CLASS_NONE:
/*
Expand Down
2 changes: 1 addition & 1 deletion block/blk-cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ const char *blkg_dev_name(struct blkcg_gq *blkg)
{
/* some drivers (floppy) instantiate a queue w/o disk registered */
if (blkg->q->backing_dev_info->dev)
return dev_name(blkg->q->backing_dev_info->dev);
return bdi_dev_name(blkg->q->backing_dev_info);
return NULL;
}

Expand Down
121 changes: 73 additions & 48 deletions block/blk-iocost.c
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ struct ioc_gq {
*/
atomic64_t vtime;
atomic64_t done_vtime;
atomic64_t abs_vdebt;
u64 abs_vdebt;
u64 last_vtime;

/*
Expand Down Expand Up @@ -1143,7 +1143,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
struct iocg_wake_ctx ctx = { .iocg = iocg };
u64 margin_ns = (u64)(ioc->period_us *
WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC;
u64 abs_vdebt, vdebt, vshortage, expires, oexpires;
u64 vdebt, vshortage, expires, oexpires;
s64 vbudget;
u32 hw_inuse;

Expand All @@ -1153,18 +1153,15 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
vbudget = now->vnow - atomic64_read(&iocg->vtime);

/* pay off debt */
abs_vdebt = atomic64_read(&iocg->abs_vdebt);
vdebt = abs_cost_to_cost(abs_vdebt, hw_inuse);
vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
if (vdebt && vbudget > 0) {
u64 delta = min_t(u64, vbudget, vdebt);
u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse),
abs_vdebt);
iocg->abs_vdebt);

atomic64_add(delta, &iocg->vtime);
atomic64_add(delta, &iocg->done_vtime);
atomic64_sub(abs_delta, &iocg->abs_vdebt);
if (WARN_ON_ONCE(atomic64_read(&iocg->abs_vdebt) < 0))
atomic64_set(&iocg->abs_vdebt, 0);
iocg->abs_vdebt -= abs_delta;
}

/*
Expand Down Expand Up @@ -1220,12 +1217,18 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
u64 delta_ns, expires, oexpires;
u32 hw_inuse;

lockdep_assert_held(&iocg->waitq.lock);

/* debt-adjust vtime */
current_hweight(iocg, NULL, &hw_inuse);
vtime += abs_cost_to_cost(atomic64_read(&iocg->abs_vdebt), hw_inuse);
vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);

/* clear or maintain depending on the overage */
if (time_before_eq64(vtime, now->vnow)) {
/*
* Clear or maintain depending on the overage. Non-zero vdebt is what
* guarantees that @iocg is online and future iocg_kick_delay() will
* clear use_delay. Don't leave it on when there's no vdebt.
*/
if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) {
blkcg_clear_delay(blkg);
return false;
}
Expand Down Expand Up @@ -1254,9 +1257,12 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
{
struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
struct ioc_now now;
unsigned long flags;

spin_lock_irqsave(&iocg->waitq.lock, flags);
ioc_now(iocg->ioc, &now);
iocg_kick_delay(iocg, &now);
spin_unlock_irqrestore(&iocg->waitq.lock, flags);

return HRTIMER_NORESTART;
}
Expand Down Expand Up @@ -1364,14 +1370,13 @@ static void ioc_timer_fn(struct timer_list *timer)
* should have woken up in the last period and expire idle iocgs.
*/
list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) {
if (!waitqueue_active(&iocg->waitq) &&
!atomic64_read(&iocg->abs_vdebt) && !iocg_is_idle(iocg))
if (!waitqueue_active(&iocg->waitq) && iocg->abs_vdebt &&
!iocg_is_idle(iocg))
continue;

spin_lock(&iocg->waitq.lock);

if (waitqueue_active(&iocg->waitq) ||
atomic64_read(&iocg->abs_vdebt)) {
if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) {
/* might be oversleeping vtime / hweight changes, kick */
iocg_kick_waitq(iocg, &now);
iocg_kick_delay(iocg, &now);
Expand Down Expand Up @@ -1587,7 +1592,7 @@ static void ioc_timer_fn(struct timer_list *timer)
vrate_min, vrate_max);
}

trace_iocost_ioc_vrate_adj(ioc, vrate, &missed_ppm, rq_wait_pct,
trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct,
nr_lagging, nr_shortages,
nr_surpluses);

Expand All @@ -1596,7 +1601,7 @@ static void ioc_timer_fn(struct timer_list *timer)
ioc->period_us * vrate * INUSE_MARGIN_PCT, 100);
} else if (ioc->busy_level != prev_busy_level || nr_lagging) {
trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate),
&missed_ppm, rq_wait_pct, nr_lagging,
missed_ppm, rq_wait_pct, nr_lagging,
nr_shortages, nr_surpluses);
}

Expand Down Expand Up @@ -1739,28 +1744,49 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
* tests are racy but the races aren't systemic - we only miss once
* in a while which is fine.
*/
if (!waitqueue_active(&iocg->waitq) &&
!atomic64_read(&iocg->abs_vdebt) &&
if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
time_before_eq64(vtime + cost, now.vnow)) {
iocg_commit_bio(iocg, bio, cost);
return;
}

/*
* We're over budget. If @bio has to be issued regardless,
* remember the abs_cost instead of advancing vtime.
* iocg_kick_waitq() will pay off the debt before waking more IOs.
* We activated above but w/o any synchronization. Deactivation is
* synchronized with waitq.lock and we won't get deactivated as long
* as we're waiting or has debt, so we're good if we're activated
* here. In the unlikely case that we aren't, just issue the IO.
*/
spin_lock_irq(&iocg->waitq.lock);

if (unlikely(list_empty(&iocg->active_list))) {
spin_unlock_irq(&iocg->waitq.lock);
iocg_commit_bio(iocg, bio, cost);
return;
}

/*
* We're over budget. If @bio has to be issued regardless, remember
* the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
* off the debt before waking more IOs.
*
* This way, the debt is continuously paid off each period with the
* actual budget available to the cgroup. If we just wound vtime,
* we would incorrectly use the current hw_inuse for the entire
* amount which, for example, can lead to the cgroup staying
* blocked for a long time even with substantially raised hw_inuse.
* actual budget available to the cgroup. If we just wound vtime, we
* would incorrectly use the current hw_inuse for the entire amount
* which, for example, can lead to the cgroup staying blocked for a
* long time even with substantially raised hw_inuse.
*
* An iocg with vdebt should stay online so that the timer can keep
* deducting its vdebt and [de]activate use_delay mechanism
* accordingly. We don't want to race against the timer trying to
* clear them and leave @iocg inactive w/ dangling use_delay heavily
* penalizing the cgroup and its descendants.
*/
if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
atomic64_add(abs_cost, &iocg->abs_vdebt);
iocg->abs_vdebt += abs_cost;
if (iocg_kick_delay(iocg, &now))
blkcg_schedule_throttle(rqos->q,
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
spin_unlock_irq(&iocg->waitq.lock);
return;
}

Expand All @@ -1777,20 +1803,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
* All waiters are on iocg->waitq and the wait states are
* synchronized using waitq.lock.
*/
spin_lock_irq(&iocg->waitq.lock);

/*
* We activated above but w/o any synchronization. Deactivation is
* synchronized with waitq.lock and we won't get deactivated as
* long as we're waiting, so we're good if we're activated here.
* In the unlikely case that we are deactivated, just issue the IO.
*/
if (unlikely(list_empty(&iocg->active_list))) {
spin_unlock_irq(&iocg->waitq.lock);
iocg_commit_bio(iocg, bio, cost);
return;
}

init_waitqueue_func_entry(&wait.wait, iocg_wake_fn);
wait.wait.private = current;
wait.bio = bio;
Expand Down Expand Up @@ -1822,6 +1834,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
struct ioc_now now;
u32 hw_inuse;
u64 abs_cost, cost;
unsigned long flags;

/* bypass if disabled or for root cgroup */
if (!ioc->enabled || !iocg->level)
Expand All @@ -1841,15 +1854,28 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
iocg->cursor = bio_end;

/*
* Charge if there's enough vtime budget and the existing request
* has cost assigned. Otherwise, account it as debt. See debt
* handling in ioc_rqos_throttle() for details.
* Charge if there's enough vtime budget and the existing request has
* cost assigned.
*/
if (rq->bio && rq->bio->bi_iocost_cost &&
time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow))
time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) {
iocg_commit_bio(iocg, bio, cost);
else
atomic64_add(abs_cost, &iocg->abs_vdebt);
return;
}

/*
* Otherwise, account it as debt if @iocg is online, which it should
* be for the vast majority of cases. See debt handling in
* ioc_rqos_throttle() for details.
*/
spin_lock_irqsave(&iocg->waitq.lock, flags);
if (likely(!list_empty(&iocg->active_list))) {
iocg->abs_vdebt += abs_cost;
iocg_kick_delay(iocg, &now);
} else {
iocg_commit_bio(iocg, bio, cost);
}
spin_unlock_irqrestore(&iocg->waitq.lock, flags);
}

static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
Expand Down Expand Up @@ -2021,7 +2047,6 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
iocg->ioc = ioc;
atomic64_set(&iocg->vtime, now.vnow);
atomic64_set(&iocg->done_vtime, now.vnow);
atomic64_set(&iocg->abs_vdebt, 0);
atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period));
INIT_LIST_HEAD(&iocg->active_list);
iocg->hweight_active = HWEIGHT_WHOLE;
Expand Down
2 changes: 1 addition & 1 deletion block/partitions/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ int blk_drop_partitions(struct block_device *bdev)

if (!disk_part_scan_enabled(bdev->bd_disk))
return 0;
if (bdev->bd_part_count || bdev->bd_openers > 1)
if (bdev->bd_part_count)
return -EBUSY;

sync_blockdev(bdev);
Expand Down
29 changes: 19 additions & 10 deletions drivers/block/null_blk.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,26 +85,35 @@ struct nullb {
char disk_name[DISK_NAME_LEN];
};

blk_status_t null_process_cmd(struct nullb_cmd *cmd,
enum req_opf op, sector_t sector,
unsigned int nr_sectors);

#ifdef CONFIG_BLK_DEV_ZONED
int null_zone_init(struct nullb_device *dev);
void null_zone_exit(struct nullb_device *dev);
int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q);
int null_register_zoned_dev(struct nullb *nullb);
void null_free_zoned_dev(struct nullb_device *dev);
int null_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
blk_status_t null_handle_zoned(struct nullb_cmd *cmd,
enum req_opf op, sector_t sector,
sector_t nr_sectors);
blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd,
enum req_opf op, sector_t sector,
sector_t nr_sectors);
size_t null_zone_valid_read_len(struct nullb *nullb,
sector_t sector, unsigned int len);
#else
static inline int null_zone_init(struct nullb_device *dev)
static inline int null_init_zoned_dev(struct nullb_device *dev,
struct request_queue *q)
{
pr_err("CONFIG_BLK_DEV_ZONED not enabled\n");
return -EINVAL;
}
static inline void null_zone_exit(struct nullb_device *dev) {}
static inline blk_status_t null_handle_zoned(struct nullb_cmd *cmd,
enum req_opf op, sector_t sector,
sector_t nr_sectors)
static inline int null_register_zoned_dev(struct nullb *nullb)
{
return -ENODEV;
}
static inline void null_free_zoned_dev(struct nullb_device *dev) {}
static inline blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd,
enum req_opf op, sector_t sector, sector_t nr_sectors)
{
return BLK_STS_NOTSUPP;
}
Expand Down
Loading

0 comments on commit 873f1c8

Please sign in to comment.