Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "A collection of fixes that should go into this series. This contains:

   - A set of NVMe fixes, pulled from Christoph. This includes a set of
     fixes for the fiber channel bits from James Smart, rdma queue depth
     fix from Marta, controller removal fixes from Ming, and some more
     APST quirk updates from Andy.

   - A blk-mq debugfs fix from Bart, fixing a problem with the
     untangling of the sysfs and debugfs blk-mq bits that was added in
     this series.

   - Error code fix in add_partition() from Dan.

   - A small series of fixes for the new blk-throttle code from Shaohua"

* 'for-linus' of git://git.kernel.dk/linux-block: (21 commits)
  blk-mq: Only register debugfs attributes for blk-mq queues
  nvme: Quirk APST on Intel 600P/P3100 devices
  nvme: only setup block integrity if supported by the driver
  nvme: replace is_flags field in nvme_ctrl_ops with a flags field
  nvme-pci: consistencly use ctrl->device for logging
  partitions/msdos: FreeBSD UFS2 file systems are not recognized
  block: fix an error code in add_partition()
  blk-throttle: force user to configure all settings for io.low
  blk-throttle: respect 0 bps/iops settings for io.low
  blk-throttle: output some debug info in trace
  blk-throttle: add hierarchy support for latency target and idle time
  nvme_fc: remove extra controller reference taken on reconnect
  nvme_fc: correct nvme status set on abort
  nvme_fc: set logging level on resets/deletes
  nvme_fc: revise comment on teardown
  nvme_fc: Support ctrl_loss_tmo
  nvme_fc: get rid of local reconnect_delay
  blk-mq: remove blk_mq_abort_requeue_list()
  nvme: avoid to use blk_mq_abort_requeue_list()
  nvme: use blk_mq_start_hw_queues() in nvme_kill_queues()
  ...
torvalds committed May 26, 2017
2 parents 6ce4782 + 8aa6382 commit 1b8f2ff
Showing 12 changed files with 247 additions and 210 deletions.
19 changes: 0 additions & 19 deletions block/blk-mq.c
@@ -628,25 +628,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
 
-void blk_mq_abort_requeue_list(struct request_queue *q)
-{
-	unsigned long flags;
-	LIST_HEAD(rq_list);
-
-	spin_lock_irqsave(&q->requeue_lock, flags);
-	list_splice_init(&q->requeue_list, &rq_list);
-	spin_unlock_irqrestore(&q->requeue_lock, flags);
-
-	while (!list_empty(&rq_list)) {
-		struct request *rq;
-
-		rq = list_first_entry(&rq_list, struct request, queuelist);
-		list_del_init(&rq->queuelist);
-		blk_mq_end_request(rq, -EIO);
-	}
-}
-EXPORT_SYMBOL(blk_mq_abort_requeue_list);
-
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 {
 	if (tag < tags->nr_tags) {
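The removed helper completed every requeued request with -EIO behind the driver's back. Per the shortlog above, nvme now marks its queues dying and restarts the hardware queues instead, so queued requests fail through the normal dispatch path. A minimal sketch of that replacement pattern (illustrative, not code from this commit; blk_set_queue_dying() and blk_mq_start_hw_queues() are real block-layer calls, kill_one_queue() is a hypothetical wrapper):

	static void kill_one_queue(struct request_queue *q)
	{
		/* new I/O now fails fast instead of queueing forever */
		blk_set_queue_dying(q);
		/* restart hw queues so anything already queued is dispatched
		 * and completed (with error) via the normal path */
		blk_mq_start_hw_queues(q);
	}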
6 changes: 3 additions & 3 deletions block/blk-sysfs.c
@@ -887,10 +887,10 @@ int blk_register_queue(struct gendisk *disk)
 		goto unlock;
 	}
 
-	if (q->mq_ops)
+	if (q->mq_ops) {
 		__blk_mq_register_dev(dev, q);
-
-	blk_mq_debugfs_register(q);
+		blk_mq_debugfs_register(q);
+	}
 
 	kobject_uevent(&q->kobj, KOBJ_ADD);
 
172 changes: 109 additions & 63 deletions block/blk-throttle.c
@@ -22,11 +22,11 @@ static int throtl_quantum = 32;
 #define DFL_THROTL_SLICE_HD (HZ / 10)
 #define DFL_THROTL_SLICE_SSD (HZ / 50)
 #define MAX_THROTL_SLICE (HZ)
-#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */
-#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */
 #define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
-/* default latency target is 0, eg, guarantee IO latency by default */
-#define DFL_LATENCY_TARGET (0)
+#define MIN_THROTL_BPS (320 * 1024)
+#define MIN_THROTL_IOPS (10)
+#define DFL_LATENCY_TARGET (-1L)
+#define DFL_IDLE_THRESHOLD (0)
 
 #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
 
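For reference, the constants above in wall-clock terms (HZ jiffies always span one second, so these values are independent of CONFIG_HZ; a worked summary, not part of the diff):

	/* DFL_THROTL_SLICE_HD  = HZ / 10          -> 100 ms throttle slice (rotational)      */
	/* DFL_THROTL_SLICE_SSD = HZ / 50          ->  20 ms throttle slice (non-rotational)  */
	/* MAX_THROTL_SLICE     = HZ               ->   1 s                                   */
	/* MIN_THROTL_BPS       = 320 * 1024       -> 320 KiB/s floor when io.low is left 0   */
	/* MIN_THROTL_IOPS      = 10               ->  10 IO/s floor                          */
	/* MAX_IDLE_TIME        = 5L * 1000 * 1000 ->   5 s (idle thresholds are kept in us)  */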
@@ -157,6 +157,7 @@ struct throtl_grp {
 	unsigned long last_check_time;
 
 	unsigned long latency_target; /* us */
+	unsigned long latency_target_conf; /* us */
 	/* When did we start a new slice */
 	unsigned long slice_start[2];
 	unsigned long slice_end[2];
@@ -165,6 +166,7 @@
 	unsigned long checked_last_finish_time; /* ns / 1024 */
 	unsigned long avg_idletime; /* ns / 1024 */
 	unsigned long idletime_threshold; /* us */
+	unsigned long idletime_threshold_conf; /* us */
 
 	unsigned int bio_cnt; /* total bios */
 	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
@@ -201,8 +203,6 @@ struct throtl_data
 	unsigned int limit_index;
 	bool limit_valid[LIMIT_CNT];
 
-	unsigned long dft_idletime_threshold; /* us */
-
 	unsigned long low_upgrade_time;
 	unsigned long low_downgrade_time;
 
@@ -294,8 +294,14 @@ static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw)
 
 	td = tg->td;
 	ret = tg->bps[rw][td->limit_index];
-	if (ret == 0 && td->limit_index == LIMIT_LOW)
-		return tg->bps[rw][LIMIT_MAX];
+	if (ret == 0 && td->limit_index == LIMIT_LOW) {
+		/* intermediate node or iops isn't 0 */
+		if (!list_empty(&blkg->blkcg->css.children) ||
+		    tg->iops[rw][td->limit_index])
+			return U64_MAX;
+		else
+			return MIN_THROTL_BPS;
+	}
 
 	if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] &&
 	    tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) {
@@ -315,10 +321,17 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
 
 	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
 		return UINT_MAX;
+
 	td = tg->td;
 	ret = tg->iops[rw][td->limit_index];
-	if (ret == 0 && tg->td->limit_index == LIMIT_LOW)
-		return tg->iops[rw][LIMIT_MAX];
+	if (ret == 0 && tg->td->limit_index == LIMIT_LOW) {
+		/* intermediate node or bps isn't 0 */
+		if (!list_empty(&blkg->blkcg->css.children) ||
+		    tg->bps[rw][td->limit_index])
+			return UINT_MAX;
+		else
+			return MIN_THROTL_IOPS;
+	}
 
 	if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] &&
 	    tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) {
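Both helpers now treat a zero io.low setting as "unlimited on this dimension" unless every rate is zero on a leaf, in which case a small floor applies. A standalone model of the bps side (illustrative only; low_bps_limit() is a hypothetical stand-in mirroring the LIMIT_LOW branch of tg_bps_limit() above):

	#include <stdbool.h>
	#include <stdint.h>

	#define MIN_THROTL_BPS (320 * 1024)

	static uint64_t low_bps_limit(uint64_t low_bps, unsigned int low_iops,
				      bool has_children)
	{
		if (low_bps)
			return low_bps;		/* an explicit low limit wins */
		if (has_children || low_iops)
			return UINT64_MAX;	/* intermediate node, or iops carries the limit */
		return MIN_THROTL_BPS;		/* both rates 0: clamp to a small floor */
	}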
@@ -482,6 +495,9 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
 	/* LIMIT_LOW will have default value 0 */
 
 	tg->latency_target = DFL_LATENCY_TARGET;
+	tg->latency_target_conf = DFL_LATENCY_TARGET;
+	tg->idletime_threshold = DFL_IDLE_THRESHOLD;
+	tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;
 
 	return &tg->pd;
 }
@@ -510,8 +526,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
 	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
 		sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
 	tg->td = td;
-
-	tg->idletime_threshold = td->dft_idletime_threshold;
 }
 
/*
@@ -1349,7 +1363,7 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v)
 	return 0;
 }
 
-static void tg_conf_updated(struct throtl_grp *tg)
+static void tg_conf_updated(struct throtl_grp *tg, bool global)
 {
 	struct throtl_service_queue *sq = &tg->service_queue;
 	struct cgroup_subsys_state *pos_css;
@@ -1367,8 +1381,26 @@ static void tg_conf_updated(struct throtl_grp *tg)
 	 * restrictions in the whole hierarchy and allows them to bypass
 	 * blk-throttle.
 	 */
-	blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg))
-		tg_update_has_rules(blkg_to_tg(blkg));
+	blkg_for_each_descendant_pre(blkg, pos_css,
+			global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) {
+		struct throtl_grp *this_tg = blkg_to_tg(blkg);
+		struct throtl_grp *parent_tg;
+
+		tg_update_has_rules(this_tg);
+		/* ignore root/second level */
+		if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent ||
+		    !blkg->parent->parent)
+			continue;
+		parent_tg = blkg_to_tg(blkg->parent);
+		/*
+		 * make sure all children has lower idle time threshold and
+		 * higher latency target
+		 */
+		this_tg->idletime_threshold = min(this_tg->idletime_threshold,
+				parent_tg->idletime_threshold);
+		this_tg->latency_target = max(this_tg->latency_target,
+				parent_tg->latency_target);
+	}
 
 	/*
 	 * We're already holding queue_lock and know @tg is valid. Let's
@@ -1413,7 +1445,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
 	else
 		*(unsigned int *)((void *)tg + of_cft(of)->private) = v;
 
-	tg_conf_updated(tg);
+	tg_conf_updated(tg, false);
 	ret = 0;
 out_finish:
 	blkg_conf_finish(&ctx);
@@ -1497,34 +1529,34 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
 	    tg->iops_conf[READ][off] == iops_dft &&
 	    tg->iops_conf[WRITE][off] == iops_dft &&
 	    (off != LIMIT_LOW ||
-	     (tg->idletime_threshold == tg->td->dft_idletime_threshold &&
-	      tg->latency_target == DFL_LATENCY_TARGET)))
+	     (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD &&
+	      tg->latency_target_conf == DFL_LATENCY_TARGET)))
 		return 0;
 
-	if (tg->bps_conf[READ][off] != bps_dft)
+	if (tg->bps_conf[READ][off] != U64_MAX)
 		snprintf(bufs[0], sizeof(bufs[0]), "%llu",
 			tg->bps_conf[READ][off]);
-	if (tg->bps_conf[WRITE][off] != bps_dft)
+	if (tg->bps_conf[WRITE][off] != U64_MAX)
 		snprintf(bufs[1], sizeof(bufs[1]), "%llu",
 			tg->bps_conf[WRITE][off]);
-	if (tg->iops_conf[READ][off] != iops_dft)
+	if (tg->iops_conf[READ][off] != UINT_MAX)
 		snprintf(bufs[2], sizeof(bufs[2]), "%u",
 			tg->iops_conf[READ][off]);
-	if (tg->iops_conf[WRITE][off] != iops_dft)
+	if (tg->iops_conf[WRITE][off] != UINT_MAX)
 		snprintf(bufs[3], sizeof(bufs[3]), "%u",
 			tg->iops_conf[WRITE][off]);
 	if (off == LIMIT_LOW) {
-		if (tg->idletime_threshold == ULONG_MAX)
+		if (tg->idletime_threshold_conf == ULONG_MAX)
 			strcpy(idle_time, " idle=max");
 		else
 			snprintf(idle_time, sizeof(idle_time), " idle=%lu",
-				tg->idletime_threshold);
+				tg->idletime_threshold_conf);
 
-		if (tg->latency_target == ULONG_MAX)
+		if (tg->latency_target_conf == ULONG_MAX)
 			strcpy(latency_time, " latency=max");
 		else
 			snprintf(latency_time, sizeof(latency_time),
-				" latency=%lu", tg->latency_target);
+				" latency=%lu", tg->latency_target_conf);
 	}
 
 	seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n",
@@ -1563,8 +1595,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 	v[2] = tg->iops_conf[READ][index];
 	v[3] = tg->iops_conf[WRITE][index];
 
-	idle_time = tg->idletime_threshold;
-	latency_time = tg->latency_target;
+	idle_time = tg->idletime_threshold_conf;
+	latency_time = tg->latency_target_conf;
 	while (true) {
 		char tok[27]; /* wiops=18446744073709551616 */
 		char *p;
@@ -1623,17 +1655,33 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 		tg->iops_conf[READ][LIMIT_MAX]);
 	tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],
 		tg->iops_conf[WRITE][LIMIT_MAX]);
+	tg->idletime_threshold_conf = idle_time;
+	tg->latency_target_conf = latency_time;
 
-	if (index == LIMIT_LOW) {
-		blk_throtl_update_limit_valid(tg->td);
-		if (tg->td->limit_valid[LIMIT_LOW])
-			tg->td->limit_index = LIMIT_LOW;
-		tg->idletime_threshold = (idle_time == ULONG_MAX) ?
-			ULONG_MAX : idle_time;
-		tg->latency_target = (latency_time == ULONG_MAX) ?
-			ULONG_MAX : latency_time;
-	}
-	tg_conf_updated(tg);
+	/* force user to configure all settings for low limit */
+	if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] ||
+	      tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) ||
+	    tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD ||
+	    tg->latency_target_conf == DFL_LATENCY_TARGET) {
+		tg->bps[READ][LIMIT_LOW] = 0;
+		tg->bps[WRITE][LIMIT_LOW] = 0;
+		tg->iops[READ][LIMIT_LOW] = 0;
+		tg->iops[WRITE][LIMIT_LOW] = 0;
+		tg->idletime_threshold = DFL_IDLE_THRESHOLD;
+		tg->latency_target = DFL_LATENCY_TARGET;
+	} else if (index == LIMIT_LOW) {
+		tg->idletime_threshold = tg->idletime_threshold_conf;
+		tg->latency_target = tg->latency_target_conf;
+	}
+
+	blk_throtl_update_limit_valid(tg->td);
+	if (tg->td->limit_valid[LIMIT_LOW]) {
+		if (index == LIMIT_LOW)
+			tg->td->limit_index = LIMIT_LOW;
+	} else
+		tg->td->limit_index = LIMIT_MAX;
+	tg_conf_updated(tg, index == LIMIT_LOW &&
+		tg->td->limit_valid[LIMIT_LOW]);
 	ret = 0;
 out_finish:
 	blkg_conf_finish(&ctx);
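With the "force user to configure all settings" block above, io.low only takes effect once at least one rate plus both idle= and latency= are supplied; otherwise the low limit is zeroed again. An illustrative cgroup v2 write (device numbers and values are hypothetical; the key names match the token parsing and the tg_prfill_limit() output format above, with idle and latency in microseconds):

	echo "8:16 rbps=2097152 wbps=2097152 riops=1000 wiops=1000 idle=1000 latency=100" > io.low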
@@ -1722,17 +1770,25 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg)
 	/*
 	 * cgroup is idle if:
 	 * - single idle is too long, longer than a fixed value (in case user
-	 *   configure a too big threshold) or 4 times of slice
+	 *   configure a too big threshold) or 4 times of idletime threshold
 	 * - average think time is more than threshold
 	 * - IO latency is largely below threshold
 	 */
-	unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice);
-
-	time = min_t(unsigned long, MAX_IDLE_TIME, time);
-	return (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
-		tg->avg_idletime > tg->idletime_threshold ||
-		(tg->latency_target && tg->bio_cnt &&
+	unsigned long time;
+	bool ret;
+
+	time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);
+	ret = tg->latency_target == DFL_LATENCY_TARGET ||
+	      tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
+	      (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
+	      tg->avg_idletime > tg->idletime_threshold ||
+	      (tg->latency_target && tg->bio_cnt &&
 		tg->bad_bio_cnt * 5 < tg->bio_cnt);
+	throtl_log(&tg->service_queue,
+		"avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d",
+		tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt,
+		tg->bio_cnt, ret, tg->td->scale);
+	return ret;
 }
 
 static bool throtl_tg_can_upgrade(struct throtl_grp *tg)
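Two behavioral notes on the rework above: the single-idle cutoff is now scaled from the group's own idletime threshold rather than the throttle slice, and a group whose latency target or idle threshold is still at its DFL_* default counts as idle outright, so unconfigured groups do not block an upgrade to LIMIT_MAX. Schematically (a condensed restatement, not kernel code):

	/* idle if any of the following holds */
	is_idle = latency_target == DFL_LATENCY_TARGET ||	/* never configured */
		  idletime_threshold == DFL_IDLE_THRESHOLD ||	/* never configured */
		  last_idle_us > min(MAX_IDLE_TIME, 4 * idletime_threshold) ||
		  avg_idletime > idletime_threshold ||		/* long think time */
		  (latency_target && bio_cnt &&
		   bad_bio_cnt * 5 < bio_cnt);			/* <20% of bios over target */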
@@ -1828,6 +1884,7 @@ static void throtl_upgrade_state(struct throtl_data *td)
 	struct cgroup_subsys_state *pos_css;
 	struct blkcg_gq *blkg;
 
+	throtl_log(&td->service_queue, "upgrade to max");
 	td->limit_index = LIMIT_MAX;
 	td->low_upgrade_time = jiffies;
 	td->scale = 0;
@@ -1850,6 +1907,7 @@ static void throtl_downgrade_state(struct throtl_data *td, int new)
 {
 	td->scale /= 2;
 
+	throtl_log(&td->service_queue, "downgrade, scale %d", td->scale);
 	if (td->scale) {
 		td->low_upgrade_time = jiffies - td->scale * td->throtl_slice;
 		return;
@@ -2023,6 +2081,11 @@ static void throtl_update_latency_buckets(struct throtl_data *td)
 		td->avg_buckets[i].valid = true;
 		last_latency = td->avg_buckets[i].latency;
 	}
+
+	for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
+		throtl_log(&td->service_queue,
+			"Latency bucket %d: latency=%ld, valid=%d", i,
+			td->avg_buckets[i].latency, td->avg_buckets[i].valid);
 }
 #else
 static inline void throtl_update_latency_buckets(struct throtl_data *td)
@@ -2354,19 +2417,14 @@ void blk_throtl_exit(struct request_queue *q)
 void blk_throtl_register_queue(struct request_queue *q)
 {
 	struct throtl_data *td;
-	struct cgroup_subsys_state *pos_css;
-	struct blkcg_gq *blkg;
 
 	td = q->td;
 	BUG_ON(!td);
 
-	if (blk_queue_nonrot(q)) {
+	if (blk_queue_nonrot(q))
 		td->throtl_slice = DFL_THROTL_SLICE_SSD;
-		td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_SSD;
-	} else {
+	else
 		td->throtl_slice = DFL_THROTL_SLICE_HD;
-		td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_HD;
-	}
 #ifndef CONFIG_BLK_DEV_THROTTLING_LOW
 	/* if no low limit, use previous default */
 	td->throtl_slice = DFL_THROTL_SLICE_HD;
@@ -2375,18 +2433,6 @@ void blk_throtl_register_queue(struct request_queue *q)
 	td->track_bio_latency = !q->mq_ops && !q->request_fn;
 	if (!td->track_bio_latency)
 		blk_stat_enable_accounting(q);
-
-	/*
-	 * some tg are created before queue is fully initialized, eg, nonrot
-	 * isn't initialized yet
-	 */
-	rcu_read_lock();
-	blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
-		struct throtl_grp *tg = blkg_to_tg(blkg);
-
-		tg->idletime_threshold = td->dft_idletime_threshold;
-	}
-	rcu_read_unlock();
 }
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
4 changes: 3 additions & 1 deletion block/partition-generic.c
@@ -320,8 +320,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 
 	if (info) {
 		struct partition_meta_info *pinfo = alloc_part_info(disk);
-		if (!pinfo)
+		if (!pinfo) {
+			err = -ENOMEM;
 			goto out_free_stats;
+		}
 		memcpy(pinfo, info, sizeof(*info));
 		p->info = pinfo;
 	}
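The fix matters at the call sites: add_partition() reports failure through ERR_PTR(err), so without the assignment a failed alloc_part_info() would propagate whatever err last held instead of -ENOMEM. A hedged caller-side sketch (argument names beyond disk and partno abbreviated from the signature shown above):

	struct hd_struct *part;

	part = add_partition(disk, partno, start, len, flags, info);
	if (IS_ERR(part))
		return PTR_ERR(part);	/* now reliably -ENOMEM when alloc_part_info() fails */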