Skip to content

Commit

Permalink
blk-cgroup: separate out blkg_rwstat under CONFIG_BLK_CGROUP_RWSTAT
Browse files Browse the repository at this point in the history
blkg_rwstat is now only used by bfq-iosched and blk-throtl when on
cgroup1.  Let's move it into its own files and gate it behind a config
option.

Signed-off-by: Tejun Heo <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
htejun authored and axboe committed Nov 7, 2019
1 parent f733164 commit 1d15664
Show file tree
Hide file tree
Showing 9 changed files with 287 additions and 256 deletions.
4 changes: 4 additions & 0 deletions block/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ config BLK_RQ_ALLOC_TIME
config BLK_SCSI_REQUEST
bool

config BLK_CGROUP_RWSTAT
bool

config BLK_DEV_BSG
bool "Block layer SG support v4"
default y
Expand Down Expand Up @@ -86,6 +89,7 @@ config BLK_DEV_ZONED
config BLK_DEV_THROTTLING
bool "Block layer bio throttling support"
depends on BLK_CGROUP=y
select BLK_CGROUP_RWSTAT
---help---
Block layer bio throttling support. It can be used to limit
the IO rate to a device. IO rate policies are per cgroup and
Expand Down
1 change: 1 addition & 0 deletions block/Kconfig.iosched
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ config IOSCHED_BFQ
config BFQ_GROUP_IOSCHED
bool "BFQ hierarchical scheduling support"
depends on IOSCHED_BFQ && BLK_CGROUP
select BLK_CGROUP_RWSTAT
---help---

Enable hierarchical scheduling in BFQ, using the blkio
Expand Down
1 change: 1 addition & 0 deletions block/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
obj-$(CONFIG_BLK_CGROUP_RWSTAT) += blk-cgroup-rwstat.o
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o
Expand Down
2 changes: 2 additions & 0 deletions block/bfq-iosched.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include <linux/hrtimer.h>
#include <linux/blk-cgroup.h>

#include "blk-cgroup-rwstat.h"

#define BFQ_IOPRIO_CLASSES 3
#define BFQ_CL_IDLE_TIMEOUT (HZ/5)

Expand Down
129 changes: 129 additions & 0 deletions block/blk-cgroup-rwstat.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Legacy blkg rwstat helpers enabled by CONFIG_BLK_CGROUP_RWSTAT.
* Do not use in new code.
*/
#include "blk-cgroup-rwstat.h"

/*
 * Set up every per-cpu counter of @rwstat with an initial value of 0 using
 * the allocation flags @gfp, and clear the matching aux counts.
 *
 * Returns 0 on success.  If percpu_counter_init() fails for one counter,
 * the counters initialized before it are destroyed again and the error
 * code from percpu_counter_init() is returned.
 */
int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
{
	int idx, err;

	for (idx = 0; idx < BLKG_RWSTAT_NR; idx++) {
		err = percpu_counter_init(&rwstat->cpu_cnt[idx], 0, gfp);
		if (err)
			goto unwind;
		atomic64_set(&rwstat->aux_cnt[idx], 0);
	}
	return 0;

unwind:
	/* idx is the counter that failed; undo only the ones before it. */
	while (idx-- > 0)
		percpu_counter_destroy(&rwstat->cpu_cnt[idx]);
	return err;
}
EXPORT_SYMBOL_GPL(blkg_rwstat_init);

void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
{
int i;

for (i = 0; i < BLKG_RWSTAT_NR; i++)
percpu_counter_destroy(&rwstat->cpu_cnt[i]);
}
EXPORT_SYMBOL_GPL(blkg_rwstat_exit);

/**
 * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @rwstat: rwstat sample to print
 *
 * Print @rwstat to @sf for the device associated with @pd: one
 * "<dev> <type> <count>" line per counter type, followed by a
 * "<dev> Total <count>" line where the total is Read + Write + Discard.
 *
 * Returns the printed total, or 0 without printing anything when no device
 * name is available for @pd's blkg.
 */
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			 const struct blkg_rwstat_sample *rwstat)
{
	static const char *labels[] = {
		[BLKG_RWSTAT_READ]	= "Read",
		[BLKG_RWSTAT_WRITE]	= "Write",
		[BLKG_RWSTAT_SYNC]	= "Sync",
		[BLKG_RWSTAT_ASYNC]	= "Async",
		[BLKG_RWSTAT_DISCARD]	= "Discard",
	};
	const char *dev = blkg_dev_name(pd->blkg);
	u64 total;
	int idx = 0;

	if (!dev)
		return 0;

	while (idx < BLKG_RWSTAT_NR) {
		seq_printf(sf, "%s %s %llu\n", dev, labels[idx],
			   rwstat->cnt[idx]);
		idx++;
	}

	/* Sync/Async are a second view of the same IOs; don't count twice. */
	total = rwstat->cnt[BLKG_RWSTAT_READ];
	total += rwstat->cnt[BLKG_RWSTAT_WRITE];
	total += rwstat->cnt[BLKG_RWSTAT_DISCARD];

	seq_printf(sf, "%s Total %llu\n", dev, total);
	return total;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);

/**
* blkg_prfill_rwstat - prfill callback for blkg_rwstat
* @sf: seq_file to print to
* @pd: policy private data of interest
* @off: offset to the blkg_rwstat in @pd
*
* prfill callback for printing a blkg_rwstat.
*/
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
int off)
{
struct blkg_rwstat_sample rwstat = { };

blkg_rwstat_read((void *)pd + off, &rwstat);
return __blkg_prfill_rwstat(sf, pd, &rwstat);
}
EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);

/**
* blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
* @blkg: blkg of interest
* @pol: blkcg_policy which contains the blkg_rwstat
* @off: offset to the blkg_rwstat in blkg_policy_data or @blkg
* @sum: blkg_rwstat_sample structure containing the results
*
* Collect the blkg_rwstat specified by @blkg, @pol and @off and all its
* online descendants and their aux counts. The caller must be holding the
* queue lock for online tests.
*
* If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it
* is at @off bytes into @blkg's blkg_policy_data of the policy.
*/
void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
int off, struct blkg_rwstat_sample *sum)
{
struct blkcg_gq *pos_blkg;
struct cgroup_subsys_state *pos_css;
unsigned int i;

lockdep_assert_held(&blkg->q->queue_lock);

rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
struct blkg_rwstat *rwstat;

if (!pos_blkg->online)
continue;

if (pol)
rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
else
rwstat = (void *)pos_blkg + off;

for (i = 0; i < BLKG_RWSTAT_NR; i++)
sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i);
}
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
149 changes: 149 additions & 0 deletions block/blk-cgroup-rwstat.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Legacy blkg rwstat helpers enabled by CONFIG_BLK_CGROUP_RWSTAT.
* Do not use in new code.
*/
#ifndef _BLK_CGROUP_RWSTAT_H
#define _BLK_CGROUP_RWSTAT_H

#include <linux/blk-cgroup.h>

/*
 * Index of each counter kept in a blkg_rwstat / blkg_rwstat_sample.
 *
 * blkg_rwstat_add() accounts every value twice: once under exactly one of
 * READ / WRITE / DISCARD and once under exactly one of SYNC / ASYNC.
 */
enum blkg_rwstat_type {
BLKG_RWSTAT_READ,
BLKG_RWSTAT_WRITE,
BLKG_RWSTAT_SYNC,
BLKG_RWSTAT_ASYNC,
BLKG_RWSTAT_DISCARD,

/* Number of counters; used as the size of the counter arrays. */
BLKG_RWSTAT_NR,
/* Same value as BLKG_RWSTAT_NR, i.e. not a valid array index. */
BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
};

/*
 * Per-blkg read/write statistics, one slot per enum blkg_rwstat_type.
 *
 * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
 * recursive. Used to carry stats of dead children.
 *
 * cpu_cnt holds the counts updated by blkg_rwstat_add(); aux_cnt is only
 * added to by blkg_rwstat_add_aux() and read by blkg_rwstat_read_counter().
 */
struct blkg_rwstat {
struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR];
atomic64_t aux_cnt[BLKG_RWSTAT_NR];
};

/*
 * Plain-integer snapshot of a blkg_rwstat, indexed by enum
 * blkg_rwstat_type.  Filled by blkg_rwstat_read() and
 * blkg_rwstat_recursive_sum(), consumed by __blkg_prfill_rwstat().
 */
struct blkg_rwstat_sample {
u64 cnt[BLKG_RWSTAT_NR];
};

/*
 * Return the value of counter @idx of @rwstat including its aux count,
 * i.e. aux_cnt[@idx] plus the positive sum of the per-cpu counter.
 */
static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat,
		unsigned int idx)
{
	u64 carried = atomic64_read(&rwstat->aux_cnt[idx]);
	u64 local = percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]);

	return carried + local;
}

/*
 * Out-of-line helpers, defined and exported (EXPORT_SYMBOL_GPL) in
 * blk-cgroup-rwstat.c.
 */

/* Set up / tear down the per-cpu counters of a blkg_rwstat. */
int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp);
void blkg_rwstat_exit(struct blkg_rwstat *rwstat);
/* Print an already collected sample for the device of @pd. */
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
const struct blkg_rwstat_sample *rwstat);
/* Read the blkg_rwstat at @off bytes into @pd and print it. */
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
int off);
/* Collect the blkg_rwstat of @blkg and its online descendants into @sum. */
void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
int off, struct blkg_rwstat_sample *sum);


/**
 * blkg_rwstat_add - add a value to a blkg_rwstat
 * @rwstat: target blkg_rwstat
 * @op: REQ_OP and flags
 * @val: value to add
 *
 * Add @val to two per-cpu counters of @rwstat, both chosen according to
 * @op: first one of DISCARD / WRITE / READ, then one of SYNC / ASYNC.  The
 * caller is responsible for synchronizing calls to this function.
 */
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
				   unsigned int op, uint64_t val)
{
	int dir, sync;

	/* Discard is tested first, before the write/read split. */
	if (op_is_discard(op))
		dir = BLKG_RWSTAT_DISCARD;
	else
		dir = op_is_write(op) ? BLKG_RWSTAT_WRITE : BLKG_RWSTAT_READ;

	percpu_counter_add_batch(&rwstat->cpu_cnt[dir], val,
				 BLKG_STAT_CPU_BATCH);

	sync = op_is_sync(op) ? BLKG_RWSTAT_SYNC : BLKG_RWSTAT_ASYNC;

	percpu_counter_add_batch(&rwstat->cpu_cnt[sync], val,
				 BLKG_STAT_CPU_BATCH);
}

/**
* blkg_rwstat_read - read the current values of a blkg_rwstat
* @rwstat: blkg_rwstat to read
*
* Read the current snapshot of @rwstat and return it in the aux counts.
*/
static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat,
struct blkg_rwstat_sample *result)
{
int i;

for (i = 0; i < BLKG_RWSTAT_NR; i++)
result->cnt[i] =
percpu_counter_sum_positive(&rwstat->cpu_cnt[i]);
}

/**
* blkg_rwstat_total - read the total count of a blkg_rwstat
* @rwstat: blkg_rwstat to read
*
* Return the total count of @rwstat regardless of the IO direction. This
* function can be called without synchronization and takes care of u64
* atomicity.
*/
static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
{
struct blkg_rwstat_sample tmp = { };

blkg_rwstat_read(rwstat, &tmp);
return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
}

/**
 * blkg_rwstat_reset - reset a blkg_rwstat
 * @rwstat: blkg_rwstat to reset
 *
 * Set every per-cpu counter and every aux count of @rwstat to 0.
 */
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
{
	int idx = 0;

	while (idx < BLKG_RWSTAT_NR) {
		percpu_counter_set(&rwstat->cpu_cnt[idx], 0);
		atomic64_set(&rwstat->aux_cnt[idx], 0);
		idx++;
	}
}

/**
 * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
 * @to: the destination blkg_rwstat
 * @from: the source
 *
 * For each counter type, add @from's per-cpu count plus @from's aux count
 * to @to's aux count.  @from itself is not modified.
 */
static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
				       struct blkg_rwstat *from)
{
	u64 local[BLKG_RWSTAT_NR];
	int idx;

	/* Read all per-cpu sums up front, before touching @to. */
	for (idx = 0; idx < BLKG_RWSTAT_NR; idx++)
		local[idx] = percpu_counter_sum_positive(&from->cpu_cnt[idx]);

	for (idx = 0; idx < BLKG_RWSTAT_NR; idx++) {
		u64 carried = atomic64_read(&from->aux_cnt[idx]);

		atomic64_add(local[idx] + carried, &to->aux_cnt[idx]);
	}
}
#endif /* _BLK_CGROUP_RWSTAT_H */
Loading

0 comments on commit 1d15664

Please sign in to comment.