Skip to content

Commit

Permalink
zram: writeback throttle
Browse files Browse the repository at this point in the history
If there are lots of write IO with flash device, it could have a
wearout problem of storage. To overcome the problem, admin needs
to design write limitation to guarantee flash health
for entire product life.

This patch creates a new knob "writeback_limit" for zram.

writeback_limit's default value is 0 so that it doesn't limit
any writeback. If admin want to measure writeback count in a
certain period, he could know it via /sys/block/zram0/bd_stat's
3rd column.

If admin want to limit writeback as per-day 400M, he could do it
like below.

	MB_SHIFT=20
	4K_SHIFT=12
	echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
		/sys/block/zram0/writeback_limit.

If admin want to allow further write again, he could do it like below

	echo 0 > /sys/block/zram0/writeback_limit

If admin want to see remaining writeback budget,

	cat /sys/block/zram0/writeback_limit

The writeback_limit count will reset whenever you reset zram (e.g., system
reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of writeback
happened until you reset the zram to allocate extra writeback budget in
next setting is user's job.

[[email protected]: v4]
  Link: http://lkml.kernel.org/r/[email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Minchan Kim <[email protected]>
Reviewed-by: Sergey Senozhatsky <[email protected]>
Cc: Joey Pabalinas <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
minchank authored and torvalds committed Dec 28, 2018
1 parent 23eddf3 commit bb416d1
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 3 deletions.
9 changes: 9 additions & 0 deletions Documentation/ABI/testing/sysfs-block-zram
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,12 @@ Description:
The bd_stat file is read-only and represents backing device's
statistics (bd_count, bd_reads, bd_writes) in a format
similar to block layer statistics file format.

What: /sys/block/zram<id>/writeback_limit
Date: November 2018
Contact: Minchan Kim <[email protected]>
Description:
The writeback_limit file is read-write and specifies the maximum
amount of writeback ZRAM can do. The limit could be changed
in run time and "0" means disable the limit.
No limit is the initial state.
31 changes: 31 additions & 0 deletions Documentation/blockdev/zram.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ reset WO trigger device reset
mem_used_max WO reset the `mem_used_max' counter (see later)
mem_limit WO specifies the maximum amount of memory ZRAM can use
to store the compressed data
writeback_limit WO specifies the maximum amount of write IO zram can
write out to backing device as 4KB unit
max_comp_streams RW the number of possible concurrent compress operations
comp_algorithm RW show and change the compression algorithm
compact WO trigger memory compaction
Expand Down Expand Up @@ -275,6 +277,35 @@ Admin can request writeback of those idle pages at right timing via

With the command, zram writeback idle pages from memory to the storage.

If there are lots of write IO with flash device, potentially, it has
flash wearout problem so that admin needs to design write limitation
to guarantee storage health for entire product life.
To overcome the concern, zram supports "writeback_limit".
The "writeback_limit"'s default value is 0 so that it doesn't limit
any writeback. If admin want to measure writeback count in a certain
period, he could know it via /sys/block/zram0/bd_stat's 3rd column.

If admin want to limit writeback as per-day 400M, he could do it
like below.

MB_SHIFT=20
4K_SHIFT=12
echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
/sys/block/zram0/writeback_limit.

If admin want to allow further write again, he could do it like below

echo 0 > /sys/block/zram0/writeback_limit

If admin want to see remaining writeback budget since he set,

cat /sys/block/zram0/writeback_limit

The writeback_limit count will reset whenever you reset zram(e.g.,
system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
writeback happened until you reset the zram to allocate extra writeback
budget in next setting is user's job.

= memory tracking

With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
Expand Down
52 changes: 49 additions & 3 deletions drivers/block/zram/zram_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,39 @@ static ssize_t idle_store(struct device *dev,
}

#ifdef CONFIG_ZRAM_WRITEBACK
static ssize_t writeback_limit_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
u64 val;
ssize_t ret = -EINVAL;

if (kstrtoull(buf, 10, &val))
return ret;

down_read(&zram->init_lock);
atomic64_set(&zram->stats.bd_wb_limit, val);
if (val == 0)
zram->stop_writeback = false;
up_read(&zram->init_lock);
ret = len;

return ret;
}

static ssize_t writeback_limit_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
u64 val;
struct zram *zram = dev_to_zram(dev);

down_read(&zram->init_lock);
val = atomic64_read(&zram->stats.bd_wb_limit);
up_read(&zram->init_lock);

return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}

static void reset_bdev(struct zram *zram)
{
struct block_device *bdev;
Expand Down Expand Up @@ -612,6 +645,11 @@ static ssize_t writeback_store(struct device *dev,
bvec.bv_len = PAGE_SIZE;
bvec.bv_offset = 0;

if (zram->stop_writeback) {
ret = -EIO;
break;
}

if (!blk_idx) {
blk_idx = alloc_block_bdev(zram);
if (!blk_idx) {
Expand Down Expand Up @@ -694,6 +732,11 @@ static ssize_t writeback_store(struct device *dev,
zram_set_element(zram, index, blk_idx);
blk_idx = 0;
atomic64_inc(&zram->stats.pages_stored);
if (atomic64_add_unless(&zram->stats.bd_wb_limit,
-1 << (PAGE_SHIFT - 12), 0)) {
if (atomic64_read(&zram->stats.bd_wb_limit) == 0)
zram->stop_writeback = true;
}
next:
zram_slot_unlock(zram, index);
}
Expand Down Expand Up @@ -1018,6 +1061,7 @@ static ssize_t mm_stat_show(struct device *dev,
}

#ifdef CONFIG_ZRAM_WRITEBACK
#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
static ssize_t bd_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
Expand All @@ -1027,9 +1071,9 @@ static ssize_t bd_stat_show(struct device *dev,
down_read(&zram->init_lock);
ret = scnprintf(buf, PAGE_SIZE,
"%8llu %8llu %8llu\n",
(u64)atomic64_read(&zram->stats.bd_count) * (PAGE_SHIFT - 12),
(u64)atomic64_read(&zram->stats.bd_reads) * (PAGE_SHIFT - 12),
(u64)atomic64_read(&zram->stats.bd_writes) * (PAGE_SHIFT - 12));
FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
up_read(&zram->init_lock);

return ret;
Expand Down Expand Up @@ -1767,6 +1811,7 @@ static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
#endif

static struct attribute *zram_disk_attrs[] = {
Expand All @@ -1782,6 +1827,7 @@ static struct attribute *zram_disk_attrs[] = {
#ifdef CONFIG_ZRAM_WRITEBACK
&dev_attr_backing_dev.attr,
&dev_attr_writeback.attr,
&dev_attr_writeback_limit.attr,
#endif
&dev_attr_io_stat.attr,
&dev_attr_mm_stat.attr,
Expand Down
2 changes: 2 additions & 0 deletions drivers/block/zram/zram_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ struct zram_stats {
atomic64_t bd_count; /* no. of pages in backing device */
atomic64_t bd_reads; /* no. of reads from backing device */
atomic64_t bd_writes; /* no. of writes from backing device */
atomic64_t bd_wb_limit; /* writeback limit of backing device */
#endif
};

Expand Down Expand Up @@ -113,6 +114,7 @@ struct zram {
*/
bool claim; /* Protected by bdev->bd_mutex */
struct file *backing_dev;
bool stop_writeback;
#ifdef CONFIG_ZRAM_WRITEBACK
struct block_device *bdev;
unsigned int old_block_size;
Expand Down

0 comments on commit bb416d1

Please sign in to comment.