Skip to content

Commit

Permalink
mm/writeback: discard NR_UNSTABLE_NFS, use NR_WRITEBACK instead
Browse files Browse the repository at this point in the history
After an NFS page has been written it is considered "unstable" until a
COMMIT request succeeds.  If the COMMIT fails, the page will be
re-written.

These "unstable" pages are currently accounted as "reclaimable", either
in WB_RECLAIMABLE, or in NR_UNSTABLE_NFS which is included in a
'reclaimable' count.  This might have made sense when sending the COMMIT
required a separate action by the VFS/MM (e.g.  releasepage() used to
send a COMMIT).  However now that all writes generated by ->writepages()
will automatically be followed by a COMMIT (since commit 919e3bd
("NFS: Ensure we commit after writeback is complete")) it makes more
sense to treat them as writeback pages.

So this patch removes NR_UNSTABLE_NFS and accounts unstable pages in
NR_WRITEBACK and WB_WRITEBACK.

A particular effect of this change is that when
wb_check_background_flush() calls wb_over_bg_threshold(), the latter
will report 'true' a lot less often as the 'unstable' pages are no
longer considered 'dirty' (as there is nothing that writeback can do
about them anyway).

Currently wb_check_background_flush() will trigger writeback to NFS even
when there are relatively few dirty pages (if there are lots of unstable
pages), this can result in small writes going to the server (10s of
Kilobytes rather than a Megabyte) which hurts throughput.  With this
patch, there are fewer writes which are each larger on average.

Where the NR_UNSTABLE_NFS count was included in statistics
virtual-files, the entry is retained, but the value is hard-coded as
zero.  static trace points and warning printks which mentioned this
counter no longer report it.

[[email protected]: re-layout comment]
[[email protected]: fix printk warning]
Signed-off-by: NeilBrown <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Acked-by: Trond Myklebust <[email protected]>
Acked-by: Michal Hocko <[email protected]>	[mm]
Cc: Christoph Hellwig <[email protected]>
Cc: Chuck Lever <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
neilbrown authored and torvalds committed Jun 2, 2020
1 parent a37b071 commit 8d92890
Show file tree
Hide file tree
Showing 12 changed files with 28 additions and 36 deletions.
4 changes: 2 additions & 2 deletions Documentation/filesystems/proc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1042,8 +1042,8 @@ PageTables
amount of memory dedicated to the lowest level of page
tables.
NFS_Unstable
NFS pages sent to the server, but not yet committed to stable
storage
Always zero. Previous counted pages which had been written to
the server, but has not been committed to stable storage.
Bounce
Memory used for block device "bounce buffers"
WritebackTmp
Expand Down
2 changes: 1 addition & 1 deletion drivers/base/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, 0UL,
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
nid, K(sreclaimable +
Expand Down
1 change: 0 additions & 1 deletion fs/fs-writeback.c
Original file line number Diff line number Diff line change
Expand Up @@ -1070,7 +1070,6 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
static unsigned long get_nr_dirty_pages(void)
{
return global_node_page_state(NR_FILE_DIRTY) +
global_node_page_state(NR_UNSTABLE_NFS) +
get_nr_dirty_inodes();
}

Expand Down
10 changes: 7 additions & 3 deletions fs/nfs/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -668,16 +668,20 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
}

/*
* Record the page as unstable and mark its inode as dirty.
* Record the page as unstable (an extra writeback period) and mark its
* inode as dirty.
*/
static inline
void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo)
{
if (!cinfo->dreq) {
struct inode *inode = page_file_mapping(page)->host;

inc_node_page_state(page, NR_UNSTABLE_NFS);
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_RECLAIMABLE);
/* This page is really still in write-back - just that the
* writeback is happening on the server now.
*/
inc_node_page_state(page, NR_WRITEBACK);
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
}
Expand Down
4 changes: 2 additions & 2 deletions fs/nfs/write.c
Original file line number Diff line number Diff line change
Expand Up @@ -946,9 +946,9 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
static void
nfs_clear_page_commit(struct page *page)
{
dec_node_page_state(page, NR_UNSTABLE_NFS);
dec_node_page_state(page, NR_WRITEBACK);
dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
WB_RECLAIMABLE);
WB_WRITEBACK);
}

/* Called holding the request lock on @req */
Expand Down
3 changes: 1 addition & 2 deletions fs/proc/meminfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));

show_val_kb(m, "NFS_Unstable: ",
global_node_page_state(NR_UNSTABLE_NFS));
show_val_kb(m, "NFS_Unstable: ", 0);
show_val_kb(m, "Bounce: ",
global_zone_page_state(NR_BOUNCE));
show_val_kb(m, "WritebackTmp: ",
Expand Down
1 change: 0 additions & 1 deletion include/linux/mmzone.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,6 @@ enum node_stat_item {
NR_FILE_THPS,
NR_FILE_PMDMAPPED,
NR_ANON_THPS,
NR_UNSTABLE_NFS, /* NFS unstable pages */
NR_VMSCAN_WRITE,
NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
NR_DIRTIED, /* page dirtyings since bootup */
Expand Down
5 changes: 1 addition & 4 deletions include/trace/events/writeback.h
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,6 @@ TRACE_EVENT(global_dirty_state,
TP_STRUCT__entry(
__field(unsigned long, nr_dirty)
__field(unsigned long, nr_writeback)
__field(unsigned long, nr_unstable)
__field(unsigned long, background_thresh)
__field(unsigned long, dirty_thresh)
__field(unsigned long, dirty_limit)
Expand All @@ -552,20 +551,18 @@ TRACE_EVENT(global_dirty_state,
TP_fast_assign(
__entry->nr_dirty = global_node_page_state(NR_FILE_DIRTY);
__entry->nr_writeback = global_node_page_state(NR_WRITEBACK);
__entry->nr_unstable = global_node_page_state(NR_UNSTABLE_NFS);
__entry->nr_dirtied = global_node_page_state(NR_DIRTIED);
__entry->nr_written = global_node_page_state(NR_WRITTEN);
__entry->background_thresh = background_thresh;
__entry->dirty_thresh = dirty_thresh;
__entry->dirty_limit = global_wb_domain.dirty_limit;
),

TP_printk("dirty=%lu writeback=%lu unstable=%lu "
TP_printk("dirty=%lu writeback=%lu "
"bg_thresh=%lu thresh=%lu limit=%lu "
"dirtied=%lu written=%lu",
__entry->nr_dirty,
__entry->nr_writeback,
__entry->nr_unstable,
__entry->background_thresh,
__entry->dirty_thresh,
__entry->dirty_limit,
Expand Down
1 change: 0 additions & 1 deletion mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -4330,7 +4330,6 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,

*pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY);

/* this should eventually include NR_UNSTABLE_NFS */
*pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK);
*pfilepages = memcg_exact_page_state(memcg, NR_INACTIVE_FILE) +
memcg_exact_page_state(memcg, NR_ACTIVE_FILE);
Expand Down
17 changes: 4 additions & 13 deletions mm/page-writeback.c
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,6 @@ bool node_dirty_ok(struct pglist_data *pgdat)
unsigned long nr_pages = 0;

nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
nr_pages += node_page_state(pgdat, NR_UNSTABLE_NFS);
nr_pages += node_page_state(pgdat, NR_WRITEBACK);

return nr_pages <= limit;
Expand Down Expand Up @@ -758,7 +757,7 @@ static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
* bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
*
* Return: @wb's dirty limit in pages. The term "dirty" in the context of
* dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
* dirty balancing includes all PG_dirty and PG_writeback pages.
*/
static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
{
Expand Down Expand Up @@ -1566,7 +1565,7 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
&mdtc_stor : NULL;
struct dirty_throttle_control *sdtc;
unsigned long nr_reclaimable; /* = file_dirty + unstable_nfs */
unsigned long nr_reclaimable; /* = file_dirty */
long period;
long pause;
long max_pause;
Expand All @@ -1586,14 +1585,7 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
unsigned long m_thresh = 0;
unsigned long m_bg_thresh = 0;

/*
* Unstable writes are a feature of certain networked
* filesystems (i.e. NFS) in which data may have been
* written to the server's write cache, but has not yet
* been flushed to permanent storage.
*/
nr_reclaimable = global_node_page_state(NR_FILE_DIRTY) +
global_node_page_state(NR_UNSTABLE_NFS);
nr_reclaimable = global_node_page_state(NR_FILE_DIRTY);
gdtc->avail = global_dirtyable_memory();
gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);

Expand Down Expand Up @@ -1963,8 +1955,7 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
* as we're trying to decide whether to put more under writeback.
*/
gdtc->avail = global_dirtyable_memory();
gdtc->dirty = global_node_page_state(NR_FILE_DIRTY) +
global_node_page_state(NR_UNSTABLE_NFS);
gdtc->dirty = global_node_page_state(NR_FILE_DIRTY);
domain_dirty_limits(gdtc);

if (gdtc->dirty > gdtc->bg_thresh)
Expand Down
5 changes: 1 addition & 4 deletions mm/page_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -5319,7 +5319,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)

printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
" unevictable:%lu dirty:%lu writeback:%lu unstable:%lu\n"
" unevictable:%lu dirty:%lu writeback:%lu\n"
" slab_reclaimable:%lu slab_unreclaimable:%lu\n"
" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
" free:%lu free_pcp:%lu free_cma:%lu\n",
Expand All @@ -5332,7 +5332,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
global_node_page_state(NR_UNEVICTABLE),
global_node_page_state(NR_FILE_DIRTY),
global_node_page_state(NR_WRITEBACK),
global_node_page_state(NR_UNSTABLE_NFS),
global_node_page_state(NR_SLAB_RECLAIMABLE),
global_node_page_state(NR_SLAB_UNRECLAIMABLE),
global_node_page_state(NR_FILE_MAPPED),
Expand Down Expand Up @@ -5365,7 +5364,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" anon_thp: %lukB"
#endif
" writeback_tmp:%lukB"
" unstable:%lukB"
" all_unreclaimable? %s"
"\n",
pgdat->node_id,
Expand All @@ -5387,7 +5385,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
#endif
K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
"yes" : "no");
}
Expand Down
11 changes: 9 additions & 2 deletions mm/vmstat.c
Original file line number Diff line number Diff line change
Expand Up @@ -1108,7 +1108,7 @@ int fragmentation_index(struct zone *zone, unsigned int order)
TEXT_FOR_HIGHMEM(xx) xx "_movable",

const char * const vmstat_text[] = {
/* enum zone_stat_item countes */
/* enum zone_stat_item counters */
"nr_free_pages",
"nr_zone_inactive_anon",
"nr_zone_active_anon",
Expand Down Expand Up @@ -1162,7 +1162,6 @@ const char * const vmstat_text[] = {
"nr_file_hugepages",
"nr_file_pmdmapped",
"nr_anon_transparent_hugepages",
"nr_unstable",
"nr_vmscan_write",
"nr_vmscan_immediate_reclaim",
"nr_dirtied",
Expand Down Expand Up @@ -1723,6 +1722,14 @@ static int vmstat_show(struct seq_file *m, void *arg)
seq_puts(m, vmstat_text[off]);
seq_put_decimal_ull(m, " ", *l);
seq_putc(m, '\n');

if (off == NR_VMSTAT_ITEMS - 1) {
/*
* We've come to the end - add any deprecated counters to avoid
* breaking userspace which might depend on them being present.
*/
seq_puts(m, "nr_unstable 0\n");
}
return 0;
}

Expand Down

0 comments on commit 8d92890

Please sign in to comment.