Merge tag 'folio-6.0' of git://git.infradead.org/users/willy/pagecache
Pull folio updates from Matthew Wilcox:

 - Fix an accounting bug that made NR_FILE_DIRTY grow without limit
   when running xfstests

 - Convert more of mpage to use folios

 - Remove add_to_page_cache() and add_to_page_cache_locked()

 - Convert find_get_pages_range() to filemap_get_folios()

 - Improvements to the read_cache_page() family of functions

 - Remove a few unnecessary checks of PageError

 - Some straightforward filesystem conversions to use folios

 - Split PageMovable users out from address_space_operations into
   their own movable_operations

 - Convert aops->migratepage to aops->migrate_folio

 - Remove nobh support (Christoph Hellwig)

* tag 'folio-6.0' of git://git.infradead.org/users/willy/pagecache: (78 commits)
  fs: remove the NULL get_block case in mpage_writepages
  fs: don't call ->writepage from __mpage_writepage
  fs: remove the nobh helpers
  jfs: stop using the nobh helper
  ext2: remove nobh support
  ntfs3: refactor ntfs_writepages
  mm/folio-compat: Remove migration compatibility functions
  fs: Remove aops->migratepage()
  secretmem: Convert to migrate_folio
  hugetlb: Convert to migrate_folio
  aio: Convert to migrate_folio
  f2fs: Convert to filemap_migrate_folio()
  ubifs: Convert to filemap_migrate_folio()
  btrfs: Convert btrfs_migratepage to migrate_folio
  mm/migrate: Add filemap_migrate_folio()
  mm/migrate: Convert migrate_page() to migrate_folio()
  nfs: Convert to migrate_folio
  btrfs: Convert btree_migratepage to migrate_folio
  mm/migrate: Convert expected_page_refs() to folio_expected_refs()
  mm/migrate: Convert buffer_migrate_page() to buffer_migrate_folio()
  ...
torvalds committed Aug 3, 2022
2 parents e087437 + cf5e7a6 commit f006540
Showing 97 changed files with 830 additions and 1,887 deletions.
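
Most of the conversions above are mechanical API swaps. For example, the
find_get_pages_range() to filemap_get_folios() change replaces page-at-a-time
lookup with batched folio lookup. A minimal sketch of the new calling
convention, under the assumption of a hypothetical caller (the
filemap_get_folios() and folio_batch APIs are the ones this series settles on;
everything prefixed example_ is illustrative only):

#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

/* Visit every folio cached in a mapping; "example_" names are hypothetical. */
static void example_visit_cached_folios(struct address_space *mapping)
{
        struct folio_batch fbatch;
        pgoff_t start = 0;
        unsigned int i, n;

        folio_batch_init(&fbatch);
        while ((n = filemap_get_folios(mapping, &start, ULONG_MAX, &fbatch))) {
                for (i = 0; i < n; i++)
                        folio_mark_accessed(fbatch.folios[i]);  /* stand-in op */
                /* drops the references taken by filemap_get_folios() */
                folio_batch_release(&fbatch);
                cond_resched();
        }
}

filemap_get_folios() advances *start past the last folio it returned, so the
loop naturally terminates at the end of the mapping.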
2 changes: 1 addition & 1 deletion Documentation/admin-guide/cgroup-v1/memcg_test.rst
@@ -97,7 +97,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 =============
 
 Page Cache is charged at
-- add_to_page_cache_locked().
+- filemap_add_folio().
 
 The logic is very clear. (About migration, see below)
2 changes: 0 additions & 2 deletions Documentation/filesystems/ext2.rst
@@ -59,8 +59,6 @@ acl			Enable POSIX Access Control Lists support
 			(requires CONFIG_EXT2_FS_POSIX_ACL).
 noacl			Don't support POSIX ACLs.
 
-nobh			Do not attach buffer_heads to file pagecache.
-
 quota, usrquota		Enable user disk quota support
 			(requires CONFIG_QUOTA).
9 changes: 3 additions & 6 deletions Documentation/filesystems/locking.rst
@@ -252,9 +252,8 @@ prototypes::
 	bool (*release_folio)(struct folio *, gfp_t);
 	void (*free_folio)(struct folio *);
 	int (*direct_IO)(struct kiocb *, struct iov_iter *iter);
-	bool (*isolate_page) (struct page *, isolate_mode_t);
-	int (*migratepage)(struct address_space *, struct page *, struct page *);
-	void (*putback_page) (struct page *);
+	int (*migrate_folio)(struct address_space *, struct folio *dst,
+			struct folio *src, enum migrate_mode);
 	int (*launder_folio)(struct folio *);
 	bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count);
 	int (*error_remove_page)(struct address_space *, struct page *);
@@ -280,9 +279,7 @@ invalidate_folio:	yes	exclusive
 release_folio:		yes
 free_folio:		yes
 direct_IO:
-isolate_page:		yes
-migratepage:		yes (both)
-putback_page:		yes
+migrate_folio:		yes (both)
 launder_folio:		yes
 is_partially_uptodate:	yes
 error_remove_page:	yes
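
The table keeps the old guarantee: ->migrate_folio, like ->migratepage before
it, is called with both the source and destination locked ("yes (both)"). A
hedged sketch of a conforming implementation that just delegates to a generic
helper (example_migrate_folio() is hypothetical; filemap_migrate_folio() is
added later in this series, per the shortlog above):

#include <linux/migrate.h>
#include <linux/pagemap.h>

static int example_migrate_folio(struct address_space *mapping,
                struct folio *dst, struct folio *src,
                enum migrate_mode mode)
{
        /* Per the table above, the core calls this with both folios locked. */
        VM_BUG_ON_FOLIO(!folio_test_locked(src), src);
        VM_BUG_ON_FOLIO(!folio_test_locked(dst), dst);

        return filemap_migrate_folio(mapping, dst, src, mode);
}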
65 changes: 39 additions & 26 deletions Documentation/filesystems/vfs.rst
@@ -737,12 +737,8 @@ cache in your filesystem. The following members are defined:
 	bool (*release_folio)(struct folio *, gfp_t);
 	void (*free_folio)(struct folio *);
 	ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
-	/* isolate a page for migration */
-	bool (*isolate_page) (struct page *, isolate_mode_t);
-	/* migrate the contents of a page to the specified target */
-	int (*migratepage) (struct page *, struct page *);
-	/* put migration-failed page back to right list */
-	void (*putback_page) (struct page *);
+	int (*migrate_folio)(struct mapping *, struct folio *dst,
+			struct folio *src, enum migrate_mode);
 	int (*launder_folio) (struct folio *);
 	bool (*is_partially_uptodate) (struct folio *, size_t from,
@@ -774,13 +770,38 @@ cache in your filesystem. The following members are defined:
 	See the file "Locking" for more details.
 
 ``read_folio``
-	called by the VM to read a folio from backing store. The folio
-	will be locked when read_folio is called, and should be unlocked
-	and marked uptodate once the read completes. If ->read_folio
-	discovers that it cannot perform the I/O at this time, it can
-	unlock the folio and return AOP_TRUNCATED_PAGE. In this case,
-	the folio will be looked up again, relocked and if that all succeeds,
-	->read_folio will be called again.
+	Called by the page cache to read a folio from the backing store.
+	The 'file' argument supplies authentication information to network
+	filesystems, and is generally not used by block based filesystems.
+	It may be NULL if the caller does not have an open file (eg if
+	the kernel is performing a read for itself rather than on behalf
+	of a userspace process with an open file).
+
+	If the mapping does not support large folios, the folio will
+	contain a single page. The folio will be locked when read_folio
+	is called. If the read completes successfully, the folio should
+	be marked uptodate. The filesystem should unlock the folio
+	once the read has completed, whether it was successful or not.
+	The filesystem does not need to modify the refcount on the folio;
+	the page cache holds a reference count and that will not be
+	released until the folio is unlocked.
+
+	Filesystems may implement ->read_folio() synchronously.
+	In normal operation, folios are read through the ->readahead()
+	method. Only if this fails, or if the caller needs to wait for
+	the read to complete will the page cache call ->read_folio().
+	Filesystems should not attempt to perform their own readahead
+	in the ->read_folio() operation.
+
+	If the filesystem cannot perform the read at this time, it can
+	unlock the folio, do whatever action it needs to ensure that the
+	read will succeed in the future and return AOP_TRUNCATED_PAGE.
+	In this case, the caller should look up the folio, lock it,
+	and call ->read_folio again.
+
+	Callers may invoke the ->read_folio() method directly, but using
+	read_mapping_folio() will take care of locking, waiting for the
+	read to complete and handle cases such as AOP_TRUNCATED_PAGE.
 
 ``writepages``
 	called by the VM to write out pages associated with the
@@ -905,20 +926,12 @@ cache in your filesystem. The following members are defined:
 	data directly between the storage and the application's address
 	space.
 
-``isolate_page``
-	Called by the VM when isolating a movable non-lru page. If page
-	is successfully isolated, VM marks the page as PG_isolated via
-	__SetPageIsolated.
-
-``migrate_page``
+``migrate_folio``
 	This is used to compact the physical memory usage. If the VM
-	wants to relocate a page (maybe off a memory card that is
-	signalling imminent failure) it will pass a new page and an old
-	page to this function. migrate_page should transfer any private
-	data across and update any references that it has to the page.
-
-``putback_page``
-	Called by the VM when isolated page's migration fails.
+	wants to relocate a folio (maybe from a memory device that is
+	signalling imminent failure) it will pass a new folio and an old
+	folio to this function. migrate_folio should transfer any private
+	data across and update any references that it has to the folio.
 
 ``launder_folio``
 	Called before freeing a folio - it writes back the dirty folio.
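
The ->read_folio contract spelled out above reduces to very little code in the
simple synchronous case. A minimal sketch, assuming a hypothetical
example_fill_folio() that performs the actual I/O (the lock/uptodate calls are
the real folio API):

#include <linux/pagemap.h>

static int example_read_folio(struct file *file, struct folio *folio)
{
        int err = example_fill_folio(file, folio);      /* hypothetical I/O */

        if (!err)
                folio_mark_uptodate(folio);
        folio_unlock(folio);    /* unlock whether the read succeeded or not */
        return err;
}

Callers wanting the lookup/lock/retry dance handled for them would use
read_mapping_folio() rather than invoking the hook directly, as the final
paragraph of the description notes.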
113 changes: 10 additions & 103 deletions Documentation/vm/page_migration.rst
@@ -152,110 +152,15 @@ Steps:
 Non-LRU page migration
 ======================
 
-Although migration originally aimed for reducing the latency of memory accesses
-for NUMA, compaction also uses migration to create high-order pages.
+Although migration originally aimed for reducing the latency of memory
+accesses for NUMA, compaction also uses migration to create high-order
+pages. For compaction purposes, it is also useful to be able to move
+non-LRU pages, such as zsmalloc and virtio-balloon pages.
 
-Current problem of the implementation is that it is designed to migrate only
-*LRU* pages. However, there are potential non-LRU pages which can be migrated
-in drivers, for example, zsmalloc, virtio-balloon pages.
-
-For virtio-balloon pages, some parts of migration code path have been hooked
-up and added virtio-balloon specific functions to intercept migration logics.
-It's too specific to a driver so other drivers who want to make their pages
-movable would have to add their own specific hooks in the migration path.
-
-To overcome the problem, VM supports non-LRU page migration which provides
-generic functions for non-LRU movable pages without driver specific hooks
-in the migration path.
-
-If a driver wants to make its pages movable, it should define three functions
-which are function pointers of struct address_space_operations.
-
-1. ``bool (*isolate_page) (struct page *page, isolate_mode_t mode);``
-
-What VM expects from isolate_page() function of driver is to return *true*
-if driver isolates the page successfully. On returning true, VM marks the page
-as PG_isolated so concurrent isolation in several CPUs skip the page
-for isolation. If a driver cannot isolate the page, it should return *false*.
-
-Once page is successfully isolated, VM uses page.lru fields so driver
-shouldn't expect to preserve values in those fields.
-
-2. ``int (*migratepage) (struct address_space *mapping,``
-|	``struct page *newpage, struct page *oldpage, enum migrate_mode);``
-
-After isolation, VM calls migratepage() of driver with the isolated page.
-The function of migratepage() is to move the contents of the old page to the
-new page
-and set up fields of struct page newpage. Keep in mind that you should
-indicate to the VM the oldpage is no longer movable via __ClearPageMovable()
-under page_lock if you migrated the oldpage successfully and returned
-MIGRATEPAGE_SUCCESS. If driver cannot migrate the page at the moment, driver
-can return -EAGAIN. On -EAGAIN, VM will retry page migration in a short time
-because VM interprets -EAGAIN as "temporary migration failure". On returning
-any error except -EAGAIN, VM will give up the page migration without
-retrying.
-
-Driver shouldn't touch the page.lru field while in the migratepage() function.
-
-3. ``void (*putback_page)(struct page *);``
-
-If migration fails on the isolated page, VM should return the isolated page
-to the driver so VM calls the driver's putback_page() with the isolated page.
-In this function, the driver should put the isolated page back into its own data
-structure.
-
-Non-LRU movable page flags
-
-There are two page flags for supporting non-LRU movable page.
-
-* PG_movable
-
-Driver should use the function below to make page movable under page_lock::
-
-	void __SetPageMovable(struct page *page, struct address_space *mapping)
-
-It needs argument of address_space for registering migration
-family functions which will be called by VM. Exactly speaking,
-PG_movable is not a real flag of struct page. Rather, VM
-reuses the page->mapping's lower bits to represent it::
-
-	#define PAGE_MAPPING_MOVABLE 0x2
-	page->mapping = page->mapping | PAGE_MAPPING_MOVABLE;
-
-so driver shouldn't access page->mapping directly. Instead, driver should
-use page_mapping() which masks off the low two bits of page->mapping under
-page lock so it can get the right struct address_space.
-
-For testing of non-LRU movable pages, VM supports __PageMovable() function.
-However, it doesn't guarantee to identify non-LRU movable pages because
-the page->mapping field is unified with other variables in struct page.
-If the driver releases the page after isolation by VM, page->mapping
-doesn't have a stable value although it has PAGE_MAPPING_MOVABLE set
-(look at __ClearPageMovable). But __PageMovable() is cheap to call whether
-page is LRU or non-LRU movable once the page has been isolated because LRU
-pages can never have PAGE_MAPPING_MOVABLE set in page->mapping. It is also
-good for just peeking to test non-LRU movable pages before more expensive
-checking with lock_page() in pfn scanning to select a victim.
-
-For guaranteeing non-LRU movable page, VM provides PageMovable() function.
-Unlike __PageMovable(), PageMovable() validates page->mapping and
-mapping->a_ops->isolate_page under lock_page(). The lock_page() prevents
-sudden destroying of page->mapping.
-
-Drivers using __SetPageMovable() should clear the flag via
-__ClearMovablePage() under page_lock() before the releasing the page.
-
-* PG_isolated
-
-To prevent concurrent isolation among several CPUs, VM marks isolated page
-as PG_isolated under lock_page(). So if a CPU encounters PG_isolated
-non-LRU movable page, it can skip it. Driver doesn't need to manipulate the
-flag because VM will set/clear it automatically. Keep in mind that if the
-driver sees a PG_isolated page, it means the page has been isolated by the
-VM so it shouldn't touch the page.lru field.
-The PG_isolated flag is aliased with the PG_reclaim flag so drivers
-shouldn't use PG_isolated for its own purposes.
+If a driver wants to make its pages movable, it should define a struct
+movable_operations. It then needs to call __SetPageMovable() on each
+page that it may be able to move. This uses the ``page->mapping`` field,
+so this field is not available for the driver to use for other purposes.
 
 Monitoring Migration
 =====================
@@ -286,3 +191,5 @@ THP_MIGRATION_FAIL and PGMIGRATE_FAIL to increase.
 
 Christoph Lameter, May 8, 2006.
 Minchan Kim, Mar 28, 2016.
+
+.. kernel-doc:: include/linux/migrate.h
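
A hedged sketch of the registration pattern the rewritten section describes;
struct movable_operations and __SetPageMovable() follow the definitions this
series adds to include/linux/migrate.h, while everything prefixed example_ is
hypothetical:

#include <linux/migrate.h>

static bool example_isolate(struct page *page, isolate_mode_t mode)
{
        /* pin the page in the driver's structures; true means isolated */
        return true;
}

static int example_migrate(struct page *dst, struct page *src,
                enum migrate_mode mode)
{
        /* copy contents and driver metadata from src to dst */
        return MIGRATEPAGE_SUCCESS;
}

static void example_putback(struct page *page)
{
        /* put an isolated page back on the driver's lists after failure */
}

static const struct movable_operations example_mops = {
        .isolate_page   = example_isolate,
        .migrate_page   = example_migrate,
        .putback_page   = example_putback,
};

/* Called under the page lock when the driver makes a page movable. */
static void example_make_movable(struct page *page)
{
        __SetPageMovable(page, &example_mops);
}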
60 changes: 3 additions & 57 deletions arch/powerpc/platforms/pseries/cmm.c
@@ -19,9 +19,6 @@
 #include <linux/stringify.h>
 #include <linux/swap.h>
 #include <linux/device.h>
-#include <linux/mount.h>
-#include <linux/pseudo_fs.h>
-#include <linux/magic.h>
 #include <linux/balloon_compaction.h>
 #include <asm/firmware.h>
 #include <asm/hvcall.h>
@@ -500,19 +497,6 @@ static struct notifier_block cmm_mem_nb = {
 };
 
 #ifdef CONFIG_BALLOON_COMPACTION
-static struct vfsmount *balloon_mnt;
-
-static int cmm_init_fs_context(struct fs_context *fc)
-{
-	return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type balloon_fs = {
-	.name = "ppc-cmm",
-	.init_fs_context = cmm_init_fs_context,
-	.kill_sb = kill_anon_super,
-};
-
 static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
 			   struct page *newpage, struct page *page,
 			   enum migrate_mode mode)
@@ -564,47 +548,13 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
 	return MIGRATEPAGE_SUCCESS;
 }
 
-static int cmm_balloon_compaction_init(void)
+static void cmm_balloon_compaction_init(void)
 {
-	int rc;
-
 	balloon_devinfo_init(&b_dev_info);
 	b_dev_info.migratepage = cmm_migratepage;
-
-	balloon_mnt = kern_mount(&balloon_fs);
-	if (IS_ERR(balloon_mnt)) {
-		rc = PTR_ERR(balloon_mnt);
-		balloon_mnt = NULL;
-		return rc;
-	}
-
-	b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
-	if (IS_ERR(b_dev_info.inode)) {
-		rc = PTR_ERR(b_dev_info.inode);
-		b_dev_info.inode = NULL;
-		kern_unmount(balloon_mnt);
-		balloon_mnt = NULL;
-		return rc;
-	}
-
-	b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
-	return 0;
-}
-static void cmm_balloon_compaction_deinit(void)
-{
-	if (b_dev_info.inode)
-		iput(b_dev_info.inode);
-	b_dev_info.inode = NULL;
-	kern_unmount(balloon_mnt);
-	balloon_mnt = NULL;
 }
 #else /* CONFIG_BALLOON_COMPACTION */
-static int cmm_balloon_compaction_init(void)
-{
-	return 0;
-}
-
-static void cmm_balloon_compaction_deinit(void)
+static void cmm_balloon_compaction_init(void)
 {
 }
 #endif /* CONFIG_BALLOON_COMPACTION */
@@ -622,9 +572,7 @@ static int cmm_init(void)
 	if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
 		return -EOPNOTSUPP;
 
-	rc = cmm_balloon_compaction_init();
-	if (rc)
-		return rc;
+	cmm_balloon_compaction_init();
 
 	rc = register_oom_notifier(&cmm_oom_nb);
 	if (rc < 0)
@@ -658,7 +606,6 @@ static int cmm_init(void)
 out_oom_notifier:
 	unregister_oom_notifier(&cmm_oom_nb);
 out_balloon_compaction:
-	cmm_balloon_compaction_deinit();
 	return rc;
 }

@@ -677,7 +624,6 @@ static void cmm_exit(void)
 	unregister_memory_notifier(&cmm_mem_nb);
 	cmm_free_pages(atomic_long_read(&loaned_pages));
 	cmm_unregister_sysfs(&cmm_dev);
-	cmm_balloon_compaction_deinit();
 }
 
 /**
2 changes: 1 addition & 1 deletion block/fops.c
@@ -421,7 +421,7 @@ const struct address_space_operations def_blk_aops = {
 	.write_end	= blkdev_write_end,
 	.writepages	= blkdev_writepages,
 	.direct_IO	= blkdev_direct_IO,
-	.migratepage	= buffer_migrate_page_norefs,
+	.migrate_folio	= buffer_migrate_folio_norefs,
 	.is_dirty_writeback = buffer_check_dirty_writeback,
 };

4 changes: 2 additions & 2 deletions block/partitions/check.h
@@ -24,13 +24,13 @@ struct parsed_partitions {
 };
 
 typedef struct {
-	struct page *v;
+	struct folio *v;
 } Sector;
 
 void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p);
 static inline void put_dev_sector(Sector p)
 {
-	put_page(p.v);
+	folio_put(p.v);
 }
 
 static inline void
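
The Sector change above is transparent to partition parsers, which keep the
same read_part_sector()/put_dev_sector() calling convention; only the
reference now pins a folio. A hedged sketch of the usual pattern (the parsing
body is elided and example_check_partition() is hypothetical):

#include "check.h"

static int example_check_partition(struct parsed_partitions *state)
{
        Sector sect;
        unsigned char *data;

        data = read_part_sector(state, 0, &sect);
        if (!data)
                return -1;
        /* ... parse the 512-byte sector contents ... */
        put_dev_sector(sect);   /* now drops a folio reference */
        return 1;
}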
