Merge tag 'mm-stable-2022-05-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull more MM updates from Andrew Morton:

 - Two follow-on fixes for the post-5.19 series "Use pageblock_order for
   cma and alloc_contig_range alignment", from Zi Yan.

 - A series of z3fold cleanups and fixes from Miaohe Lin.

 - Some memcg selftests work from Michal Koutný <[email protected]>

 - Some swap fixes and cleanups from Miaohe Lin

 - Several individual minor fixups

* tag 'mm-stable-2022-05-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (25 commits)
  mm/shmem.c: suppress shift warning
  mm: Kconfig: reorganize misplaced mm options
  mm: kasan: fix input of vmalloc_to_page()
  mm: fix is_pinnable_page against a cma page
  mm: filter out swapin error entry in shmem mapping
  mm/shmem: fix infinite loop when swap in shmem error at swapoff time
  mm/madvise: free hwpoison and swapin error entry in madvise_free_pte_range
  mm/swapfile: fix lost swap bits in unuse_pte()
  mm/swapfile: unuse_pte can map random data if swap read fails
  selftests: memcg: factor out common parts of memory.{low,min} tests
  selftests: memcg: remove protection from top level memcg
  selftests: memcg: adjust expected reclaim values of protected cgroups
  selftests: memcg: expect no low events in unprotected sibling
  selftests: memcg: fix compilation
  mm/z3fold: fix z3fold_page_migrate races with z3fold_map
  mm/z3fold: fix z3fold_reclaim_page races with z3fold_free
  mm/z3fold: always clear PAGE_CLAIMED under z3fold page lock
  mm/z3fold: put z3fold page back into unbuddied list when reclaim or migration fails
  revert "mm/z3fold.c: allow __GFP_HIGHMEM in z3fold_alloc"
  mm/z3fold: throw warning on failure of trylock_page in z3fold_alloc
  ...
torvalds committed May 27, 2022
2 parents 77fb622 + fa020a2 commit 8291eaa
Showing 20 changed files with 436 additions and 364 deletions.
1 change: 1 addition & 0 deletions MAINTAINERS
@@ -5062,6 +5062,7 @@ L:	[email protected]
 S:	Maintained
 F:	mm/memcontrol.c
 F:	mm/swap_cgroup.c
+F:	tools/testing/selftests/cgroup/memcg_protection.m
 F:	tools/testing/selftests/cgroup/test_kmem.c
 F:	tools/testing/selftests/cgroup/test_memcontrol.c

9 changes: 7 additions & 2 deletions include/linux/mm.h
@@ -1594,8 +1594,13 @@ static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
 #ifdef CONFIG_MIGRATION
 static inline bool is_pinnable_page(struct page *page)
 {
-	return !(is_zone_movable_page(page) || is_migrate_cma_page(page)) ||
-		is_zero_pfn(page_to_pfn(page));
+#ifdef CONFIG_CMA
+	int mt = get_pageblock_migratetype(page);
+
+	if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE)
+		return false;
+#endif
+	return !(is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page)));
 }
 #else
 static inline bool is_pinnable_page(struct page *page)
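
The rewrite above closes a race: is_migrate_cma_page() tests the migratetype directly, but alloc_contig_range() temporarily flips a pageblock to MIGRATE_ISOLATE, so a CMA page mid-isolation could be judged pinnable and then long-term pinned. Reading the pageblock migratetype once and rejecting both MIGRATE_CMA and MIGRATE_ISOLATE covers both states. A stand-alone model of the new predicate's logic (the struct and enum here are illustrative stand-ins, not kernel API):

/*
 * User-space model of the fixed is_pinnable_page() logic; the fields
 * stand in for state the kernel derives from the real struct page.
 */
#include <stdbool.h>
#include <stdio.h>

enum migratetype { MIGRATE_MOVABLE, MIGRATE_CMA, MIGRATE_ISOLATE };

struct page_model {
	enum migratetype pageblock_mt;	/* pageblock migratetype */
	bool zone_movable;		/* page lives in ZONE_MOVABLE */
	bool zero_pfn;			/* the shared zero page */
};

static bool is_pinnable(const struct page_model *p)
{
	/* CMA pages now fail this check even while their pageblock is isolated */
	if (p->pageblock_mt == MIGRATE_CMA || p->pageblock_mt == MIGRATE_ISOLATE)
		return false;
	return !(p->zone_movable || p->zero_pfn);
}

int main(void)
{
	/* a CMA page mid-alloc_contig_range(): the old code called it pinnable */
	struct page_model isolated_cma = { MIGRATE_ISOLATE, false, false };

	printf("pinnable: %d\n", is_pinnable(&isolated_cma));	/* prints 0 */
	return 0;
}
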
7 changes: 6 additions & 1 deletion include/linux/swap.h
@@ -55,6 +55,10 @@ static inline int current_is_kswapd(void)
  * actions on faults.
  */
 
+#define SWP_SWAPIN_ERROR_NUM 1
+#define SWP_SWAPIN_ERROR	(MAX_SWAPFILES + SWP_HWPOISON_NUM + \
+				 SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \
+				 SWP_PTE_MARKER_NUM)
 /*
  * PTE markers are used to persist information onto PTEs that are mapped with
  * file-backed memories. As its name "PTE" hints, it should only be applied to
@@ -120,7 +124,8 @@ static inline int current_is_kswapd(void)
 
 #define MAX_SWAPFILES \
 	((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
-	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - SWP_PTE_MARKER_NUM)
+	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \
+	SWP_PTE_MARKER_NUM - SWP_SWAPIN_ERROR_NUM)
 
 /*
  * Magic header for a swap area. The first part of the union is
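
The arithmetic behind these macros: swap entry types form a small integer space of 1 << MAX_SWAPFILES_SHIFT values, and every special entry kind reserves a slot off the top, so the new error type shrinks MAX_SWAPFILES by one more. A compilable sketch of the layout, assuming a 64-bit 5.19 configuration with device-private memory, migration, hwpoison and PTE markers all enabled (each *_NUM collapses to 0 when its feature is configured out, so the concrete numbers vary by build):

/* Stand-alone model of the swap-type arithmetic; values assume all the
 * relevant config options are on, mirroring the kernel macros above. */
#include <stdio.h>

#define MAX_SWAPFILES_SHIFT  5	/* 1 << 5 = 32 type values in total */
#define SWP_DEVICE_NUM       4
#define SWP_MIGRATION_NUM    3
#define SWP_HWPOISON_NUM     1
#define SWP_PTE_MARKER_NUM   1
#define SWP_SWAPIN_ERROR_NUM 1

#define MAX_SWAPFILES \
	((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
	 SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \
	 SWP_PTE_MARKER_NUM - SWP_SWAPIN_ERROR_NUM)

#define SWP_SWAPIN_ERROR (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
			  SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \
			  SWP_PTE_MARKER_NUM)

int main(void)
{
	/* 32 - 4 - 3 - 1 - 1 - 1 = 22 types left for real swap files */
	printf("MAX_SWAPFILES    = %d\n", MAX_SWAPFILES);
	/* 22 + 1 + 3 + 4 + 1 = 31: the error type takes the freed top slot */
	printf("SWP_SWAPIN_ERROR = %d\n", SWP_SWAPIN_ERROR);
	return 0;
}
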
10 changes: 10 additions & 0 deletions include/linux/swapops.h
@@ -108,6 +108,16 @@ static inline void *swp_to_radix_entry(swp_entry_t entry)
 	return xa_mk_value(entry.val);
 }
 
+static inline swp_entry_t make_swapin_error_entry(struct page *page)
+{
+	return swp_entry(SWP_SWAPIN_ERROR, page_to_pfn(page));
+}
+
+static inline int is_swapin_error_entry(swp_entry_t entry)
+{
+	return swp_type(entry) == SWP_SWAPIN_ERROR;
+}
+
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
 static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
 {
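
How the pair is meant to be used, per the swapfile and fault-path changes in this merge: the swapoff path installs an error entry when a swap read fails, and a later fault on that PTE converts it to SIGBUS. A condensed, paraphrased sketch of both halves (not a verbatim quote of either hunk; see the mm/memory.c diff further down for the actual reader side):

/* Writer side -- mm/swapfile.c unuse_pte(), paraphrased: instead of
 * mapping a page of random data when the swapin read failed, poison
 * the PTE with a swapin error entry. */
	if (unlikely(!PageUptodate(page))) {
		pte_t pteval;

		pteval = swp_entry_to_pte(make_swapin_error_entry(page));
		set_pte_at(vma->vm_mm, addr, pte, pteval);
		goto out;
	}

/* Reader side -- mm/memory.c do_swap_page(): any later access fails
 * hard rather than exposing garbage to the process. */
	} else if (is_swapin_error_entry(entry)) {
		ret = VM_FAULT_SIGBUS;
	}
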
54 changes: 0 additions & 54 deletions init/Kconfig
@@ -1842,60 +1842,6 @@ config DEBUG_PERF_USE_VMALLOC
 
 endmenu
 
-config VM_EVENT_COUNTERS
-	default y
-	bool "Enable VM event counters for /proc/vmstat" if EXPERT
-	help
-	  VM event counters are needed for event counts to be shown.
-	  This option allows the disabling of the VM event counters
-	  on EXPERT systems. /proc/vmstat will only show page counts
-	  if VM event counters are disabled.
-
-config SLUB_DEBUG
-	default y
-	bool "Enable SLUB debugging support" if EXPERT
-	depends on SLUB && SYSFS
-	select STACKDEPOT if STACKTRACE_SUPPORT
-	help
-	  SLUB has extensive debug support features. Disabling these can
-	  result in significant savings in code size. This also disables
-	  SLUB sysfs support. /sys/slab will not exist and there will be
-	  no support for cache validation etc.
-
-config COMPAT_BRK
-	bool "Disable heap randomization"
-	default y
-	help
-	  Randomizing heap placement makes heap exploits harder, but it
-	  also breaks ancient binaries (including anything libc5 based).
-	  This option changes the bootup default to heap randomization
-	  disabled, and can be overridden at runtime by setting
-	  /proc/sys/kernel/randomize_va_space to 2.
-
-	  On non-ancient distros (post-2000 ones) N is usually a safe choice.
-
-config MMAP_ALLOW_UNINITIALIZED
-	bool "Allow mmapped anonymous memory to be uninitialized"
-	depends on EXPERT && !MMU
-	default n
-	help
-	  Normally, and according to the Linux spec, anonymous memory obtained
-	  from mmap() has its contents cleared before it is passed to
-	  userspace. Enabling this config option allows you to request that
-	  mmap() skip that if it is given an MAP_UNINITIALIZED flag, thus
-	  providing a huge performance boost. If this option is not enabled,
-	  then the flag will be ignored.
-
-	  This is taken advantage of by uClibc's malloc(), and also by
-	  ELF-FDPIC binfmt's brk and stack allocator.
-
-	  Because of the obvious security issues, this option should only be
-	  enabled on embedded devices where you control what is run in
-	  userspace. Since that isn't generally a problem on no-MMU systems,
-	  it is normally safe to say Y here.
-
-	  See Documentation/admin-guide/mm/nommu-mmap.rst for more information.
-
 config SYSTEM_DATA_VERIFICATION
 	def_bool n
 	select SYSTEM_TRUSTED_KEYRING
35 changes: 0 additions & 35 deletions lib/Kconfig.debug
@@ -699,41 +699,6 @@ config DEBUG_OBJECTS_ENABLE_DEFAULT
 	help
 	  Debug objects boot parameter default value
 
-config DEBUG_SLAB
-	bool "Debug slab memory allocations"
-	depends on DEBUG_KERNEL && SLAB
-	help
-	  Say Y here to have the kernel do limited verification on memory
-	  allocation as well as poisoning memory on free to catch use of freed
-	  memory. This can make kmalloc/kfree-intensive workloads much slower.
-
-config SLUB_DEBUG_ON
-	bool "SLUB debugging on by default"
-	depends on SLUB && SLUB_DEBUG
-	select STACKDEPOT_ALWAYS_INIT if STACKTRACE_SUPPORT
-	default n
-	help
-	  Boot with debugging on by default. SLUB boots by default with
-	  the runtime debug capabilities switched off. Enabling this is
-	  equivalent to specifying the "slub_debug" parameter on boot.
-	  There is no support for more fine grained debug control like
-	  possible with slub_debug=xxx. SLUB debugging may be switched
-	  off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
-	  "slub_debug=-".
-
-config SLUB_STATS
-	default n
-	bool "Enable SLUB performance statistics"
-	depends on SLUB && SYSFS
-	help
-	  SLUB statistics are useful to debug SLUBs allocation behavior in
-	  order find ways to optimize the allocator. This should never be
-	  enabled for production use since keeping statistics slows down
-	  the allocator by a few percentage points. The slabinfo command
-	  supports the determination of the most active slabs to figure
-	  out which slabs are relevant to a particular load.
-	  Try running: slabinfo -DA
-
 config HAVE_DEBUG_KMEMLEAK
 	bool
 
56 changes: 56 additions & 0 deletions mm/Kconfig
@@ -270,6 +270,19 @@ config SLAB_FREELIST_HARDENED
 	  sanity-checking than others. This option is most effective with
 	  CONFIG_SLUB.
 
+config SLUB_STATS
+	default n
+	bool "Enable SLUB performance statistics"
+	depends on SLUB && SYSFS
+	help
+	  SLUB statistics are useful to debug SLUBs allocation behavior in
+	  order find ways to optimize the allocator. This should never be
+	  enabled for production use since keeping statistics slows down
+	  the allocator by a few percentage points. The slabinfo command
+	  supports the determination of the most active slabs to figure
+	  out which slabs are relevant to a particular load.
+	  Try running: slabinfo -DA
+
 config SLUB_CPU_PARTIAL
 	default y
 	depends on SLUB && SMP
@@ -307,6 +320,40 @@ config SHUFFLE_PAGE_ALLOCATOR
 
 	  Say Y if unsure.
 
+config COMPAT_BRK
+	bool "Disable heap randomization"
+	default y
+	help
+	  Randomizing heap placement makes heap exploits harder, but it
+	  also breaks ancient binaries (including anything libc5 based).
+	  This option changes the bootup default to heap randomization
+	  disabled, and can be overridden at runtime by setting
+	  /proc/sys/kernel/randomize_va_space to 2.
+
+	  On non-ancient distros (post-2000 ones) N is usually a safe choice.
+
+config MMAP_ALLOW_UNINITIALIZED
+	bool "Allow mmapped anonymous memory to be uninitialized"
+	depends on EXPERT && !MMU
+	default n
+	help
+	  Normally, and according to the Linux spec, anonymous memory obtained
+	  from mmap() has its contents cleared before it is passed to
+	  userspace. Enabling this config option allows you to request that
+	  mmap() skip that if it is given an MAP_UNINITIALIZED flag, thus
+	  providing a huge performance boost. If this option is not enabled,
+	  then the flag will be ignored.
+
+	  This is taken advantage of by uClibc's malloc(), and also by
+	  ELF-FDPIC binfmt's brk and stack allocator.
+
+	  Because of the obvious security issues, this option should only be
+	  enabled on embedded devices where you control what is run in
+	  userspace. Since that isn't generally a problem on no-MMU systems,
+	  it is normally safe to say Y here.
+
+	  See Documentation/admin-guide/mm/nommu-mmap.rst for more information.
+
 config SELECT_MEMORY_MODEL
 	def_bool y
 	depends on ARCH_SELECT_MEMORY_MODEL
@@ -964,6 +1011,15 @@ config ARCH_USES_HIGH_VMA_FLAGS
 config ARCH_HAS_PKEYS
 	bool
 
+config VM_EVENT_COUNTERS
+	default y
+	bool "Enable VM event counters for /proc/vmstat" if EXPERT
+	help
+	  VM event counters are needed for event counts to be shown.
+	  This option allows the disabling of the VM event counters
+	  on EXPERT systems. /proc/vmstat will only show page counts
+	  if VM event counters are disabled.
+
 config PERCPU_STATS
 	bool "Collect percpu memory statistics"
 	help
33 changes: 33 additions & 0 deletions mm/Kconfig.debug
@@ -45,6 +45,39 @@ config DEBUG_PAGEALLOC_ENABLE_DEFAULT
 	  Enable debug page memory allocations by default? This value
 	  can be overridden by debug_pagealloc=off|on.
 
+config DEBUG_SLAB
+	bool "Debug slab memory allocations"
+	depends on DEBUG_KERNEL && SLAB
+	help
+	  Say Y here to have the kernel do limited verification on memory
+	  allocation as well as poisoning memory on free to catch use of freed
+	  memory. This can make kmalloc/kfree-intensive workloads much slower.
+
+config SLUB_DEBUG
+	default y
+	bool "Enable SLUB debugging support" if EXPERT
+	depends on SLUB && SYSFS
+	select STACKDEPOT if STACKTRACE_SUPPORT
+	help
+	  SLUB has extensive debug support features. Disabling these can
+	  result in significant savings in code size. This also disables
+	  SLUB sysfs support. /sys/slab will not exist and there will be
+	  no support for cache validation etc.
+
+config SLUB_DEBUG_ON
+	bool "SLUB debugging on by default"
+	depends on SLUB && SLUB_DEBUG
+	select STACKDEPOT_ALWAYS_INIT if STACKTRACE_SUPPORT
+	default n
+	help
+	  Boot with debugging on by default. SLUB boots by default with
+	  the runtime debug capabilities switched off. Enabling this is
+	  equivalent to specifying the "slub_debug" parameter on boot.
+	  There is no support for more fine grained debug control like
+	  possible with slub_debug=xxx. SLUB debugging may be switched
+	  off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
+	  "slub_debug=-".
+
 config PAGE_OWNER
 	bool "Track page owner"
 	depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
4 changes: 2 additions & 2 deletions mm/internal.h
@@ -374,8 +374,8 @@ extern void *memmap_alloc(phys_addr_t size, phys_addr_t align,
 			  phys_addr_t min_addr,
 			  int nid, bool exact_nid);
 
-void split_free_page(struct page *free_page,
-			int order, unsigned long split_pfn_offset);
+int split_free_page(struct page *free_page,
+			unsigned int order, unsigned long split_pfn_offset);
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
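
This prototype change pairs with the first of Zi Yan's follow-on fixes named in the merge message: split_free_page() can now report that the target stopped being a free buddy page before the zone lock was taken, so the isolation path can give up instead of looping forever. A hedged sketch of the caller pattern this enables (paraphrased; details may differ from the actual mm/page_isolation.c change, which is not shown on this page):

/* Paraphrased caller-side pattern: check the result and bail out
 * rather than assuming the split always happened. */
	if (pfn + (1UL << order) > boundary_pfn) {
		/* the free page changed under us before the zone lock was taken */
		if (split_free_page(page, order, boundary_pfn - pfn))
			continue;
		ret = -EBUSY;
		goto failed;
	}
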
2 changes: 1 addition & 1 deletion mm/kasan/report.c
@@ -347,7 +347,7 @@ static void print_address_description(void *addr, u8 tag)
 			 va->addr, va->addr + va->size, va->caller);
 		pr_err("\n");
 
-		page = vmalloc_to_page(page);
+		page = vmalloc_to_page(addr);
 	}
 }
 
18 changes: 12 additions & 6 deletions mm/madvise.c
@@ -248,10 +248,13 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
 
 		if (!xa_is_value(page))
 			continue;
+		swap = radix_to_swp_entry(page);
+		/* There might be swapin error entries in shmem mapping. */
+		if (non_swap_entry(swap))
+			continue;
 		xas_pause(&xas);
 		rcu_read_unlock();
 
-		swap = radix_to_swp_entry(page);
 		page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
 					     NULL, 0, false, &splug);
 		if (page)
@@ -624,11 +627,14 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 			swp_entry_t entry;
 
 			entry = pte_to_swp_entry(ptent);
-			if (non_swap_entry(entry))
-				continue;
-			nr_swap--;
-			free_swap_and_cache(entry);
-			pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
+			if (!non_swap_entry(entry)) {
+				nr_swap--;
+				free_swap_and_cache(entry);
+				pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
+			} else if (is_hwpoison_entry(entry) ||
+				   is_swapin_error_entry(entry)) {
+				pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
+			}
 			continue;
 		}
 
5 changes: 4 additions & 1 deletion mm/memory.c
@@ -1487,7 +1487,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			/* Only drop the uffd-wp marker if explicitly requested */
 			if (!zap_drop_file_uffd_wp(details))
 				continue;
-		} else if (is_hwpoison_entry(entry)) {
+		} else if (is_hwpoison_entry(entry) ||
+			   is_swapin_error_entry(entry)) {
 			if (!should_zap_cows(details))
 				continue;
 		} else {
@@ -3727,6 +3728,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 			ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
 		} else if (is_hwpoison_entry(entry)) {
 			ret = VM_FAULT_HWPOISON;
+		} else if (is_swapin_error_entry(entry)) {
+			ret = VM_FAULT_SIGBUS;
 		} else if (is_pte_marker_entry(entry)) {
 			ret = handle_pte_marker(vmf);
 		} else {
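
The user-visible contract of VM_FAULT_SIGBUS here: a process that later touches a page whose swapin failed gets a SIGBUS instead of silently reading random data. A minimal user-space sketch of how such a failure would surface (illustrative only; actually provoking it requires a failing swap device):

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void on_sigbus(int sig, siginfo_t *info, void *ucontext)
{
	(void)sig;
	(void)ucontext;
	fprintf(stderr, "SIGBUS at %p (e.g. page lost to a swapin I/O error)\n",
		info->si_addr);
	_exit(EXIT_FAILURE);
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = on_sigbus;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGBUS, &sa, NULL);

	/* Touch memory here: a page whose swapin failed now delivers
	 * SIGBUS to this handler rather than mapping garbage. */
	return 0;
}
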