Skip to content

Commit

Permalink
Merge branch 'akpm' (patches from Andrew)
Browse files Browse the repository at this point in the history
Merge misc fixes from Andrew Morton:
 "11 patches.

  Subsystems affected by this patch series: mm (memcg, memory-failure,
  oom-kill, secretmem, vmalloc, hugetlb, damon, and tools), and ocfs2"

* emailed patches from Andrew Morton <[email protected]>:
  tools/testing/selftests/vm/split_huge_page_test.c: fix application of sizeof to pointer
  mm/damon/core-test: fix wrong expectations for 'damon_split_regions_of()'
  mm: khugepaged: skip huge page collapse for special files
  mm, thp: bail out early in collapse_file for writeback page
  mm/vmalloc: fix numa spreading for large hash tables
  mm/secretmem: avoid letting secretmem_users drop to zero
  ocfs2: fix race between searching chunks and release journal_head from buffer_head
  mm/oom_kill.c: prevent a race between process_mrelease and exit_mmap
  mm: filemap: check if THP has hwpoisoned subpage for PMD page fault
  mm: hwpoison: remove the unnecessary THP check
  memcg: page_alloc: skip bulk allocator for __GFP_ACCOUNT
  • Loading branch information
torvalds committed Oct 29, 2021
2 parents f25a548 + 9c7516d commit 2c04d67
Show file tree
Hide file tree
Showing 12 changed files with 110 additions and 54 deletions.
22 changes: 13 additions & 9 deletions fs/ocfs2/suballoc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1251,22 +1251,26 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
{
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
struct journal_head *jh;
-int ret;
+int ret = 1;

if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
return 0;

if (!buffer_jbd(bg_bh))
return 1;

-jh = bh2jh(bg_bh);
-spin_lock(&jh->b_state_lock);
-bg = (struct ocfs2_group_desc *) jh->b_committed_data;
-if (bg)
-ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
-else
-ret = 1;
-spin_unlock(&jh->b_state_lock);
+jbd_lock_bh_journal_head(bg_bh);
+if (buffer_jbd(bg_bh)) {
+jh = bh2jh(bg_bh);
+spin_lock(&jh->b_state_lock);
+bg = (struct ocfs2_group_desc *) jh->b_committed_data;
+if (bg)
+ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+else
+ret = 1;
+spin_unlock(&jh->b_state_lock);
+}
+jbd_unlock_bh_journal_head(bg_bh);

return ret;
}
Expand Down
23 changes: 23 additions & 0 deletions include/linux/page-flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,15 @@ enum pageflags {
/* Compound pages. Stored in first tail page's flags */
PG_double_map = PG_workingset,

#ifdef CONFIG_MEMORY_FAILURE
/*
* Compound pages. Stored in first tail page's flags.
* Indicates that at least one subpage is hwpoisoned in the
* THP.
*/
PG_has_hwpoisoned = PG_mappedtodisk,
#endif

/* non-lru isolated movable page */
PG_isolated = PG_reclaim,

Expand Down Expand Up @@ -668,6 +677,20 @@ PAGEFLAG_FALSE(DoubleMap)
TESTSCFLAG_FALSE(DoubleMap)
#endif

#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
/*
* PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the
* compound page.
*
* This flag is set by hwpoison handler. Cleared by THP split or free page.
*/
PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
#else
PAGEFLAG_FALSE(HasHWPoisoned)
TESTSCFLAG_FALSE(HasHWPoisoned)
#endif

/*
* Check if a page is currently marked HWPoisoned. Note that this check is
* best effort only and inherently racy: there is no way to synchronize with
Expand Down
4 changes: 2 additions & 2 deletions mm/damon/core-test.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,14 +219,14 @@ static void damon_test_split_regions_of(struct kunit *test)
r = damon_new_region(0, 22);
damon_add_region(r, t);
damon_split_regions_of(c, t, 2);
-KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2u);
+KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u);
damon_free_target(t);

t = damon_new_target(42);
r = damon_new_region(0, 220);
damon_add_region(r, t);
damon_split_regions_of(c, t, 4);
-KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 4u);
+KUNIT_EXPECT_LE(test, damon_nr_regions(t), 4u);
damon_free_target(t);
damon_destroy_ctx(c);
}
Expand Down
2 changes: 2 additions & 0 deletions mm/huge_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -2426,6 +2426,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
/* lock lru list/PageCompound, ref frozen by page_ref_freeze */
lruvec = lock_page_lruvec(head);

ClearPageHasHWPoisoned(head);

for (i = nr - 1; i >= 1; i--) {
__split_huge_page_tail(head, i, lruvec, list);
/* Some pages can be beyond EOF: drop them from page cache */
Expand Down
26 changes: 17 additions & 9 deletions mm/khugepaged.c
Original file line number Diff line number Diff line change
Expand Up @@ -445,22 +445,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
if (!transhuge_vma_enabled(vma, vm_flags))
return false;

+if (vma->vm_file && !IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) -
+vma->vm_pgoff, HPAGE_PMD_NR))
+return false;
+
/* Enabled via shmem mount options or sysfs settings. */
-if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
-return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
-HPAGE_PMD_NR);
-}
+if (shmem_file(vma->vm_file))
+return shmem_huge_enabled(vma);

/* THP settings require madvise. */
if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always())
return false;

-/* Read-only file mappings need to be aligned for THP to work. */
+/* Only regular file is valid */
if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
-!inode_is_open_for_write(vma->vm_file->f_inode) &&
(vm_flags & VM_EXEC)) {
-return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
-HPAGE_PMD_NR);
+struct inode *inode = vma->vm_file->f_inode;
+
+return !inode_is_open_for_write(inode) &&
+S_ISREG(inode->i_mode);
}

if (!vma->anon_vma || vma->vm_ops)
Expand Down Expand Up @@ -1763,6 +1766,10 @@ static void collapse_file(struct mm_struct *mm,
filemap_flush(mapping);
result = SCAN_FAIL;
goto xa_unlocked;
+} else if (PageWriteback(page)) {
+xas_unlock_irq(&xas);
+result = SCAN_FAIL;
+goto xa_unlocked;
} else if (trylock_page(page)) {
get_page(page);
xas_unlock_irq(&xas);
Expand Down Expand Up @@ -1798,7 +1805,8 @@ static void collapse_file(struct mm_struct *mm,
goto out_unlock;
}

-if (!is_shmem && PageDirty(page)) {
+if (!is_shmem && (PageDirty(page) ||
+PageWriteback(page))) {
/*
* khugepaged only works on read-only fd, so this
* page is dirty because it hasn't been flushed
Expand Down
28 changes: 14 additions & 14 deletions mm/memory-failure.c
Original file line number Diff line number Diff line change
Expand Up @@ -1147,20 +1147,6 @@ static int __get_hwpoison_page(struct page *page)
if (!HWPoisonHandlable(head))
return -EBUSY;

-if (PageTransHuge(head)) {
-/*
-* Non anonymous thp exists only in allocation/free time. We
-* can't handle such a case correctly, so let's give it up.
-* This should be better than triggering BUG_ON when kernel
-* tries to touch the "partially handled" page.
-*/
-if (!PageAnon(head)) {
-pr_err("Memory failure: %#lx: non anonymous thp\n",
-page_to_pfn(page));
-return 0;
-}
-}
-
if (get_page_unless_zero(head)) {
if (head == compound_head(page))
return 1;
Expand Down Expand Up @@ -1708,6 +1694,20 @@ int memory_failure(unsigned long pfn, int flags)
}

if (PageTransHuge(hpage)) {
+/*
+* The flag must be set after the refcount is bumped
+* otherwise it may race with THP split.
+* And the flag can't be set in get_hwpoison_page() since
+* it is called by soft offline too and it is just called
+* for !MF_COUNT_INCREASE. So here seems to be the best
+* place.
+*
+* Don't need care about the above error handling paths for
+* get_hwpoison_page() since they handle either free page
+* or unhandlable page. The refcount is bumped iff the
+* page is a valid handlable page.
+*/
+SetPageHasHWPoisoned(hpage);
if (try_to_split_thp_page(p, "Memory Failure") < 0) {
action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
res = -EBUSY;
Expand Down
9 changes: 9 additions & 0 deletions mm/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -3906,6 +3906,15 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
if (compound_order(page) != HPAGE_PMD_ORDER)
return ret;

/*
* Just backoff if any subpage of a THP is corrupted otherwise
* the corrupted page may mapped by PMD silently to escape the
* check. This kind of THP just can be PTE mapped. Access to
* the corrupted subpage should trigger SIGBUS as expected.
*/
if (unlikely(PageHasHWPoisoned(page)))
return ret;

/*
* Archs like ppc64 need additional space to store information
* related to pte entry. Use the preallocated table for that.
Expand Down
23 changes: 12 additions & 11 deletions mm/oom_kill.c
Original file line number Diff line number Diff line change
Expand Up @@ -1150,7 +1150,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
struct task_struct *task;
struct task_struct *p;
unsigned int f_flags;
-bool reap = true;
+bool reap = false;
struct pid *pid;
long ret = 0;

Expand All @@ -1177,15 +1177,15 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
goto put_task;
}

-mm = p->mm;
-mmgrab(mm);
-
-/* If the work has been done already, just exit with success */
-if (test_bit(MMF_OOM_SKIP, &mm->flags))
-reap = false;
-else if (!task_will_free_mem(p)) {
-reap = false;
-ret = -EINVAL;
+if (mmget_not_zero(p->mm)) {
+mm = p->mm;
+if (task_will_free_mem(p))
+reap = true;
+else {
+/* Error only if the work has not been done already */
+if (!test_bit(MMF_OOM_SKIP, &mm->flags))
+ret = -EINVAL;
+}
}
task_unlock(p);

Expand All @@ -1201,7 +1201,8 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
mmap_read_unlock(mm);

drop_mm:
-mmdrop(mm);
+if (mm)
+mmput(mm);
put_task:
put_task_struct(task);
put_pid:
Expand Down
8 changes: 7 additions & 1 deletion mm/page_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1312,8 +1312,10 @@ static __always_inline bool free_pages_prepare(struct page *page,

VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);

-if (compound)
+if (compound) {
ClearPageDoubleMap(page);
+ClearPageHasHWPoisoned(page);
+}
for (i = 1; i < (1 << order); i++) {
if (compound)
bad += free_tail_pages_check(page, page + i);
Expand Down Expand Up @@ -5223,6 +5225,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
if (unlikely(page_array && nr_pages - nr_populated == 0))
goto out;

/* Bulk allocator does not support memcg accounting. */
if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT))
goto failed;

/* Use the single page allocator for one page. */
if (nr_pages - nr_populated == 1)
goto failed;
Expand Down
2 changes: 1 addition & 1 deletion mm/secretmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,8 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)

file->f_flags |= O_LARGEFILE;

-fd_install(fd, file);
atomic_inc(&secretmem_users);
+fd_install(fd, file);
return fd;

err_put_fd:
Expand Down
15 changes: 9 additions & 6 deletions mm/vmalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2816,14 +2816,16 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
unsigned int order, unsigned int nr_pages, struct page **pages)
{
unsigned int nr_allocated = 0;
+struct page *page;
+int i;

/*
* For order-0 pages we make use of bulk allocator, if
* the page array is partly or not at all populated due
* to fails, fallback to a single page allocator that is
* more permissive.
*/
-if (!order) {
+if (!order && nid != NUMA_NO_NODE) {
while (nr_allocated < nr_pages) {
unsigned int nr, nr_pages_request;

Expand All @@ -2848,19 +2850,20 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
if (nr != nr_pages_request)
break;
}
-} else
+} else if (order)
/*
* Compound pages required for remap_vmalloc_page if
* high-order pages.
*/
gfp |= __GFP_COMP;

/* High-order pages or fallback path if "bulk" fails. */
-while (nr_allocated < nr_pages) {
-struct page *page;
-int i;

-page = alloc_pages_node(nid, gfp, order);
+while (nr_allocated < nr_pages) {
+if (nid == NUMA_NO_NODE)
+page = alloc_pages(gfp, order);
+else
+page = alloc_pages_node(nid, gfp, order);
if (unlikely(!page))
break;

Expand Down
2 changes: 1 addition & 1 deletion tools/testing/selftests/vm/split_huge_page_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ void split_file_backed_thp(void)
}

/* write something to the file, so a file-backed THP can be allocated */
-num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc));
+num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1);
close(fd);

if (num_written < 1) {
Expand Down

0 comments on commit 2c04d67

Please sign in to comment.