mm: Close races between THP migration and PMD numa clearing
THP migration uses the page lock to guard against parallel migrations
but there are cases like this still open:

  Task A					Task B
  ---------------------				---------------------
  do_huge_pmd_numa_page				do_huge_pmd_numa_page
  lock_page
  mpol_misplaced == -1
  unlock_page
  goto clear_pmdnuma
						lock_page
						mpol_misplaced == 2
						migrate_misplaced_transhuge
  pmd = pmd_mknonnuma
  set_pmd_at

During hours of testing, one machine crashed with weird errors and,
while I have no direct evidence, I suspect something like the race
above happened. This patch extends the page lock to be held until the
pmd_numa bit is cleared, so a migration cannot start in parallel while
pmd_numa is being cleared. It also flushes the old pmd entry and
orders pagetable insertion before rmap insertion.
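
The locking rule the patch enforces can be sketched in userspace terms:
whoever clears the NUMA hint must hold the page lock for the whole
update, and whoever migrates must re-check the hint under that same
lock. Below is a minimal pthread analogy; fake_page, clear_numa_hint
and try_migrate are invented names for illustration, not kernel APIs.

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Stand-in for struct page; the mutex models the page lock. */
	struct fake_page {
		pthread_mutex_t lock;
		bool pmd_numa;		/* models the pmd_numa hint bit */
	};

	/* Task A's path: the page is not misplaced, so clear the hint.
	 * The fix in this commit: keep the page lock held until the hint
	 * is fully cleared, closing the window in the diagram above. */
	static void clear_numa_hint(struct fake_page *page)
	{
		pthread_mutex_lock(&page->lock);
		page->pmd_numa = false;	/* no migration can start here */
		pthread_mutex_unlock(&page->lock);
	}

	/* Task B's path: the page looks misplaced; migrate only if the
	 * hint is still set once the lock is held. */
	static bool try_migrate(struct fake_page *page)
	{
		bool started;

		pthread_mutex_lock(&page->lock);
		started = page->pmd_numa;	/* re-check under the lock */
		pthread_mutex_unlock(&page->lock);
		return started;
	}

	int main(void)
	{
		struct fake_page page = {
			.lock = PTHREAD_MUTEX_INITIALIZER,
			.pmd_numa = true,
		};

		clear_numa_hint(&page);
		printf("migration would start: %d\n", try_migrate(&page));
		return 0;
	}

Before the patch, the equivalent of clear_numa_hint() dropped the lock
before touching the hint, so try_migrate() could begin against a PMD
that was about to be rewritten.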

Signed-off-by: Mel Gorman <[email protected]>
Reviewed-by: Rik van Riel <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
Mel Gorman authored and Ingo Molnar committed Oct 9, 2013
1 parent 8191acb commit a54a407
Showing 2 changed files with 26 additions and 26 deletions.
33 changes: 15 additions & 18 deletions mm/huge_memory.c
@@ -1304,57 +1304,54 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	target_nid = mpol_misplaced(page, vma, haddr);
 	if (target_nid == -1) {
 		/* If the page was locked, there are no parallel migrations */
-		if (page_locked) {
-			unlock_page(page);
+		if (page_locked)
 			goto clear_pmdnuma;
-		}
 
-		/* Otherwise wait for potential migrations and retry fault */
+		/*
+		 * Otherwise wait for potential migrations and retry. We do
+		 * relock and check_same as the page may no longer be mapped.
+		 * As the fault is being retried, do not account for it.
+		 */
 		spin_unlock(&mm->page_table_lock);
 		wait_on_page_locked(page);
 		page_nid = -1;
 		goto out;
 	}
 
 	/* Page is misplaced, serialise migrations and parallel THP splits */
 	get_page(page);
 	spin_unlock(&mm->page_table_lock);
-	if (!page_locked) {
+	if (!page_locked)
 		lock_page(page);
-		page_locked = true;
-	}
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PMD did not change while page_table_lock was released */
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp))) {
 		unlock_page(page);
 		put_page(page);
 		page_nid = -1;
 		goto out_unlock;
 	}
 
-	/* Migrate the THP to the requested node */
+	/*
+	 * Migrate the THP to the requested node, returns with page unlocked
+	 * and pmd_numa cleared.
+	 */
 	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
 	if (migrated)
 		page_nid = target_nid;
-	else
-		goto check_same;
 
 	goto out;
-
-check_same:
-	spin_lock(&mm->page_table_lock);
-	if (unlikely(!pmd_same(pmd, *pmdp))) {
-		/* Someone else took our fault */
-		page_nid = -1;
-		goto out_unlock;
-	}
 clear_pmdnuma:
+	BUG_ON(!PageLocked(page));
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
+	unlock_page(page);
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
 
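Taken together, the hunk above establishes one invariant in
do_huge_pmd_numa_page(): any path that reaches clear_pmdnuma still
holds the page lock, and migrate_misplaced_transhuge_page() returns
with the page already unlocked. A condensed sketch of the resulting
flow (illustrative, not the verbatim kernel source):

	if (target_nid == -1) {
		if (page_locked)
			goto clear_pmdnuma;	/* page lock still held */
		/* otherwise wait for any in-flight migration, retry fault */
	}

	/* returns with the page unlocked and pmd_numa cleared */
	migrated = migrate_misplaced_transhuge_page(mm, vma, pmdp, pmd,
						    addr, page, target_nid);
	goto out;

clear_pmdnuma:
	BUG_ON(!PageLocked(page));	/* the lock is held until... */
	pmd = pmd_mknonnuma(pmd);
	set_pmd_at(mm, haddr, pmdp, pmd);
	update_mmu_cache_pmd(vma, addr, pmdp);
	unlock_page(page);		/* ...the hint is fully cleared */

The old check_same retry under page_table_lock disappears because the
failure path in mm/migrate.c (below) now clears pmd_numa itself.
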
19 changes: 11 additions & 8 deletions mm/migrate.c
@@ -1713,12 +1713,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		unlock_page(new_page);
 		put_page(new_page);		/* Free it */
 
-		unlock_page(page);
+		/* Retake the callers reference and putback on LRU */
+		get_page(page);
 		putback_lru_page(page);
-
-		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-		isolated = 0;
-		goto out;
+		mod_zone_page_state(page_zone(page),
+			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
+		goto out_fail;
 	}
 
 	/*
@@ -1735,9 +1735,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 	entry = pmd_mkhuge(entry);
 
-	page_add_new_anon_rmap(new_page, vma, haddr);
-
+	pmdp_clear_flush(vma, haddr, pmd);
 	set_pmd_at(mm, haddr, pmd, entry);
+	page_add_new_anon_rmap(new_page, vma, haddr);
 	update_mmu_cache_pmd(vma, address, &entry);
 	page_remove_rmap(page);
 	/*
@@ -1756,7 +1756,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
 	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
 
-out:
 	mod_zone_page_state(page_zone(page),
 			NR_ISOLATED_ANON + page_lru,
 			-HPAGE_PMD_NR);
@@ -1765,6 +1764,10 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 out_fail:
 	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
+	entry = pmd_mknonnuma(entry);
+	set_pmd_at(mm, haddr, pmd, entry);
+	update_mmu_cache_pmd(vma, address, &entry);
+
 	unlock_page(page);
 	put_page(page);
 	return 0;
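
Two ordering properties of the mm/migrate.c changes are worth making
explicit. A condensed sketch of the new ordering (illustrative, not
verbatim):

	/* Success path: flush the stale NUMA-hinting PMD before the new
	 * one is published, and add the rmap only after the pagetable
	 * insertion, so a reverse-map walker can never find new_page
	 * before its pagetable entry exists. */
	pmdp_clear_flush(vma, haddr, pmd);
	set_pmd_at(mm, haddr, pmd, entry);
	page_add_new_anon_rmap(new_page, vma, haddr);

	/* Failure path (out_fail/out_dropref): rewrite the PMD without
	 * the NUMA bit before dropping the page lock, so the faulting
	 * task makes forward progress instead of re-taking the same
	 * hinting fault indefinitely. */
	entry = pmd_mknonnuma(entry);
	set_pmd_at(mm, haddr, pmd, entry);
	update_mmu_cache_pmd(vma, address, &entry);
	unlock_page(page);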
