Skip to content

Commit

Permalink
hugetlbfs: extend hugetlb_vma_lock to private VMAs
Browse files Browse the repository at this point in the history
Extend the locking scheme used to protect shared hugetlb mappings from
truncate vs page fault races, in order to protect private hugetlb mappings
(with resv_map) against MADV_DONTNEED.

Add a read-write semaphore to the resv_map data structure, and use that
from the hugetlb_vma_(un)lock_* functions, in preparation for closing the
race between MADV_DONTNEED and page faults.

Link: https://lkml.kernel.org/r/[email protected]
Fixes: 04ada09 ("hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing")
Signed-off-by: Rik van Riel <[email protected]>
Reviewed-by: Mike Kravetz <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Muchun Song <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
  • Loading branch information
rikvanriel authored and akpm00 committed Oct 18, 2023
1 parent 92fe9dc commit bf49169
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 4 deletions.
6 changes: 6 additions & 0 deletions include/linux/hugetlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct resv_map {
long adds_in_progress;
struct list_head region_cache;
long region_cache_count;
struct rw_semaphore rw_sema;
#ifdef CONFIG_CGROUP_HUGETLB
/*
* On private mappings, the counter to uncharge reservations is stored
Expand Down Expand Up @@ -1233,6 +1234,11 @@ static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
}

static inline bool __vma_private_lock(struct vm_area_struct *vma)
{
return (!(vma->vm_flags & VM_MAYSHARE)) && vma->vm_private_data;
}

/*
* Safe version of huge_pte_offset() to check the locks. See comments
* above huge_pte_offset().
Expand Down
41 changes: 37 additions & 4 deletions mm/hugetlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
static struct resv_map *vma_resv_map(struct vm_area_struct *vma);

static inline bool subpool_is_free(struct hugepage_subpool *spool)
{
Expand Down Expand Up @@ -267,6 +268,10 @@ void hugetlb_vma_lock_read(struct vm_area_struct *vma)
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

down_read(&vma_lock->rw_sema);
} else if (__vma_private_lock(vma)) {
struct resv_map *resv_map = vma_resv_map(vma);

down_read(&resv_map->rw_sema);
}
}

Expand All @@ -276,6 +281,10 @@ void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

up_read(&vma_lock->rw_sema);
} else if (__vma_private_lock(vma)) {
struct resv_map *resv_map = vma_resv_map(vma);

up_read(&resv_map->rw_sema);
}
}

Expand All @@ -285,6 +294,10 @@ void hugetlb_vma_lock_write(struct vm_area_struct *vma)
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

down_write(&vma_lock->rw_sema);
} else if (__vma_private_lock(vma)) {
struct resv_map *resv_map = vma_resv_map(vma);

down_write(&resv_map->rw_sema);
}
}

Expand All @@ -294,17 +307,27 @@ void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

up_write(&vma_lock->rw_sema);
} else if (__vma_private_lock(vma)) {
struct resv_map *resv_map = vma_resv_map(vma);

up_write(&resv_map->rw_sema);
}
}

int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
{
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

if (!__vma_shareable_lock(vma))
return 1;
if (__vma_shareable_lock(vma)) {
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

return down_write_trylock(&vma_lock->rw_sema);
return down_write_trylock(&vma_lock->rw_sema);
} else if (__vma_private_lock(vma)) {
struct resv_map *resv_map = vma_resv_map(vma);

return down_write_trylock(&resv_map->rw_sema);
}

return 1;
}

void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
Expand All @@ -313,6 +336,10 @@ void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

lockdep_assert_held(&vma_lock->rw_sema);
} else if (__vma_private_lock(vma)) {
struct resv_map *resv_map = vma_resv_map(vma);

lockdep_assert_held(&resv_map->rw_sema);
}
}

Expand Down Expand Up @@ -345,6 +372,11 @@ static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

__hugetlb_vma_unlock_write_put(vma_lock);
} else if (__vma_private_lock(vma)) {
struct resv_map *resv_map = vma_resv_map(vma);

/* no free for anon vmas, but still need to unlock */
up_write(&resv_map->rw_sema);
}
}

Expand Down Expand Up @@ -1068,6 +1100,7 @@ struct resv_map *resv_map_alloc(void)
kref_init(&resv_map->refs);
spin_lock_init(&resv_map->lock);
INIT_LIST_HEAD(&resv_map->regions);
init_rwsem(&resv_map->rw_sema);

resv_map->adds_in_progress = 0;
/*
Expand Down

0 comments on commit bf49169

Please sign in to comment.