Skip to content

Commit

Permalink
mm: move vma_shrink(), vma_expand() to internal header
Browse files Browse the repository at this point in the history
The vma_shrink() and vma_expand() functions are internal VMA manipulation
functions which we ought to abstract for use outside of memory management
code.

To achieve this, we replace shift_arg_pages() in fs/exec.c with an
invocation of a new relocate_vma_down() function implemented in mm/mmap.c,
which enables us to also move move_page_tables() and vma_iter_prev_range()
to internal.h.

The purpose of doing this is to isolate key VMA manipulation functions in
order that we can both abstract them and later render them easily
testable.

Link: https://lkml.kernel.org/r/3cfcd9ec433e032a85f636fdc0d7d98fafbd19c5.1722251717.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <[email protected]>
Reviewed-by: Vlastimil Babka <[email protected]>
Reviewed-by: Liam R. Howlett <[email protected]>
Cc: Alexander Viro <[email protected]>
Cc: Brendan Higgins <[email protected]>
Cc: Christian Brauner <[email protected]>
Cc: David Gow <[email protected]>
Cc: Eric W. Biederman <[email protected]>
Cc: Jan Kara <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Rae Moar <[email protected]>
Cc: SeongJae Park <[email protected]>
Cc: Shuah Khan <[email protected]>
Cc: Suren Baghdasaryan <[email protected]>
Cc: Pengfei Xu <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
  • Loading branch information
lorenzo-stoakes authored and akpm00 committed Sep 2, 2024
1 parent fa04c08 commit d61f0d5
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 91 deletions.
81 changes: 6 additions & 75 deletions fs/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -711,80 +711,6 @@ static int copy_strings_kernel(int argc, const char *const *argv,

#ifdef CONFIG_MMU

/*
* During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once
* the binfmt code determines where the new stack should reside, we shift it to
* its final location. The process proceeds as follows:
*
* 1) Use shift to calculate the new vma endpoints.
* 2) Extend vma to cover both the old and new ranges. This ensures the
* arguments passed to subsequent functions are consistent.
* 3) Move vma's page tables to the new range.
* 4) Free up any cleared pgd range.
* 5) Shrink the vma to cover only the new range.
*/
static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long old_start = vma->vm_start;
unsigned long old_end = vma->vm_end;
unsigned long length = old_end - old_start;
unsigned long new_start = old_start - shift;
unsigned long new_end = old_end - shift;
VMA_ITERATOR(vmi, mm, new_start);
struct vm_area_struct *next;
struct mmu_gather tlb;

BUG_ON(new_start > new_end);

/*
* ensure there are no vmas between where we want to go
* and where we are
*/
if (vma != vma_next(&vmi))
return -EFAULT;

vma_iter_prev_range(&vmi);
/*
* cover the whole range: [new_start, old_end)
*/
if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
return -ENOMEM;

/*
* move the page tables downwards, on failure we rely on
* process cleanup to remove whatever mess we made.
*/
if (length != move_page_tables(vma, old_start,
vma, new_start, length, false, true))
return -ENOMEM;

lru_add_drain();
tlb_gather_mmu(&tlb, mm);
next = vma_next(&vmi);
if (new_end > old_start) {
/*
* when the old and new regions overlap clear from new_end.
*/
free_pgd_range(&tlb, new_end, old_end, new_end,
next ? next->vm_start : USER_PGTABLES_CEILING);
} else {
/*
* otherwise, clean from old_start; this is done to not touch
* the address space in [new_end, old_start) some architectures
* have constraints on va-space that make this illegal (IA64) -
* for the others its just a little faster.
*/
free_pgd_range(&tlb, old_start, old_end, new_end,
next ? next->vm_start : USER_PGTABLES_CEILING);
}
tlb_finish_mmu(&tlb);

vma_prev(&vmi);
/* Shrink the vma to just the new range */
return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
}

/*
* Finalizes the stack vm_area_struct. The flags and permissions are updated,
* the stack is optionally relocated, and some extra space is added.
Expand Down Expand Up @@ -877,7 +803,12 @@ int setup_arg_pages(struct linux_binprm *bprm,

/* Move stack pages down in memory. */
if (stack_shift) {
ret = shift_arg_pages(vma, stack_shift);
/*
* During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once
* the binfmt code determines where the new stack should reside, we shift it to
* its final location.
*/
ret = relocate_vma_down(vma, stack_shift);
if (ret)
goto out_unlock;
}
Expand Down
17 changes: 1 addition & 16 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -1005,12 +1005,6 @@ static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
return mas_prev(&vmi->mas, 0);
}

static inline
struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
{
return mas_prev_range(&vmi->mas, 0);
}

static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
{
return vmi->mas.index;
Expand Down Expand Up @@ -2520,11 +2514,6 @@ int set_page_dirty_lock(struct page *page);

int get_cmdline(struct task_struct *task, char *buffer, int buflen);

extern unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long len,
bool need_rmap_locks, bool for_stack);

/*
* Flags used by change_protection(). For now we make it a bitmap so
* that we can pass in multiple flags just like parameters. However
Expand Down Expand Up @@ -3267,18 +3256,14 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node);

/* mmap.c */
extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long start, unsigned long end, pgoff_t pgoff,
struct vm_area_struct *next);
extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long start, unsigned long end, pgoff_t pgoff);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void unlink_file_vma(struct vm_area_struct *);
extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
unsigned long addr, unsigned long len, pgoff_t pgoff,
bool *need_rmap_locks);
extern void exit_mmap(struct mm_struct *);
int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift);

static inline int check_data_rlimit(unsigned long rlim,
unsigned long new,
Expand Down
18 changes: 18 additions & 0 deletions mm/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1305,6 +1305,12 @@ static inline struct vm_area_struct
vma_policy(vma), new_ctx, anon_vma_name(vma));
}

int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long start, unsigned long end, pgoff_t pgoff,
struct vm_area_struct *next);
int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long start, unsigned long end, pgoff_t pgoff);

enum {
/* mark page accessed */
FOLL_TOUCH = 1 << 16,
Expand Down Expand Up @@ -1528,6 +1534,12 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
return 0;
}

static inline
struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
{
return mas_prev_range(&vmi->mas, 0);
}

/*
* VMA lock generalization
*/
Expand Down Expand Up @@ -1639,4 +1651,10 @@ void unlink_file_vma_batch_init(struct unlink_vma_file_batch *);
void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *);
void unlink_file_vma_batch_final(struct unlink_vma_file_batch *);

/* mremap.c */
unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long len,
bool need_rmap_locks, bool for_stack);

#endif /* __MM_INTERNAL_H */
81 changes: 81 additions & 0 deletions mm/mmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -4088,3 +4088,84 @@ static int __meminit init_reserve_notifier(void)
return 0;
}
subsys_initcall(init_reserve_notifier);

/*
* Relocate a VMA downwards by shift bytes. There cannot be any VMAs between
* this VMA and its relocated range, which will now reside at [vma->vm_start -
* shift, vma->vm_end - shift).
*
* This function is almost certainly NOT what you want for anything other than
* early executable temporary stack relocation.
*/
int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
{
/*
* The process proceeds as follows:
*
* 1) Use shift to calculate the new vma endpoints.
* 2) Extend vma to cover both the old and new ranges. This ensures the
* arguments passed to subsequent functions are consistent.
* 3) Move vma's page tables to the new range.
* 4) Free up any cleared pgd range.
* 5) Shrink the vma to cover only the new range.
*/

struct mm_struct *mm = vma->vm_mm;
unsigned long old_start = vma->vm_start;
unsigned long old_end = vma->vm_end;
unsigned long length = old_end - old_start;
unsigned long new_start = old_start - shift;
unsigned long new_end = old_end - shift;
VMA_ITERATOR(vmi, mm, new_start);
struct vm_area_struct *next;
struct mmu_gather tlb;

BUG_ON(new_start > new_end);

/*
* ensure there are no vmas between where we want to go
* and where we are
*/
if (vma != vma_next(&vmi))
return -EFAULT;

vma_iter_prev_range(&vmi);
/*
* cover the whole range: [new_start, old_end)
*/
if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
return -ENOMEM;

/*
* move the page tables downwards, on failure we rely on
* process cleanup to remove whatever mess we made.
*/
if (length != move_page_tables(vma, old_start,
vma, new_start, length, false, true))
return -ENOMEM;

lru_add_drain();
tlb_gather_mmu(&tlb, mm);
next = vma_next(&vmi);
if (new_end > old_start) {
/*
* when the old and new regions overlap clear from new_end.
*/
free_pgd_range(&tlb, new_end, old_end, new_end,
next ? next->vm_start : USER_PGTABLES_CEILING);
} else {
/*
* otherwise, clean from old_start; this is done to not touch
* the address space in [new_end, old_start) some architectures
* have constraints on va-space that make this illegal (IA64) -
* for the others its just a little faster.
*/
free_pgd_range(&tlb, old_start, old_end, new_end,
next ? next->vm_start : USER_PGTABLES_CEILING);
}
tlb_finish_mmu(&tlb);

vma_prev(&vmi);
/* Shrink the vma to just the new range */
return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
}

0 comments on commit d61f0d5

Please sign in to comment.