Skip to content

Commit

Permalink
mm: make mm->pinned_vm an atomic64 counter
Browse files Browse the repository at this point in the history
Taking a sleeping lock to _only_ increment a variable is quite the
overkill, and pretty much all users do this. Furthermore, some drivers
(e.g. infiniband and scif) that need pinned semantics can go to quite
some trouble to actually delay (un)accounting for pinned pages via a
workqueue when it is not possible to acquire the lock.

By making the counter atomic we no longer need to hold the mmap_sem and
can simplify some code around it for pinned_vm users. The counter is 64-bit
so that we need not worry about overflows, such as those caused by rdma
input controlled from userspace.

Reviewed-by: Ira Weiny <[email protected]>
Reviewed-by: Christoph Lameter <[email protected]>
Reviewed-by: Daniel Jordan <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
Signed-off-by: Davidlohr Bueso <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
  • Loading branch information
Davidlohr Bueso authored and jgunthorpe committed Feb 7, 2019
1 parent a2bfd70 commit 70f8a3c
Show file tree
Hide file tree
Showing 10 changed files with 28 additions and 27 deletions.
12 changes: 6 additions & 6 deletions drivers/infiniband/core/umem.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,13 +166,13 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

down_write(&mm->mmap_sem);
if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) ||
(new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) {
new_pinned = atomic64_read(&mm->pinned_vm) + npages;
if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
up_write(&mm->mmap_sem);
ret = -ENOMEM;
goto out;
}
mm->pinned_vm = new_pinned;
atomic64_set(&mm->pinned_vm, new_pinned);
up_write(&mm->mmap_sem);

cur_base = addr & PAGE_MASK;
Expand Down Expand Up @@ -234,7 +234,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
__ib_umem_release(context->device, umem, 0);
vma:
down_write(&mm->mmap_sem);
mm->pinned_vm -= ib_umem_num_pages(umem);
atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
up_write(&mm->mmap_sem);
out:
if (vma_list)
Expand Down Expand Up @@ -263,7 +263,7 @@ static void ib_umem_release_defer(struct work_struct *work)
struct ib_umem *umem = container_of(work, struct ib_umem, work);

down_write(&umem->owning_mm->mmap_sem);
umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
up_write(&umem->owning_mm->mmap_sem);

__ib_umem_release_tail(umem);
Expand Down Expand Up @@ -302,7 +302,7 @@ void ib_umem_release(struct ib_umem *umem)
} else {
down_write(&umem->owning_mm->mmap_sem);
}
umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
up_write(&umem->owning_mm->mmap_sem);

__ib_umem_release_tail(umem);
Expand Down
6 changes: 3 additions & 3 deletions drivers/infiniband/hw/hfi1/user_pages.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
size = DIV_ROUND_UP(size, PAGE_SIZE);

down_read(&mm->mmap_sem);
pinned = mm->pinned_vm;
pinned = atomic64_read(&mm->pinned_vm);
up_read(&mm->mmap_sem);

/* First, check the absolute limit against all pinned pages. */
Expand All @@ -112,7 +112,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
return ret;

down_write(&mm->mmap_sem);
mm->pinned_vm += ret;
atomic64_add(ret, &mm->pinned_vm);
up_write(&mm->mmap_sem);

return ret;
Expand All @@ -131,7 +131,7 @@ void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,

if (mm) { /* during close after signal, mm can be NULL */
down_write(&mm->mmap_sem);
mm->pinned_vm -= npages;
atomic64_sub(npages, &mm->pinned_vm);
up_write(&mm->mmap_sem);
}
}
4 changes: 2 additions & 2 deletions drivers/infiniband/hw/qib/qib_user_pages.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
goto bail_release;
}

current->mm->pinned_vm += num_pages;
atomic64_add(num_pages, &current->mm->pinned_vm);

ret = 0;
goto bail;
Expand Down Expand Up @@ -156,7 +156,7 @@ void qib_release_user_pages(struct page **p, size_t num_pages)
__qib_release_user_pages(p, num_pages, 1);

if (current->mm) {
current->mm->pinned_vm -= num_pages;
atomic64_sub(num_pages, &current->mm->pinned_vm);
up_write(&current->mm->mmap_sem);
}
}
8 changes: 4 additions & 4 deletions drivers/infiniband/hw/usnic/usnic_uiom.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
uiomr->owning_mm = mm = current->mm;
down_write(&mm->mmap_sem);

locked = npages + current->mm->pinned_vm;
locked = npages + atomic64_read(&current->mm->pinned_vm);
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
Expand Down Expand Up @@ -187,7 +187,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
if (ret < 0)
usnic_uiom_put_pages(chunk_list, 0);
else {
mm->pinned_vm = locked;
atomic64_set(&mm->pinned_vm, locked);
mmgrab(uiomr->owning_mm);
}

Expand Down Expand Up @@ -441,7 +441,7 @@ static void usnic_uiom_release_defer(struct work_struct *work)
container_of(work, struct usnic_uiom_reg, work);

down_write(&uiomr->owning_mm->mmap_sem);
uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
atomic64_sub(usnic_uiom_num_pages(uiomr), &uiomr->owning_mm->pinned_vm);
up_write(&uiomr->owning_mm->mmap_sem);

__usnic_uiom_release_tail(uiomr);
Expand Down Expand Up @@ -469,7 +469,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
} else {
down_write(&uiomr->owning_mm->mmap_sem);
}
uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
atomic64_sub(usnic_uiom_num_pages(uiomr), &uiomr->owning_mm->pinned_vm);
up_write(&uiomr->owning_mm->mmap_sem);

__usnic_uiom_release_tail(uiomr);
Expand Down
6 changes: 3 additions & 3 deletions drivers/misc/mic/scif/scif_rma.c
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ __scif_dec_pinned_vm_lock(struct mm_struct *mm,
} else {
down_write(&mm->mmap_sem);
}
mm->pinned_vm -= nr_pages;
atomic64_sub(nr_pages, &mm->pinned_vm);
up_write(&mm->mmap_sem);
return 0;
}
Expand All @@ -299,15 +299,15 @@ static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
return 0;

locked = nr_pages;
locked += mm->pinned_vm;
locked += atomic64_read(&mm->pinned_vm);
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
dev_err(scif_info.mdev.this_device,
"locked(%lu) > lock_limit(%lu)\n",
locked, lock_limit);
return -ENOMEM;
}
mm->pinned_vm = locked;
atomic64_set(&mm->pinned_vm, locked);
return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion fs/proc/task_mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm));
SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
Expand Down
2 changes: 1 addition & 1 deletion include/linux/mm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ struct mm_struct {

unsigned long total_vm; /* Total pages mapped */
unsigned long locked_vm; /* Pages that have PG_mlocked set */
unsigned long pinned_vm; /* Refcount permanently increased */
atomic64_t pinned_vm; /* Refcount permanently increased */
unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
unsigned long stack_vm; /* VM_STACK */
Expand Down
8 changes: 4 additions & 4 deletions kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -5459,7 +5459,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)

/* now it's safe to free the pages */
atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);

/* this has to be the last one */
rb_free_aux(rb);
Expand Down Expand Up @@ -5532,7 +5532,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
*/

atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
vma->vm_mm->pinned_vm -= mmap_locked;
atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
free_uid(mmap_user);

out_put:
Expand Down Expand Up @@ -5680,7 +5680,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)

lock_limit = rlimit(RLIMIT_MEMLOCK);
lock_limit >>= PAGE_SHIFT;
locked = vma->vm_mm->pinned_vm + extra;
locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;

if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
!capable(CAP_IPC_LOCK)) {
Expand Down Expand Up @@ -5721,7 +5721,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
unlock:
if (!ret) {
atomic_long_add(user_extra, &user->locked_vm);
vma->vm_mm->pinned_vm += extra;
atomic64_add(extra, &vma->vm_mm->pinned_vm);

atomic_inc(&event->mmap_count);
} else if (rb) {
Expand Down
2 changes: 1 addition & 1 deletion kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -981,7 +981,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm_pgtables_bytes_init(mm);
mm->map_count = 0;
mm->locked_vm = 0;
mm->pinned_vm = 0;
atomic64_set(&mm->pinned_vm, 0);
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
spin_lock_init(&mm->arg_lock);
Expand Down
5 changes: 3 additions & 2 deletions mm/debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ void dump_mm(const struct mm_struct *mm)
"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
"pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
"pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
"pinned_vm %llx data_vm %lx exec_vm %lx stack_vm %lx\n"
"start_code %lx end_code %lx start_data %lx end_data %lx\n"
"start_brk %lx brk %lx start_stack %lx\n"
"arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
Expand Down Expand Up @@ -166,7 +166,8 @@ void dump_mm(const struct mm_struct *mm)
mm_pgtables_bytes(mm),
mm->map_count,
mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm,
atomic64_read(&mm->pinned_vm),
mm->data_vm, mm->exec_vm, mm->stack_vm,
mm->start_code, mm->end_code, mm->start_data, mm->end_data,
mm->start_brk, mm->brk, mm->start_stack,
mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,
Expand Down

0 comments on commit 70f8a3c

Please sign in to comment.