Skip to content

Commit

Permalink
mm: fix mbind vma merge problem
Browse files Browse the repository at this point in the history
Strangely, current mbind() doesn't merge vma with neighbor vma although it's possible.
Unfortunately, many vma can reduce performance...

This patch fixes it.

    reproduced program
    ----------------------------------------------------------------
     #include <numaif.h>
     #include <numa.h>
     #include <sys/mman.h>
     #include <stdio.h>
     #include <unistd.h>
     #include <stdlib.h>
     #include <string.h>

    static unsigned long pagesize;

    int main(int argc, char** argv)
    {
    	void* addr;
    	int ch;
    	int node;
    	struct bitmask *nmask = numa_allocate_nodemask();
    	int err;
    	int node_set = 0;
    	char buf[128];

    	while ((ch = getopt(argc, argv, "n:")) != -1){
    		switch (ch){
    		case 'n':
    			node = strtol(optarg, NULL, 0);
    			numa_bitmask_setbit(nmask, node);
    			node_set = 1;
    			break;
    		default:
    			;
    		}
    	}
    	argc -= optind;
    	argv += optind;

    	if (!node_set)
    		numa_bitmask_setbit(nmask, 0);

    	pagesize = getpagesize();

    	addr = mmap(NULL, pagesize*3, PROT_READ|PROT_WRITE,
    		    MAP_ANON|MAP_PRIVATE, 0, 0);
    	if (addr == MAP_FAILED)
    		perror("mmap "), exit(1);

    	fprintf(stderr, "pid = %d \n" "addr = %p\n", getpid(), addr);

    	/* make page populate */
    	memset(addr, 0, pagesize*3);

    	/* first mbind */
    	err = mbind(addr+pagesize, pagesize, MPOL_BIND, nmask->maskp,
    		    nmask->size, MPOL_MF_MOVE_ALL);
    	if (err)
    		error("mbind1 ");

    	/* second mbind */
    	err = mbind(addr, pagesize*3, MPOL_DEFAULT, NULL, 0, 0);
    	if (err)
    		error("mbind2 ");

    	sprintf(buf, "cat /proc/%d/maps", getpid());
    	system(buf);

    	return 0;
    }
    ----------------------------------------------------------------

result without this patch

	addr = 0x7fe26ef09000
	[snip]
	7fe26ef09000-7fe26ef0a000 rw-p 00000000 00:00 0
	7fe26ef0a000-7fe26ef0b000 rw-p 00000000 00:00 0
	7fe26ef0b000-7fe26ef0c000 rw-p 00000000 00:00 0
	7fe26ef0c000-7fe26ef0d000 rw-p 00000000 00:00 0

	=> 0x7fe26ef09000-0x7fe26ef0c000 have three vmas.

result with this patch

	addr = 0x7fc9ebc76000
	[snip]
	7fc9ebc76000-7fc9ebc7a000 rw-p 00000000 00:00 0
	7fffbe690000-7fffbe6a5000 rw-p 00000000	00:00 0	[stack]

	=> 0x7fc9ebc76000-0x7fc9ebc7a000 have only one vma.

[[email protected]: fix file offset passed to vma_merge()]
Signed-off-by: KOSAKI Motohiro <[email protected]>
Reviewed-by: Christoph Lameter <[email protected]>
Cc: Nick Piggin <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Lee Schermerhorn <[email protected]>
Signed-off-by: Minchan Kim <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
kosaki authored and torvalds committed Mar 6, 2010
1 parent 93e4a89 commit 9d8cebd
Showing 1 changed file with 39 additions and 13 deletions.
52 changes: 39 additions & 13 deletions mm/mempolicy.c
Original file line number Diff line number Diff line change
Expand Up @@ -563,24 +563,50 @@ static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new)
}

/* Step 2: apply policy to a range and do splits. */
static int mbind_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct mempolicy *new)
static int mbind_range(struct mm_struct *mm, unsigned long start,
unsigned long end, struct mempolicy *new_pol)
{
struct vm_area_struct *next;
int err;
struct vm_area_struct *prev;
struct vm_area_struct *vma;
int err = 0;
pgoff_t pgoff;
unsigned long vmstart;
unsigned long vmend;

err = 0;
for (; vma && vma->vm_start < end; vma = next) {
vma = find_vma_prev(mm, start, &prev);
if (!vma || vma->vm_start > start)
return -EFAULT;

for (; vma && vma->vm_start < end; prev = vma, vma = next) {
next = vma->vm_next;
if (vma->vm_start < start)
err = split_vma(vma->vm_mm, vma, start, 1);
if (!err && vma->vm_end > end)
err = split_vma(vma->vm_mm, vma, end, 0);
if (!err)
err = policy_vma(vma, new);
vmstart = max(start, vma->vm_start);
vmend = min(end, vma->vm_end);

pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
vma->anon_vma, vma->vm_file, pgoff, new_pol);
if (prev) {
vma = prev;
next = vma->vm_next;
continue;
}
if (vma->vm_start != vmstart) {
err = split_vma(vma->vm_mm, vma, vmstart, 1);
if (err)
goto out;
}
if (vma->vm_end != vmend) {
err = split_vma(vma->vm_mm, vma, vmend, 0);
if (err)
goto out;
}
err = policy_vma(vma, new_pol);
if (err)
break;
goto out;
}

out:
return err;
}

Expand Down Expand Up @@ -1047,7 +1073,7 @@ static long do_mbind(unsigned long start, unsigned long len,
if (!IS_ERR(vma)) {
int nr_failed = 0;

err = mbind_range(vma, start, end, new);
err = mbind_range(mm, start, end, new);

if (!list_empty(&pagelist))
nr_failed = migrate_pages(&pagelist, new_vma_page,
Expand Down

0 comments on commit 9d8cebd

Please sign in to comment.