Skip to content

Commit

Permalink
NOMMU: Make VMAs per MM as for MMU-mode linux
Browse files Browse the repository at this point in the history
Make VMAs per mm_struct as for MMU-mode linux.  This solves two problems:

 (1) In SYSV SHM where nattch for a segment does not reflect the number of
     shmat's (and forks) done.

 (2) In mmap() where the VMA's vm_mm is set to point to the parent mm by an
     exec'ing process when VM_EXECUTABLE is specified, regardless of the fact
     that a VMA might be shared and already have its vm_mm assigned to another
     process or a dead process.

A new struct (vm_region) is introduced to track a mapped region and to remember
the circumstances under which it may be shared and the vm_list_struct structure
is discarded as it's no longer required.

This patch makes the following additional changes:

 (1) Regions are now allocated with alloc_pages() rather than kmalloc() and
     with no recourse to __GFP_COMP, so the pages are not composite.  Instead,
     each page has a reference on it held by the region.  Anything else that is
     interested in such a page will have to get a reference on it to retain it.
     When the pages are released due to unmapping, each page is passed to
     put_page() and will be freed when the page usage count reaches zero.

 (2) Excess pages are trimmed after an allocation as the allocation must be
     made as a power-of-2 quantity of pages.

 (3) VMAs are added to the parent MM's R/B tree and mmap lists.  As an MM may
     end up with overlapping VMAs within the tree, the VMA struct address is
     appended to the sort key.

 (4) Non-anonymous VMAs are now added to the backing inode's prio list.

 (5) Holes may be punched in anonymous VMAs with munmap(), releasing parts of
     the backing region.  The VMA and region structs will be split if
     necessary.

 (6) sys_shmdt() only releases one attachment to a SYSV IPC shared memory
     segment instead of all the attachments at that address.  Multiple
     shmat()'s return the same address under NOMMU-mode instead of different
     virtual addresses as under MMU-mode.

 (7) Core dumping for ELF-FDPIC requires fewer exceptions for NOMMU-mode.

 (8) /proc/maps is now the global list of mapped regions, and may list bits
     that aren't actually mapped anywhere.

 (9) /proc/meminfo gains a line (tagged "MmapCopy") that indicates the amount
     of RAM currently allocated by mmap to hold mappable regions that can't be
     mapped directly.  These are copies of the backing device or file if not
     anonymous.

These changes make NOMMU mode more similar to MMU mode.  The downside is that,
with this patch, NOMMU mode requires some extra memory to track things compared
to before (VMAs are no longer shared, and there are now region structs).

Signed-off-by: David Howells <[email protected]>
Tested-by: Mike Frysinger <[email protected]>
Acked-by: Paul Mundt <[email protected]>
  • Loading branch information
dhowells committed Jan 8, 2009
1 parent 4183638 commit 8feae13
Show file tree
Hide file tree
Showing 23 changed files with 860 additions and 436 deletions.
18 changes: 12 additions & 6 deletions Documentation/nommu-mmap.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,18 @@ and it's also much more restricted in the latter case:
FURTHER NOTES ON NO-MMU MMAP
============================

(*) A request for a private mapping of less than a page in size may not return
a page-aligned buffer. This is because the kernel calls kmalloc() to
allocate the buffer, not get_free_page().

(*) A list of all the mappings on the system is visible through /proc/maps in
no-MMU mode.
(*) A request for a private mapping of a file may return a buffer that is not
page-aligned. This is because XIP may take place, and the data may not be
page-aligned in the backing store.

(*) A request for an anonymous mapping will always be page aligned. If
possible, the size of the request should be a power of two; otherwise some
of the space may be wasted, as the kernel must allocate a power-of-2
granule but will only discard the excess if appropriately configured, as
this has an effect on fragmentation.

(*) A list of all the private copy and anonymous mappings on the system is
visible through /proc/maps in no-MMU mode.

(*) A list of all the mappings in use by a process is visible through
/proc/<pid>/maps in no-MMU mode.
Expand Down
1 change: 0 additions & 1 deletion arch/arm/include/asm/mmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ typedef struct {
* modified for 2.6 by Hyok S. Choi <[email protected]>
*/
typedef struct {
struct vm_list_struct *vmlist;
unsigned long end_brk;
} mm_context_t;

Expand Down
1 change: 0 additions & 1 deletion arch/blackfin/include/asm/mmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ struct sram_list_struct {
};

typedef struct {
struct vm_list_struct *vmlist;
unsigned long end_brk;
unsigned long stack_start;

Expand Down
6 changes: 3 additions & 3 deletions arch/blackfin/kernel/ptrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,15 +160,15 @@ put_reg(struct task_struct *task, int regno, unsigned long data)
static inline int is_user_addr_valid(struct task_struct *child,
unsigned long start, unsigned long len)
{
struct vm_list_struct *vml;
struct vm_area_struct *vma;
struct sram_list_struct *sraml;

/* overflow */
if (start + len < start)
return -EIO;

for (vml = child->mm->context.vmlist; vml; vml = vml->next)
if (start >= vml->vma->vm_start && start + len < vml->vma->vm_end)
vma = find_vma(child->mm, start);
if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
return 0;

for (sraml = child->mm->context.sram_list; sraml; sraml = sraml->next)
Expand Down
11 changes: 6 additions & 5 deletions arch/blackfin/kernel/traps.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/fs.h>
#include <linux/rbtree.h>
#include <asm/traps.h>
#include <asm/cacheflush.h>
#include <asm/cplb.h>
Expand Down Expand Up @@ -83,6 +84,7 @@ static void decode_address(char *buf, unsigned long address)
struct mm_struct *mm;
unsigned long flags, offset;
unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic();
struct rb_node *n;

#ifdef CONFIG_KALLSYMS
unsigned long symsize;
Expand Down Expand Up @@ -128,9 +130,10 @@ static void decode_address(char *buf, unsigned long address)
if (!mm)
continue;

vml = mm->context.vmlist;
while (vml) {
struct vm_area_struct *vma = vml->vma;
for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
struct vm_area_struct *vma;

vma = rb_entry(n, struct vm_area_struct, vm_rb);

if (address >= vma->vm_start && address < vma->vm_end) {
char _tmpbuf[256];
Expand Down Expand Up @@ -176,8 +179,6 @@ static void decode_address(char *buf, unsigned long address)

goto done;
}

vml = vml->next;
}
if (!in_atomic)
mmput(mm);
Expand Down
11 changes: 6 additions & 5 deletions arch/frv/kernel/ptrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ static inline int put_reg(struct task_struct *task, int regno,
}

/*
* check that an address falls within the bounds of the target process's memory mappings
* check that an address falls within the bounds of the target process's memory
* mappings
*/
static inline int is_user_addr_valid(struct task_struct *child,
unsigned long start, unsigned long len)
Expand All @@ -79,11 +80,11 @@ static inline int is_user_addr_valid(struct task_struct *child,
return -EIO;
return 0;
#else
struct vm_list_struct *vml;
struct vm_area_struct *vma;

for (vml = child->mm->context.vmlist; vml; vml = vml->next)
if (start >= vml->vma->vm_start && start + len <= vml->vma->vm_end)
return 0;
vma = find_vma(child->mm, start);
if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
return 0;

return -EIO;
#endif
Expand Down
1 change: 0 additions & 1 deletion arch/h8300/include/asm/mmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
/* Copyright (C) 2002, David McCullough <[email protected]> */

typedef struct {
struct vm_list_struct *vmlist;
unsigned long end_brk;
} mm_context_t;

Expand Down
1 change: 0 additions & 1 deletion arch/m68knommu/include/asm/mmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
/* Copyright (C) 2002, David McCullough <[email protected]> */

typedef struct {
struct vm_list_struct *vmlist;
unsigned long end_brk;
} mm_context_t;

Expand Down
1 change: 0 additions & 1 deletion arch/sh/include/asm/mmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ typedef struct {
mm_context_id_t id;
void *vdso;
#else
struct vm_list_struct *vmlist;
unsigned long end_brk;
#endif
#ifdef CONFIG_BINFMT_ELF_FDPIC
Expand Down
27 changes: 3 additions & 24 deletions fs/binfmt_elf_fdpic.c
Original file line number Diff line number Diff line change
Expand Up @@ -1567,11 +1567,9 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
static int elf_fdpic_dump_segments(struct file *file, size_t *size,
unsigned long *limit, unsigned long mm_flags)
{
struct vm_list_struct *vml;

for (vml = current->mm->context.vmlist; vml; vml = vml->next) {
struct vm_area_struct *vma = vml->vma;
struct vm_area_struct *vma;

for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
if (!maydump(vma, mm_flags))
continue;

Expand Down Expand Up @@ -1617,9 +1615,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
elf_fpxregset_t *xfpu = NULL;
#endif
int thread_status_size = 0;
#ifndef CONFIG_MMU
struct vm_list_struct *vml;
#endif
elf_addr_t *auxv;
unsigned long mm_flags;

Expand Down Expand Up @@ -1685,13 +1680,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
fill_prstatus(prstatus, current, signr);
elf_core_copy_regs(&prstatus->pr_reg, regs);

#ifdef CONFIG_MMU
segs = current->mm->map_count;
#else
segs = 0;
for (vml = current->mm->context.vmlist; vml; vml = vml->next)
segs++;
#endif
#ifdef ELF_CORE_EXTRA_PHDRS
segs += ELF_CORE_EXTRA_PHDRS;
#endif
Expand Down Expand Up @@ -1766,20 +1755,10 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
mm_flags = current->mm->flags;

/* write program headers for segments dump */
for (
#ifdef CONFIG_MMU
vma = current->mm->mmap; vma; vma = vma->vm_next
#else
vml = current->mm->context.vmlist; vml; vml = vml->next
#endif
) {
for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
struct elf_phdr phdr;
size_t sz;

#ifndef CONFIG_MMU
vma = vml->vma;
#endif

sz = vma->vm_end - vma->vm_start;

phdr.p_type = PT_LOAD;
Expand Down
2 changes: 0 additions & 2 deletions fs/proc/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ do { \
(vmi)->used = 0; \
(vmi)->largest_chunk = 0; \
} while(0)

extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
#endif

extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
Expand Down
6 changes: 6 additions & 0 deletions fs/proc/meminfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
"HighFree: %8lu kB\n"
"LowTotal: %8lu kB\n"
"LowFree: %8lu kB\n"
#endif
#ifndef CONFIG_MMU
"MmapCopy: %8lu kB\n"
#endif
"SwapTotal: %8lu kB\n"
"SwapFree: %8lu kB\n"
Expand Down Expand Up @@ -115,6 +118,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
K(i.freehigh),
K(i.totalram-i.totalhigh),
K(i.freeram-i.freehigh),
#endif
#ifndef CONFIG_MMU
K((unsigned long) atomic_read(&mmap_pages_allocated)),
#endif
K(i.totalswap),
K(i.freeswap),
Expand Down
71 changes: 32 additions & 39 deletions fs/proc/nommu.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,33 +33,33 @@
#include "internal.h"

/*
* display a single VMA to a sequenced file
* display a single region to a sequenced file
*/
int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
static int nommu_region_show(struct seq_file *m, struct vm_region *region)
{
unsigned long ino = 0;
struct file *file;
dev_t dev = 0;
int flags, len;

flags = vma->vm_flags;
file = vma->vm_file;
flags = region->vm_flags;
file = region->vm_file;

if (file) {
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
struct inode *inode = region->vm_file->f_path.dentry->d_inode;
dev = inode->i_sb->s_dev;
ino = inode->i_ino;
}

seq_printf(m,
"%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
vma->vm_start,
vma->vm_end,
region->vm_start,
region->vm_end,
flags & VM_READ ? 'r' : '-',
flags & VM_WRITE ? 'w' : '-',
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
((loff_t)region->vm_pgoff) << PAGE_SHIFT,
MAJOR(dev), MINOR(dev), ino, &len);

if (file) {
Expand All @@ -75,69 +75,62 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
}

/*
* display a list of all the VMAs the kernel knows about
* display a list of all the REGIONs the kernel knows about
* - nommu kernels have a single flat list
*/
static int nommu_vma_list_show(struct seq_file *m, void *v)
static int nommu_region_list_show(struct seq_file *m, void *_p)
{
struct vm_area_struct *vma;
struct rb_node *p = _p;

vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb);
return nommu_vma_show(m, vma);
return nommu_region_show(m, rb_entry(p, struct vm_region, vm_rb));
}

static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos)
static void *nommu_region_list_start(struct seq_file *m, loff_t *_pos)
{
struct rb_node *_rb;
struct rb_node *p;
loff_t pos = *_pos;
void *next = NULL;

down_read(&nommu_vma_sem);
down_read(&nommu_region_sem);

for (_rb = rb_first(&nommu_vma_tree); _rb; _rb = rb_next(_rb)) {
if (pos == 0) {
next = _rb;
break;
}
pos--;
}

return next;
for (p = rb_first(&nommu_region_tree); p; p = rb_next(p))
if (pos-- == 0)
return p;
return NULL;
}

static void nommu_vma_list_stop(struct seq_file *m, void *v)
static void nommu_region_list_stop(struct seq_file *m, void *v)
{
up_read(&nommu_vma_sem);
up_read(&nommu_region_sem);
}

static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos)
static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos)
{
(*pos)++;
return rb_next((struct rb_node *) v);
}

static const struct seq_operations proc_nommu_vma_list_seqop = {
.start = nommu_vma_list_start,
.next = nommu_vma_list_next,
.stop = nommu_vma_list_stop,
.show = nommu_vma_list_show
static struct seq_operations proc_nommu_region_list_seqop = {
.start = nommu_region_list_start,
.next = nommu_region_list_next,
.stop = nommu_region_list_stop,
.show = nommu_region_list_show
};

static int proc_nommu_vma_list_open(struct inode *inode, struct file *file)
static int proc_nommu_region_list_open(struct inode *inode, struct file *file)
{
return seq_open(file, &proc_nommu_vma_list_seqop);
return seq_open(file, &proc_nommu_region_list_seqop);
}

static const struct file_operations proc_nommu_vma_list_operations = {
.open = proc_nommu_vma_list_open,
static const struct file_operations proc_nommu_region_list_operations = {
.open = proc_nommu_region_list_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};

static int __init proc_nommu_init(void)
{
proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations);
proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations);
return 0;
}

Expand Down
Loading

0 comments on commit 8feae13

Please sign in to comment.