Skip to content

Commit

Permalink
uml: runtime host VMSPLIT detection
Browse files Browse the repository at this point in the history
Calculate TASK_SIZE at run-time by figuring out the host's VMSPLIT - this is
needed on i386 if UML is to run on hosts with varying VMSPLITs without
recompilation.

TASK_SIZE is now defined in terms of a variable, task_size.  This gets rid of
an include of pgtable.h from processor.h, which can cause include loops.

On i386, task_size is calculated early in boot by probing the address space in
a binary search to figure out where the boundary between usable and non-usable
memory is.  This tries to make sure that a page that is considered to be in
userspace is, or can be made, read-write.  I'm concerned about a system-global
VDSO page in kernel memory being hit and considered to be a userspace page.

On x86_64, task_size is just the old value of CONFIG_TOP_ADDR.

A bunch of config variables are gone now.  CONFIG_TOP_ADDR is directly replaced
by TASK_SIZE.  NEST_LEVEL is gone since the relocation of the stubs makes it
irrelevant.  All the HOST_VMSPLIT stuff is gone.  All references to these in
arch/um/Makefile are also gone.

I noticed and fixed a missing extern in os.h when adding os_get_task_size.

Note: This has been revised to fix the 32-bit UML on 64-bit host bug that
Miklos ran into.

Signed-off-by: Jeff Dike <[email protected]>
Cc: Miklos Szeredi <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
cfd-36 authored and Linus Torvalds committed Feb 8, 2008
1 parent 2f569af commit 536788f
Show file tree
Hide file tree
Showing 15 changed files with 153 additions and 75 deletions.
11 changes: 0 additions & 11 deletions arch/um/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -203,17 +203,6 @@ config NR_CPUS
depends on SMP
default "32"

config NEST_LEVEL
int "Nesting level"
default "0"
help
This is set to the number of layers of UMLs that this UML will be run
in. Normally, this is zero, meaning that it will run directly on the
host. Setting it to one will build a UML that can run inside a UML
that is running on the host. Generally, if you intend this UML to run
inside another UML, set CONFIG_NEST_LEVEL to one more than the host
UML.

config HIGHMEM
bool "Highmem support (EXPERIMENTAL)"
depends on !64BIT && EXPERIMENTAL
Expand Down
37 changes: 0 additions & 37 deletions arch/um/Kconfig.i386
Original file line number Diff line number Diff line change
Expand Up @@ -23,43 +23,6 @@ config SEMAPHORE_SLEEPERS
bool
default y

choice
prompt "Host memory split"
default HOST_VMSPLIT_3G
help
This is needed when the host kernel on which you run has a non-default
(like 2G/2G) memory split, instead of the customary 3G/1G. If you did
not recompile your own kernel but use the default distro's one, you can
safely accept the "Default split" option.

It can be enabled on recent (>=2.6.16-rc2) vanilla kernels via
CONFIG_VM_SPLIT_*, or on previous kernels with special patches (-ck
patchset by Con Kolivas, or other ones) - option names match closely the
host CONFIG_VM_SPLIT_* ones.

A lower setting (where 1G/3G is lowest and 3G/1G is higher) will
tolerate even more "normal" host kernels, but a higher setting will be
stricter.

So, if you do not know what to do here, say 'Default split'.

config HOST_VMSPLIT_3G
bool "Default split (3G/1G user/kernel host split)"
config HOST_VMSPLIT_3G_OPT
bool "3G/1G user/kernel host split (for full 1G low memory)"
config HOST_VMSPLIT_2G
bool "2G/2G user/kernel host split"
config HOST_VMSPLIT_1G
bool "1G/3G user/kernel host split"
endchoice

config TOP_ADDR
hex
default 0xB0000000 if HOST_VMSPLIT_3G_OPT
default 0x78000000 if HOST_VMSPLIT_2G
default 0x40000000 if HOST_VMSPLIT_1G
default 0xC0000000

config 3_LEVEL_PGTABLES
bool "Three-level pagetables (EXPERIMENTAL)"
default n
Expand Down
4 changes: 0 additions & 4 deletions arch/um/Kconfig.x86_64
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@ config SEMAPHORE_SLEEPERS
bool
default y

config TOP_ADDR
hex
default 0x7fc0000000

config 3_LEVEL_PGTABLES
bool
default y
Expand Down
11 changes: 0 additions & 11 deletions arch/um/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,6 @@ KERNEL_DEFINES = $(strip -Derrno=kernel_errno -Dsigprocmask=kernel_sigprocmask \
KBUILD_CFLAGS += $(KERNEL_DEFINES)
KBUILD_CFLAGS += $(call cc-option,-fno-unit-at-a-time,)

# These are needed for clean and mrproper, since in that case .config is not
# included; the values here are meaningless

CONFIG_NEST_LEVEL ?= 0

SIZE = ($(CONFIG_NEST_LEVEL) * 0x20000000)

PHONY += linux

all: linux
Expand Down Expand Up @@ -120,10 +113,6 @@ CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,)
CONFIG_KERNEL_STACK_ORDER ?= 2
STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] )

ifndef START
START = $(shell echo $$[ $(TOP_ADDR) - $(SIZE) ] )
endif

CPPFLAGS_vmlinux.lds = -U$(SUBARCH) -DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \
-DELF_FORMAT="$(ELF_FORMAT)" -DKERNEL_STACK_SIZE=$(STACK_SIZE)

Expand Down
3 changes: 0 additions & 3 deletions arch/um/defconfig
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ CONFIG_X86_TSC=y
CONFIG_UML_X86=y
# CONFIG_64BIT is not set
CONFIG_SEMAPHORE_SLEEPERS=y
# CONFIG_HOST_2G_2G is not set
CONFIG_TOP_ADDR=0xc0000000
# CONFIG_3_LEVEL_PGTABLES is not set
CONFIG_ARCH_HAS_SC_SIGNALS=y
CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y
Expand All @@ -81,7 +79,6 @@ CONFIG_HOSTFS=y
# CONFIG_HPPFS is not set
CONFIG_MCONSOLE=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_NEST_LEVEL=0
# CONFIG_HIGHMEM is not set
CONFIG_KERNEL_STACK_ORDER=0

Expand Down
2 changes: 2 additions & 0 deletions arch/um/include/as-layout.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ extern unsigned long _stext, _etext, _sdata, _edata, __bss_start, _end;
extern unsigned long _unprotected_end;
extern unsigned long brk_start;

extern unsigned long host_task_size;

extern int linux_main(int argc, char **argv);

extern void (*sig_info[])(int, struct uml_pt_regs *);
Expand Down
5 changes: 4 additions & 1 deletion arch/um/include/os.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,9 @@ extern void maybe_sigio_broken(int fd, int read);
extern int os_arch_prctl(int pid, int code, unsigned long *addr);

/* tty.c */
int get_pty(void);
extern int get_pty(void);

/* sys-$ARCH/task_size.c */
extern unsigned long os_get_task_size(void);

#endif
2 changes: 1 addition & 1 deletion arch/um/kernel/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ void flush_thread(void)

ret = unmap(&current->mm->context.id, 0, STUB_START, 0, &data);
ret = ret || unmap(&current->mm->context.id, STUB_END,
TASK_SIZE - STUB_END, 1, &data);
host_task_size - STUB_END, 1, &data);
if (ret) {
printk(KERN_ERR "flush_thread - clearing address space failed, "
"err = %d\n", ret);
Expand Down
16 changes: 14 additions & 2 deletions arch/um/kernel/um_arch.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,11 @@ static struct notifier_block panic_exit_notifier = {
};

/* Set during early boot */
unsigned long task_size;
EXPORT_SYMBOL(task_size);

unsigned long host_task_size;

unsigned long brk_start;
unsigned long end_iomem;
EXPORT_SYMBOL(end_iomem);
Expand All @@ -267,6 +272,13 @@ int __init linux_main(int argc, char **argv)
if (have_root == 0)
add_arg(DEFAULT_COMMAND_LINE);

host_task_size = os_get_task_size();
/*
* TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps
* out
*/
task_size = host_task_size & PGDIR_MASK;

/* OS sanity checks that need to happen before the kernel runs */
os_early_checks();

Expand Down Expand Up @@ -303,7 +315,7 @@ int __init linux_main(int argc, char **argv)

highmem = 0;
iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK;
max_physmem = CONFIG_TOP_ADDR - uml_physmem - iomem_size - MIN_VMALLOC;
max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC;

/*
* Zones have to begin on a 1 << MAX_ORDER page boundary,
Expand Down Expand Up @@ -335,7 +347,7 @@ int __init linux_main(int argc, char **argv)
}

virtmem_size = physmem_size;
avail = CONFIG_TOP_ADDR - start_vm;
avail = TASK_SIZE - start_vm;
if (physmem_size > avail)
virtmem_size = avail;
end_vm = start_vm + virtmem_size;
Expand Down
2 changes: 1 addition & 1 deletion arch/um/os-Linux/sys-i386/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Licensed under the GPL
#

obj-y = registers.o signal.o tls.o
obj-y = registers.o signal.o task_size.o tls.o

USER_OBJS := $(obj-y)

Expand Down
120 changes: 120 additions & 0 deletions arch/um/os-Linux/sys-i386/task_size.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/mman.h>
#include "longjmp.h"
#include "kern_constants.h"

static jmp_buf buf;

/*
 * Signal handler installed for SIGSEGV by os_get_task_size() while the
 * address space is being probed.  It does not return normally: it jumps
 * back to the most recent setjmp(buf), which then yields 1, so the
 * faulting access in page_ok() is abandoned.  The signal number is unused.
 */
static void segfault(int sig)
{
longjmp(buf, 1);
}

/*
 * Decide whether a page belongs to the usable (user) part of the address
 * space.
 *
 * page is a page number, not an address; it is shifted left by
 * UM_KERN_PAGE_SHIFT to obtain the address that is probed.
 *
 * Returns 1 if the page can be written - either as it stands, after
 * mapping fresh anonymous memory there, or after mprotect()ing it
 * read-write - and 0 otherwise.
 *
 * Faulting accesses are caught through segfault(), which longjmps back
 * to the setjmp(buf) preceding the access, so the caller must have that
 * handler installed for SIGSEGV before calling this.
 *
 * NOTE(review): when the mprotect() path is taken, the page is left
 * read-write; nothing here restores its previous protection.
 */
static int page_ok(unsigned long page)
{
unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT);
/* Value written back by the write probes; replaced by the page's own
 * contents if the read below succeeds, so a readable page is rewritten
 * with what it already held. */
unsigned long n = ~0UL;
/* Non-NULL only if this function created a mapping that must be undone */
void *mapped = NULL;
int ok = 0;

/*
 * First see if the page is readable. If it is, it may still
 * be a VDSO, so we go on to see if it's writable. If not
 * then try mapping memory there. If that fails, then we're
 * still in the kernel area. As a sanity check, we'll fail if
 * the mmap succeeds, but gives us an address different from
 * what we wanted.
 */
if (setjmp(buf) == 0)
n = *address;
else {
/* The read faulted and segfault() brought us back here */
mapped = mmap(address, UM_KERN_PAGE_SIZE,
PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mapped == MAP_FAILED)
return 0;
if (mapped != address)
goto out;
}

/*
 * Now, is it writeable? If so, then we're in user address
 * space. If not, then try mprotecting it and try the write
 * again.
 */
if (setjmp(buf) == 0) {
*address = n;
ok = 1;
goto out;
} else if (mprotect(address, UM_KERN_PAGE_SIZE,
PROT_READ | PROT_WRITE) != 0)
goto out;

/* mprotect succeeded - final write attempt; a fault here leaves ok at 0 */
if (setjmp(buf) == 0) {
*address = n;
ok = 1;
}

out:
/* Only tear down a mapping that was created above */
if (mapped != NULL)
munmap(mapped, UM_KERN_PAGE_SIZE);
return ok;
}

/*
 * Probe the host address space to find where usable memory ends and
 * return that boundary as a byte address.
 *
 * A temporary SIGSEGV handler (segfault) is installed so that page_ok()
 * can survive faulting accesses; the previous disposition is restored
 * before returning.  Page 0 must be usable, otherwise the process exits.
 * The boundary is then found by a binary search over page numbers between
 * page 0 and the page at 0xffffd000, keeping "bottom" on a usable page and
 * "top" on an unusable one until they are adjacent.
 *
 * Returns the address of the lowest page found unusable, or 0xffffd000
 * if even that page is usable.  Exits with status 1 if the signal
 * handler cannot be installed or restored, or if page 0 is not usable.
 */
unsigned long os_get_task_size(void)
{
	struct sigaction sa, old;
	unsigned long bottom = 0;
	/*
	 * A 32-bit UML on a 64-bit host gets confused about the VDSO at
	 * 0xffffe000.  It is mapped, is readable, can be reprotected writeable
	 * and written.  However, exec discovers later that it can't be
	 * unmapped.  So, just set the highest address to be checked to just
	 * below it.  This might waste some address space on 4G/4G 32-bit
	 * hosts, but shouldn't hurt otherwise.
	 */
	unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT;
	unsigned long test;

	printf("Locating the top of the address space ... ");
	fflush(stdout);

	/*
	 * We're going to be longjmping out of the signal handler, so
	 * SA_NODEFER needs to be set - the handler never returns normally,
	 * and SIGSEGV must still be deliverable for the next probe.
	 */
	sa.sa_handler = segfault;
	sigemptyset(&sa.sa_mask);
	sa.sa_flags = SA_NODEFER;
	if (sigaction(SIGSEGV, &sa, &old) != 0) {
		/* Without the handler the first faulting probe would kill us */
		perror("os_get_task_size");
		exit(1);
	}

	if (!page_ok(bottom)) {
		fprintf(stderr, "Address 0x%lx no good?\n",
			bottom << UM_KERN_PAGE_SHIFT);
		exit(1);
	}

	/* This could happen with a 4G/4G split */
	if (page_ok(top))
		goto out;

	/* Invariant: bottom is usable, top is not */
	do {
		test = bottom + (top - bottom) / 2;
		if (page_ok(test))
			bottom = test;
		else
			top = test;
	} while (top - bottom > 1);

out:
	/* Restore the old SIGSEGV handling */
	if (sigaction(SIGSEGV, &old, NULL) != 0) {
		perror("os_get_task_size");
		exit(1);
	}

	top <<= UM_KERN_PAGE_SHIFT;
	printf("0x%lx\n", top);
	fflush(stdout);

	return top;
}
2 changes: 1 addition & 1 deletion arch/um/os-Linux/sys-x86_64/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Licensed under the GPL
#

obj-y = registers.o prctl.o signal.o
obj-y = registers.o prctl.o signal.o task_size.o

USER_OBJS := $(obj-y)

Expand Down
5 changes: 5 additions & 0 deletions arch/um/os-Linux/sys-x86_64/task_size.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/*
 * Return the top of the usable address space on x86_64.
 *
 * No probing is done here; the fixed value that CONFIG_TOP_ADDR used to
 * supply is returned.  The function takes no arguments, matching the
 * prototype in os.h and the call in linux_main().
 */
unsigned long os_get_task_size(void)
{
	/* The old value of CONFIG_TOP_ADDR */
	return 0x7fc0000000UL;
}
3 changes: 2 additions & 1 deletion include/asm-um/fixmap.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef __UM_FIXMAP_H
#define __UM_FIXMAP_H

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/kmap_types.h>
#include <asm/archparam.h>
Expand Down Expand Up @@ -57,7 +58,7 @@ extern void __set_fixmap (enum fixed_addresses idx,
* at the top of mem..
*/

#define FIXADDR_TOP (CONFIG_TOP_ADDR - 2 * PAGE_SIZE)
#define FIXADDR_TOP (TASK_SIZE - 2 * PAGE_SIZE)
#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)

Expand Down
5 changes: 3 additions & 2 deletions include/asm-um/processor-generic.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ struct pt_regs;
struct task_struct;

#include "asm/ptrace.h"
#include "asm/pgtable.h"
#include "registers.h"
#include "sysdep/archsetjmp.h"

Expand Down Expand Up @@ -92,7 +91,9 @@ static inline void mm_copy_segments(struct mm_struct *from_mm,
/*
* User space process size: 3GB (default).
*/
#define TASK_SIZE (CONFIG_TOP_ADDR & PGDIR_MASK)
extern unsigned long task_size;

#define TASK_SIZE (task_size)

#undef STACK_TOP
#undef STACK_TOP_MAX
Expand Down

0 comments on commit 536788f

Please sign in to comment.