Skip to content

Commit

Permalink
x86: Make 64 bit use early_res instead of bootmem before slab
Browse files Browse the repository at this point in the history
Finally we can use early_res to replace bootmem for x86_64 now.

CONFIG_NO_BOOTMEM can still be used to enable or disable it.

-v2: fix 32bit compiling about MAX_DMA32_PFN
-v3: folded bug fix from LKML message below

Signed-off-by: Yinghai Lu <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: H. Peter Anvin <[email protected]>
  • Loading branch information
Yinghai Lu authored and H. Peter Anvin committed Feb 12, 2010
1 parent c252a5b commit 0867721
Show file tree
Hide file tree
Showing 13 changed files with 454 additions and 23 deletions.
13 changes: 13 additions & 0 deletions arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,19 @@ config PARAVIRT_DEBUG
Enable to debug paravirt_ops internals. Specifically, BUG if
a paravirt_op is missing when it is called.

config NO_BOOTMEM
default y
bool "Disable Bootmem code"
depends on X86_64
---help---
Use early_res directly instead of bootmem before slab is ready.
- allocator (buddy) [generic]
- early allocator (bootmem) [generic]
- very early allocator (reserve_early*()) [x86]
- very very early allocator (early brk model) [x86]
So this removes one layer between the early allocators and the final allocator.


config MEMTEST
bool "Memtest"
---help---
Expand Down
6 changes: 6 additions & 0 deletions arch/x86/include/asm/e820.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ extern void free_early(u64 start, u64 end);
extern void early_res_to_bootmem(u64 start, u64 end);
extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);

void reserve_early_without_check(u64 start, u64 end, char *name);
u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
u64 size, u64 align);
#include <linux/range.h>
int get_free_all_memory_range(struct range **rangep, int nodeid);

extern unsigned long e820_end_of_ram_pfn(void);
extern unsigned long e820_end_of_low_ram_pfn(void);
extern int e820_find_active_region(const struct e820entry *ei,
Expand Down
159 changes: 146 additions & 13 deletions arch/x86/kernel/e820.c
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,25 @@ void __init reserve_early(u64 start, u64 end, char *name)
__reserve_early(start, end, name, 0);
}

/*
 * Record [start, end) as a new entry at the end of the early_res table.
 *
 * Unlike reserve_early(), this does not go through __reserve_early(); the
 * entry is written directly.  An empty or inverted range (start >= end) is
 * silently ignored.
 *
 * @start: first address of the reservation
 * @end:   end address of the reservation
 * @name:  label stored with the entry; may be NULL, in which case the
 *         entry's name is left empty
 */
void __init reserve_early_without_check(u64 start, u64 end, char *name)
{
	struct early_res *r;

	if (start >= end)
		return;

	/* NOTE(review): presumably grows the table when it is full - confirm */
	__check_and_double_early_res(end);

	r = &early_res[early_res_count];

	r->start = start;
	r->end = end;
	r->overlap_ok = 0;

	/*
	 * Do not rely on the slot's previous contents: start from an empty
	 * name so a NULL @name cannot expose a label left by an earlier entry.
	 */
	r->name[0] = '\0';
	if (name) {
		/* strncpy() does not terminate when @name fills the buffer */
		strncpy(r->name, name, sizeof(r->name) - 1);
		r->name[sizeof(r->name) - 1] = '\0';
	}
	early_res_count++;
}

void __init free_early(u64 start, u64 end)
{
struct early_res *r;
Expand All @@ -991,6 +1010,94 @@ void __init free_early(u64 start, u64 end)
drop_range(i);
}

#ifdef CONFIG_NO_BOOTMEM
/*
 * Subtract every early reservation from the @az-entry @range array.
 *
 * Each reservation is converted with PFN_DOWN() on its start and PFN_UP()
 * on its end before being handed to subtract_range(); reservations that
 * come out empty are skipped.
 */
static void __init subtract_early_res(struct range *range, int az)
{
	u64 first_pfn, end_pfn;
	int nr_res = 0;
	int first, n;

	/* the table is in use up to the first entry whose end is zero */
	while (nr_res < max_early_res && early_res[nr_res].end)
		nr_res++;

	/*
	 * need to skip first one ?
	 * Entry 0 is left out once early_res no longer points at early_res_x.
	 */
	first = (early_res != early_res_x) ? 1 : 0;

	printk(KERN_INFO "Subtract (%d early reservations)\n", nr_res);

	for (n = first; n < nr_res; n++) {
		first_pfn = PFN_DOWN(early_res[n].start);
		end_pfn = PFN_UP(early_res[n].end);

		if (first_pfn < end_pfn)
			subtract_range(range, az, first_pfn, end_pfn);
	}
}

/*
 * Build the array of free ranges for @nodeid: the ranges supplied by
 * add_from_early_node_map() with every early reservation subtracted,
 * then tidied by clean_sort_range().
 *
 * The array holds two entries per early reservation and lives in memory
 * located with find_e820_area(), searched below the end of mapped memory
 * and, when MAX_DMA32_PFN is defined and mapped memory extends beyond it,
 * above that boundary.  Panics if no such area can be found.
 *
 * When @nodeid is MAX_NUMNODES the early_res table is wiped afterwards.
 *
 * @rangep: receives the address of the range array
 * @nodeid: node to collect ranges for
 *
 * Returns the entry count reported by clean_sort_range().
 */
int __init get_free_all_memory_range(struct range **rangep, int nodeid)
{
	int i, count;
	u64 start = 0, end;
	u64 size;
	u64 mem;
	struct range *range;
	int nr_range;

	/* the table is in use up to the first entry whose end is zero */
	count = 0;
	for (i = 0; i < max_early_res && early_res[i].end; i++)
		count++;

	count *= 2;

	size = sizeof(struct range) * count;
	/* widen before shifting so the pfn -> address conversion cannot wrap */
#ifdef MAX_DMA32_PFN
	if (max_pfn_mapped > MAX_DMA32_PFN)
		start = (u64)MAX_DMA32_PFN << PAGE_SHIFT;
#endif
	end = (u64)max_pfn_mapped << PAGE_SHIFT;
	mem = find_e820_area(start, end, size, sizeof(struct range));
	if (mem == -1ULL)
		panic("can not find more space for range free");

	range = __va(mem);
	/* use early_node_map[] and early_res to get range array at first */
	memset(range, 0, size);

	/* need to go over early_node_map to find out good range for node */
	nr_range = add_from_early_node_map(range, count, 0, nodeid);
	subtract_early_res(range, count);
	/* the final count is whatever survives cleaning and sorting */
	nr_range = clean_sort_range(range, count);

	/* need to clear it ? */
	if (nodeid == MAX_NUMNODES) {
		memset(&early_res[0], 0,
		       sizeof(struct early_res) * max_early_res);
		early_res = NULL;
		max_early_res = 0;
		/* keep the count in step with the emptied table */
		early_res_count = 0;
	}

	*rangep = range;
	return nr_range;
}
#else
void __init early_res_to_bootmem(u64 start, u64 end)
{
int i, count;
Expand Down Expand Up @@ -1028,6 +1135,7 @@ void __init early_res_to_bootmem(u64 start, u64 end)
max_early_res = 0;
early_res_count = 0;
}
#endif

/* Check for already reserved areas */
static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
Expand Down Expand Up @@ -1081,6 +1189,35 @@ static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
return changed;
}

/*
 * Find a free area of @size with alignment @align inside the region
 * [ei_start, ei_last), additionally starting no lower than @start and
 * finishing no higher than @end.
 *
 * The region itself is not validated here; callers are expected to pass
 * one that is usable as RAM (for example an active range from
 * early_node_map).
 *
 * Returns the address of the area, or -1ULL when nothing fits.
 */
u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
			 u64 size, u64 align)
{
	u64 candidate = round_up(ei_start, align);
	u64 candidate_end;

	if (candidate < start)
		candidate = round_up(start, align);
	if (candidate >= ei_last)
		return -1ULL;

	/*
	 * bad_addr() checks for already reserved areas and is given the
	 * candidate by reference; keep asking while it returns nonzero and
	 * the area would still fit below ei_last.
	 */
	while (bad_addr(&candidate, size, align) &&
	       candidate + size <= ei_last)
		;

	candidate_end = candidate + size;
	if (candidate_end > ei_last || candidate_end > end)
		return -1ULL;

	return candidate;
}

/*
* Find a free area with specified alignment in a specific range.
*/
Expand All @@ -1090,24 +1227,20 @@ u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)

for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
u64 addr, last;
u64 ei_last;
u64 addr;
u64 ei_start, ei_last;

if (ei->type != E820_RAM)
continue;
addr = round_up(ei->addr, align);

ei_last = ei->addr + ei->size;
if (addr < start)
addr = round_up(start, align);
if (addr >= ei_last)
continue;
while (bad_addr(&addr, size, align) && addr+size <= ei_last)
;
last = addr + size;
if (last > ei_last)
continue;
if (last > end)
ei_start = ei->addr;
addr = find_early_area(ei_start, ei_last, start, end,
size, align);

if (addr == -1ULL)
continue;

return addr;
}
return -1ULL;
Expand Down
2 changes: 2 additions & 0 deletions arch/x86/kernel/setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -967,7 +967,9 @@ void __init setup_arch(char **cmdline_p)
#endif

initmem_init(0, max_pfn, acpi, k8);
#ifndef CONFIG_NO_BOOTMEM
early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
#endif

dma32_reserve_bootmem();

Expand Down
4 changes: 4 additions & 0 deletions arch/x86/mm/init_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,7 @@ kernel_physical_mapping_init(unsigned long start,
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
int acpi, int k8)
{
#ifndef CONFIG_NO_BOOTMEM
unsigned long bootmap_size, bootmap;

bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
Expand All @@ -585,6 +586,9 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
0, end_pfn);
e820_register_active_regions(0, start_pfn, end_pfn);
free_bootmem_with_active_regions(0, end_pfn);
#else
e820_register_active_regions(0, start_pfn, end_pfn);
#endif
}
#endif

Expand Down
20 changes: 15 additions & 5 deletions arch/x86/mm/numa_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,11 +198,13 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
void __init
setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
{
unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
unsigned long start_pfn, last_pfn, nodedata_phys;
const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
unsigned long bootmap_start, nodedata_phys;
void *bootmap;
int nid;
#ifndef CONFIG_NO_BOOTMEM
unsigned long bootmap_start, bootmap_pages, bootmap_size;
void *bootmap;
#endif

if (!end)
return;
Expand All @@ -216,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)

start = roundup(start, ZONE_ALIGN);

printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
start, end);

start_pfn = start >> PAGE_SHIFT;
Expand All @@ -235,10 +237,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);

memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
NODE_DATA(nodeid)->node_id = nodeid;
NODE_DATA(nodeid)->node_start_pfn = start_pfn;
NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;

#ifndef CONFIG_NO_BOOTMEM
NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];

/*
* Find a place for the bootmem map
* nodedata_phys could be on other nodes by alloc_bootmem,
Expand Down Expand Up @@ -275,6 +280,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);

free_bootmem_with_active_regions(nodeid, end);
#endif

node_set_online(nodeid);
}
Expand Down Expand Up @@ -733,6 +739,10 @@ unsigned long __init numa_free_all_bootmem(void)
for_each_online_node(i)
pages += free_all_bootmem_node(NODE_DATA(i));

#ifdef CONFIG_NO_BOOTMEM
pages += free_all_memory_core_early(MAX_NUMNODES);
#endif

return pages;
}

Expand Down
7 changes: 7 additions & 0 deletions include/linux/bootmem.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ extern unsigned long max_pfn;
extern unsigned long saved_max_pfn;
#endif

#ifndef CONFIG_NO_BOOTMEM
/*
* node_bootmem_map is a map pointer - the bits represent all physical
* memory pages (including holes) on the node.
Expand All @@ -37,6 +38,7 @@ typedef struct bootmem_data {
} bootmem_data_t;

extern bootmem_data_t bootmem_node_data[];
#endif

extern unsigned long bootmem_bootmap_pages(unsigned long);

Expand All @@ -46,6 +48,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
unsigned long endpfn);
extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);

unsigned long free_all_memory_core_early(int nodeid);
extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
extern unsigned long free_all_bootmem(void);

Expand Down Expand Up @@ -84,6 +87,10 @@ extern void *__alloc_bootmem_node(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
unsigned long goal);
void *__alloc_bootmem_node_high(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
Expand Down
5 changes: 5 additions & 0 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <linux/prio_tree.h>
#include <linux/debug_locks.h>
#include <linux/mm_types.h>
#include <linux/range.h>

struct mempolicy;
struct anon_vma;
Expand Down Expand Up @@ -1049,6 +1050,10 @@ extern void get_pfn_range_for_nid(unsigned int nid,
extern unsigned long find_min_pfn_with_active_regions(void);
extern void free_bootmem_with_active_regions(int nid,
unsigned long max_low_pfn);
int add_from_early_node_map(struct range *range, int az,
int nr_range, int nid);
void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
u64 goal, u64 limit);
typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
extern void sparse_memory_present_with_active_regions(int nid);
Expand Down
2 changes: 2 additions & 0 deletions include/linux/mmzone.h
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,9 @@ typedef struct pglist_data {
struct page_cgroup *node_page_cgroup;
#endif
#endif
#ifndef CONFIG_NO_BOOTMEM
struct bootmem_data *bdata;
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Must be held any time you expect node_start_pfn, node_present_pages
Expand Down
Loading

0 comments on commit 0867721

Please sign in to comment.