Skip to content

Commit

Permalink
x86: cleanup early per cpu variables/accesses v4
Browse files Browse the repository at this point in the history
  * Introduce a new PER_CPU macro called "EARLY_PER_CPU".  This is
    used by some per_cpu variables that are initialized and accessed
    before there are per_cpu areas allocated.

    ["Early" in respect to per_cpu variables is "earlier than the per_cpu
    areas have been setup".]

    This patchset adds these new macros:

	DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)
	EXPORT_EARLY_PER_CPU_SYMBOL(_name)
	DECLARE_EARLY_PER_CPU(_type, _name)

	early_per_cpu_ptr(_name)
	early_per_cpu_map(_name, _idx)
	early_per_cpu(_name, _cpu)

    The DEFINE macro defines the per_cpu variable as well as the early
    map and pointer.  It also initializes the per_cpu variable and map
    elements to "_initvalue".  The early_* macros provide access to
    the initial map (usually setup during system init) and the early
    pointer.  This pointer is initialized to point to the early map
    but is then NULL'ed when the actual per_cpu areas are setup.  After
    that the per_cpu variable is the correct access to the variable.

    The early_per_cpu() macro is not very efficient but does show how to
    access the variable if you have a function that can be called both
    "early" and "late".  It tests the early ptr to be NULL, and if not
    then it's still valid.  Otherwise, the per_cpu variable is used
    instead:

	#define early_per_cpu(_name, _cpu) 			\
		(early_per_cpu_ptr(_name) ?			\
			early_per_cpu_ptr(_name)[_cpu] :	\
			per_cpu(_name, _cpu))

    A better method is to actually check the pointer manually.  In the
    case below, numa_set_node can be called both "early" and "late":

	void __cpuinit numa_set_node(int cpu, int node)
	{
	    int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	    if (cpu_to_node_map)
		    cpu_to_node_map[cpu] = node;
	    else
		    per_cpu(x86_cpu_to_node_map, cpu) = node;
	}

  * Add a flag "arch_provides_topology_pointers" that indicates pointers
    to topology cpumask_t maps are available.  Otherwise, use the function
    returning the cpumask_t value.  This is useful if cpumask_t set size
    is very large to avoid copying data on to/off of the stack.

  * The coverage of CONFIG_DEBUG_PER_CPU_MAPS has been increased while
    the non-debug case has been optimized a bit.

  * Remove an unreferenced compiler warning in drivers/base/topology.c

  * Clean up #ifdef in setup.c

For inclusion into sched-devel/latest tree.

Based on:
	git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
    +   sched-devel/latest  .../mingo/linux-2.6-sched-devel.git

Signed-off-by: Mike Travis <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
  • Loading branch information
Mike Travis authored and Ingo Molnar committed Jul 8, 2008
1 parent 1184dc2 commit 23ca4bb
Show file tree
Hide file tree
Showing 15 changed files with 270 additions and 196 deletions.
2 changes: 1 addition & 1 deletion arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ config ARCH_HAS_CACHE_LINE_SIZE
def_bool y

config HAVE_SETUP_PER_CPU_AREA
def_bool X86_64 || (X86_SMP && !X86_VOYAGER)
def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER)

config HAVE_CPUMASK_OF_CPU_MAP
def_bool X86_64_SMP
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/Kconfig.debug
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ config DEBUG_PAGEALLOC
config DEBUG_PER_CPU_MAPS
bool "Debug access to per_cpu maps"
depends on DEBUG_KERNEL
depends on X86_64_SMP
depends on X86_SMP
default n
help
Say Y to verify that the per_cpu map being accessed has
Expand Down
9 changes: 3 additions & 6 deletions arch/x86/kernel/apic_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,6 @@

unsigned long mp_lapic_addr;

DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);

/*
* Knob to control our willingness to enable the local APIC.
*
Expand Down Expand Up @@ -1534,9 +1531,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
}
#ifdef CONFIG_SMP
/* are we being called early in kernel startup? */
if (x86_cpu_to_apicid_early_ptr) {
u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);

cpu_to_apicid[cpu] = apicid;
bios_cpu_apicid[cpu] = apicid;
Expand Down
11 changes: 4 additions & 7 deletions arch/x86/kernel/apic_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,6 @@ static unsigned long apic_phys;

unsigned long mp_lapic_addr;

DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);

unsigned int __cpuinitdata maxcpus = NR_CPUS;
/*
* Get the LAPIC version
Expand Down Expand Up @@ -1091,9 +1088,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
cpu = 0;
}
/* are we being called early in kernel startup? */
if (x86_cpu_to_apicid_early_ptr) {
u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);

cpu_to_apicid[cpu] = apicid;
bios_cpu_apicid[cpu] = apicid;
Expand Down Expand Up @@ -1269,7 +1266,7 @@ __cpuinit int apic_is_clustered_box(void)
if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
return 0;

bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
bitmap_zero(clustermap, NUM_APIC_CLUSTERS);

for (i = 0; i < NR_CPUS; i++) {
Expand Down
96 changes: 84 additions & 12 deletions arch/x86/kernel/setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,23 @@ unsigned disabled_cpus __cpuinitdata;
unsigned int boot_cpu_physical_apicid = -1U;
EXPORT_SYMBOL(boot_cpu_physical_apicid);

DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);

/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
#endif

/* map cpu index to physical APIC ID */
DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);

#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
#define X86_64_NUMA 1

DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
#endif

#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
/*
* Copy data used in early init routines from the initial arrays to the
Expand All @@ -37,20 +47,21 @@ static void __init setup_per_cpu_maps(void)
int cpu;

for_each_possible_cpu(cpu) {
per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
per_cpu(x86_cpu_to_apicid, cpu) =
early_per_cpu_map(x86_cpu_to_apicid, cpu);
per_cpu(x86_bios_cpu_apicid, cpu) =
x86_bios_cpu_apicid_init[cpu];
#ifdef CONFIG_NUMA
early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#ifdef X86_64_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
x86_cpu_to_node_map_init[cpu];
early_per_cpu_map(x86_cpu_to_node_map, cpu);
#endif
}

/* indicate the early static arrays will soon be gone */
x86_cpu_to_apicid_early_ptr = NULL;
x86_bios_cpu_apicid_early_ptr = NULL;
#ifdef CONFIG_NUMA
x86_cpu_to_node_map_early_ptr = NULL;
early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#ifdef X86_64_NUMA
early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
}

Expand Down Expand Up @@ -109,7 +120,8 @@ void __init setup_per_cpu_areas(void)
if (!node_online(node) || !NODE_DATA(node)) {
ptr = alloc_bootmem_pages(size);
printk(KERN_INFO
"cpu %d has no node or node-local memory\n", i);
"cpu %d has no node %d or node-local memory\n",
i, node);
}
else
ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
Expand Down Expand Up @@ -137,3 +149,63 @@ void __init setup_per_cpu_areas(void)
}

#endif

#ifdef X86_64_NUMA
void __cpuinit numa_set_node(int cpu, int node)
{
int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

if (cpu_to_node_map)
cpu_to_node_map[cpu] = node;

else if (per_cpu_offset(cpu))
per_cpu(x86_cpu_to_node_map, cpu) = node;

else
Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
}

void __cpuinit numa_clear_node(int cpu)
{
numa_set_node(cpu, NUMA_NO_NODE);
}

void __cpuinit numa_add_cpu(int cpu)
{
cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}

void __cpuinit numa_remove_cpu(int cpu)
{
cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
}
#endif /* CONFIG_NUMA */

#if defined(CONFIG_DEBUG_PER_CPU_MAPS) && defined(CONFIG_X86_64)

int cpu_to_node(int cpu)
{
if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
printk(KERN_WARNING
"cpu_to_node(%d): usage too early!\n", cpu);
dump_stack();
return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
}
return per_cpu(x86_cpu_to_node_map, cpu);
}
EXPORT_SYMBOL(cpu_to_node);

int early_cpu_to_node(int cpu)
{
if (early_per_cpu_ptr(x86_cpu_to_node_map))
return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];

if (!per_cpu_offset(cpu)) {
printk(KERN_WARNING
"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
dump_stack();
return NUMA_NO_NODE;
}
return per_cpu(x86_cpu_to_node_map, cpu);
}
#endif
24 changes: 0 additions & 24 deletions arch/x86/kernel/setup_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -737,18 +737,6 @@ char * __init __attribute__((weak)) memory_setup(void)
return machine_specific_memory_setup();
}

#ifdef CONFIG_NUMA
/*
* In the golden day, when everything among i386 and x86_64 will be
* integrated, this will not live here
*/
void *x86_cpu_to_node_map_early_ptr;
int x86_cpu_to_node_map_init[NR_CPUS] = {
[0 ... NR_CPUS-1] = NUMA_NO_NODE
};
DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
#endif

/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
Expand Down Expand Up @@ -887,18 +875,6 @@ void __init setup_arch(char **cmdline_p)

io_delay_init();

#ifdef CONFIG_X86_SMP
/*
* setup to use the early static init tables during kernel startup
* X86_SMP will exclude sub-arches that don't deal well with it.
*/
x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
#ifdef CONFIG_NUMA
x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
#endif
#endif

#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
#endif
Expand Down
9 changes: 0 additions & 9 deletions arch/x86/kernel/setup_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -406,15 +406,6 @@ void __init setup_arch(char **cmdline_p)
kvmclock_init();
#endif

#ifdef CONFIG_SMP
/* setup to use the early static init tables during kernel startup */
x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
#ifdef CONFIG_NUMA
x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
#endif
#endif

#ifdef CONFIG_ACPI
/*
* Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
Expand Down
20 changes: 2 additions & 18 deletions arch/x86/kernel/smpboot.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,22 +68,6 @@
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>

/*
* FIXME: For x86_64, those are defined in other files. But moving them here,
* would make the setup areas dependent on smp, which is a loss. When we
* integrate apic between arches, we can probably do a better job, but
* right now, they'll stay here -- glommer
*/

/* which logical CPU number maps to which CPU (physical APIC ID) */
u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
{ [0 ... NR_CPUS-1] = BAD_APICID };
void *x86_cpu_to_apicid_early_ptr;

u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
= { [0 ... NR_CPUS-1] = BAD_APICID };
void *x86_bios_cpu_apicid_early_ptr;

#ifdef CONFIG_X86_32
u8 apicid_2_node[MAX_APICID];
static int low_mappings;
Expand Down Expand Up @@ -992,7 +976,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
/* Try to put things back the way they were before ... */
unmap_cpu_to_logical_apicid(cpu);
#ifdef CONFIG_X86_64
clear_node_cpumask(cpu); /* was set by numa_add_cpu */
numa_remove_cpu(cpu); /* was set by numa_add_cpu */
#endif
cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
Expand Down Expand Up @@ -1373,7 +1357,7 @@ static void __ref remove_cpu_from_maps(int cpu)
cpu_clear(cpu, cpu_callin_map);
/* was set by cpu_init() */
clear_bit(cpu, (unsigned long *)&cpu_initialized);
clear_node_cpumask(cpu);
numa_remove_cpu(cpu);
#endif
}

Expand Down
43 changes: 11 additions & 32 deletions arch/x86/mm/numa_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,6 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];

struct memnode memnode;

#ifdef CONFIG_SMP
int x86_cpu_to_node_map_init[NR_CPUS] = {
[0 ... NR_CPUS-1] = NUMA_NO_NODE
};
void *x86_cpu_to_node_map_early_ptr;
EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
#endif
DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);

s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};
Expand Down Expand Up @@ -577,24 +567,6 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
}

__cpuinit void numa_add_cpu(int cpu)
{
set_bit(cpu,
(unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]);
}

void __cpuinit numa_set_node(int cpu, int node)
{
int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;

if(cpu_to_node_map)
cpu_to_node_map[cpu] = node;
else if(per_cpu_offset(cpu))
per_cpu(x86_cpu_to_node_map, cpu) = node;
else
Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
}

unsigned long __init numa_free_all_bootmem(void)
{
unsigned long pages = 0;
Expand Down Expand Up @@ -641,6 +613,7 @@ static __init int numa_setup(char *opt)
}
early_param("numa", numa_setup);

#ifdef CONFIG_NUMA
/*
* Setup early cpu_to_node.
*
Expand All @@ -652,14 +625,19 @@ early_param("numa", numa_setup);
* is already initialized in a round robin manner at numa_init_array,
* prior to this call, and this initialization is good enough
* for the fake NUMA cases.
*
* Called before the per_cpu areas are setup.
*/
void __init init_cpu_to_node(void)
{
int i;
int cpu;
u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);

for (i = 0; i < NR_CPUS; i++) {
BUG_ON(cpu_to_apicid == NULL);

for_each_possible_cpu(cpu) {
int node;
u16 apicid = x86_cpu_to_apicid_init[i];
u16 apicid = cpu_to_apicid[cpu];

if (apicid == BAD_APICID)
continue;
Expand All @@ -668,8 +646,9 @@ void __init init_cpu_to_node(void)
continue;
if (!node_online(node))
continue;
numa_set_node(i, node);
numa_set_node(cpu, node);
}
}
#endif


2 changes: 1 addition & 1 deletion arch/x86/mm/srat_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
if (node == NUMA_NO_NODE)
continue;
if (!node_isset(node, node_possible_map))
numa_set_node(i, NUMA_NO_NODE);
numa_clear_node(i);
}
numa_init_array();
return 0;
Expand Down
Loading

0 comments on commit 23ca4bb

Please sign in to comment.