Skip to content

Commit

Permalink
[PATCH] i386 CPU hotplug
Browse files Browse the repository at this point in the history
(The i386 CPU hotplug patch provides infrastructure for some work which Pavel
is doing as well as for ACPI S3 (suspend-to-RAM) work which Li Shaohua
<[email protected]> is doing)

The following provides i386 architecture support for safely unregistering and
registering processors during runtime, updated for the current -mm tree.  In
order to avoid dumping cpu hotplug code into kernel/irq/* i dropped the
cpu_online check in do_IRQ() by modifying fixup_irqs().  The difference being
that on cpu offline, fixup_irqs() is called before we clear the cpu from
cpu_online_map and a long delay in order to ensure that we never have any
queued external interrupts on the APICs.  There are additional changes to s390
and ppc64 to account for this change.

1) Add CONFIG_HOTPLUG_CPU
2) disable local APIC timer on dead cpus.
3) Disable preempt around irq balancing to prevent CPUs going down.
4) Print irq stats for all possible cpus.
5) Debugging check for interrupts on offline cpus.
6) Hacky fixup_irqs() to redirect irqs when cpus go off/online.
7) play_dead() for offline cpus to spin inside.
8) Handle offline cpus set in flush_tlb_others().
9) Grab lock earlier in smp_call_function() to prevent CPUs going down.
10) Implement __cpu_disable() and __cpu_die().
11) Enable local interrupts in cpu_enable() after fixup_irqs()
12) Don't fiddle with NMI on dead cpu, but leave intact on other cpus.
13) Program IRQ affinity whilst cpu is still in cpu_online_map on offline.

Signed-off-by: Zwane Mwaikambo <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Zwane Mwaikambo authored and Linus Torvalds committed Jun 25, 2005
1 parent d92de65 commit f370513
Show file tree
Hide file tree
Showing 15 changed files with 243 additions and 40 deletions.
9 changes: 9 additions & 0 deletions arch/i386/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,15 @@ config SCx200
This support is also available as a module. If compiled as a
module, it will be called scx200.

config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
depends on SMP && HOTPLUG && EXPERIMENTAL
---help---
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu.

Say N.

source "drivers/pcmcia/Kconfig"

source "drivers/pci/hotplug/Kconfig"
Expand Down
3 changes: 2 additions & 1 deletion arch/i386/kernel/apic.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>
#include <linux/cpu.h>

#include <asm/atomic.h>
#include <asm/smp.h>
Expand Down Expand Up @@ -1048,7 +1049,7 @@ void __init setup_secondary_APIC_clock(void)
setup_APIC_timer(calibration_result);
}

void __init disable_APIC_timer(void)
void __devinit disable_APIC_timer(void)
{
if (using_apic_timer) {
unsigned long v;
Expand Down
2 changes: 2 additions & 0 deletions arch/i386/kernel/io_apic.c
Original file line number Diff line number Diff line change
Expand Up @@ -576,9 +576,11 @@ static int balanced_irq(void *unused)
try_to_freeze(PF_FREEZE);
if (time_after(jiffies,
prev_balance_time+balanced_irq_interval)) {
preempt_disable();
do_irq_balance();
prev_balance_time = jiffies;
time_remaining = balanced_irq_interval;
preempt_enable();
}
}
return 0;
Expand Down
67 changes: 54 additions & 13 deletions arch/i386/kernel/irq.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/delay.h>

DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
EXPORT_PER_CPU_SYMBOL(irq_stat);
Expand Down Expand Up @@ -210,9 +213,8 @@ int show_interrupts(struct seq_file *p, void *v)

if (i == 0) {
seq_printf(p, " ");
for (j=0; j<NR_CPUS; j++)
if (cpu_online(j))
seq_printf(p, "CPU%d ",j);
for_each_cpu(j)
seq_printf(p, "CPU%d ",j);
seq_putc(p, '\n');
}

Expand All @@ -225,9 +227,8 @@ int show_interrupts(struct seq_file *p, void *v)
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
for (j = 0; j < NR_CPUS; j++)
if (cpu_online(j))
seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
for_each_cpu(j)
seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
seq_printf(p, " %14s", irq_desc[i].handler->typename);
seq_printf(p, " %s", action->name);
Expand All @@ -240,16 +241,14 @@ int show_interrupts(struct seq_file *p, void *v)
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
for (j = 0; j < NR_CPUS; j++)
if (cpu_online(j))
seq_printf(p, "%10u ", nmi_count(j));
for_each_cpu(j)
seq_printf(p, "%10u ", nmi_count(j));
seq_putc(p, '\n');
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
for (j = 0; j < NR_CPUS; j++)
if (cpu_online(j))
seq_printf(p, "%10u ",
per_cpu(irq_stat,j).apic_timer_irqs);
for_each_cpu(j)
seq_printf(p, "%10u ",
per_cpu(irq_stat,j).apic_timer_irqs);
seq_putc(p, '\n');
#endif
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
Expand All @@ -259,3 +258,45 @@ int show_interrupts(struct seq_file *p, void *v)
}
return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
#include <mach_apic.h>

void fixup_irqs(cpumask_t map)
{
unsigned int irq;
static int warned;

for (irq = 0; irq < NR_IRQS; irq++) {
cpumask_t mask;
if (irq == 2)
continue;

cpus_and(mask, irq_affinity[irq], map);
if (any_online_cpu(mask) == NR_CPUS) {
printk("Breaking affinity for irq %i\n", irq);
mask = map;
}
if (irq_desc[irq].handler->set_affinity)
irq_desc[irq].handler->set_affinity(irq, mask);
else if (irq_desc[irq].action && !(warned++))
printk("Cannot set affinity for irq %i\n", irq);
}

#if 0
barrier();
/* Ingo Molnar says: "after the IO-APIC masks have been redirected
[note the nop - the interrupt-enable boundary on x86 is two
instructions from sti] - to flush out pending hardirqs and
IPIs. After this point nothing is supposed to reach this CPU." */
__asm__ __volatile__("sti; nop; cli");
barrier();
#else
/* That doesn't seem sufficient. Give it 1ms. */
local_irq_enable();
mdelay(1);
local_irq_disable();
#endif
}
#endif

39 changes: 38 additions & 1 deletion arch/i386/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
Expand Down Expand Up @@ -55,6 +56,9 @@
#include <linux/irq.h>
#include <linux/err.h>

#include <asm/tlbflush.h>
#include <asm/cpu.h>

asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");

static int hlt_counter;
Expand Down Expand Up @@ -143,14 +147,44 @@ static void poll_idle (void)
}
}

#ifdef CONFIG_HOTPLUG_CPU
#include <asm/nmi.h>
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;

/* We shouldn't have to disable interrupts while dead, but
* some interrupts just don't seem to go away, and this makes
* it "work" for testing purposes. */
/* Death loop */
while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
cpu_relax();

local_irq_disable();
__flush_tlb_all();
cpu_set(smp_processor_id(), cpu_online_map);
enable_APIC_timer();
local_irq_enable();
}
#else
static inline void play_dead(void)
{
BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
* The idle thread. There's no useful work to be
* done, so just try to conserve power and have a
* low exit latency (ie sit in a loop waiting for
* somebody to say that they'd like to reschedule)
*/
void cpu_idle (void)
void cpu_idle(void)
{
int cpu = raw_smp_processor_id();

/* endless idle loop with no priority at all */
while (1) {
while (!need_resched()) {
Expand All @@ -165,6 +199,9 @@ void cpu_idle (void)
if (!idle)
idle = default_idle;

if (cpu_is_offline(cpu))
play_dead();

__get_cpu_var(irq_stat).idle_timestamp = jiffies;
idle();
}
Expand Down
24 changes: 15 additions & 9 deletions arch/i386/kernel/smp.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <linux/mc146818rtc.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/module.h>

#include <asm/mtrr.h>
Expand Down Expand Up @@ -164,7 +165,7 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
unsigned long flags;

local_irq_save(flags);
WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
/*
* Wait for idle.
*/
Expand Down Expand Up @@ -346,21 +347,21 @@ fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
unsigned long va)
{
cpumask_t tmp;
/*
* A couple of (to be removed) sanity checks:
*
* - we do not send IPIs to not-yet booted CPUs.
* - current CPU must not be in mask
* - mask must exist :)
*/
BUG_ON(cpus_empty(cpumask));

cpus_and(tmp, cpumask, cpu_online_map);
BUG_ON(!cpus_equal(cpumask, tmp));
BUG_ON(cpu_isset(smp_processor_id(), cpumask));
BUG_ON(!mm);

/* If a CPU which we ran on has gone down, OK. */
cpus_and(cpumask, cpumask, cpu_online_map);
if (cpus_empty(cpumask))
return;

/*
* i'm not happy about this global shared spinlock in the
* MM hot path, but we'll see how contended it is.
Expand Down Expand Up @@ -476,6 +477,7 @@ void flush_tlb_all(void)
*/
void smp_send_reschedule(int cpu)
{
WARN_ON(cpu_is_offline(cpu));
send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}

Expand Down Expand Up @@ -516,10 +518,15 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
*/
{
struct call_data_struct data;
int cpus = num_online_cpus()-1;
int cpus;

if (!cpus)
/* Holding any lock stops cpus from going down. */
spin_lock(&call_lock);
cpus = num_online_cpus() - 1;
if (!cpus) {
spin_unlock(&call_lock);
return 0;
}

/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
Expand All @@ -531,7 +538,6 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
if (wait)
atomic_set(&data.finished, 0);

spin_lock(&call_lock);
call_data = &data;
mb();

Expand Down
Loading

0 comments on commit f370513

Please sign in to comment.