Skip to content

Commit

Permalink
x86/smpboot: Split up native_cpu_up() into separate phases and docume…
Browse files Browse the repository at this point in the history
…nt them

There are four logical parts to what native_cpu_up() does on the BSP (or
on the controlling CPU for a later hotplug):

 1) Wake the AP by sending the INIT/SIPI/SIPI sequence.

 2) Wait for the AP to make it as far as wait_for_master_cpu(), which
    sets that CPU's bit in cpu_initialized_mask; the controlling CPU
    then sets the AP's bit in cpu_callout_mask to let the AP proceed
    through cpu_init().

 3) Wait for the AP to finish cpu_init() and get as far as the
    smp_callin() call, which sets that CPU's bit in cpu_callin_mask.

 4) Perform the TSC synchronization and wait for the AP to actually
    mark itself online in cpu_online_mask.

In preparation to allow these phases to operate in parallel on multiple
APs, split them out into separate functions and document the interactions
a little more clearly in both the BP and AP code paths.

No functional change intended.

Signed-off-by: David Woodhouse <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Tested-by: Michael Kelley <[email protected]>
Tested-by: Oleksandr Natalenko <[email protected]>
Tested-by: Helge Deller <[email protected]> # parisc
Tested-by: Guilherme G. Piccoli <[email protected]> # Steam Deck
Link: https://lore.kernel.org/r/[email protected]
  • Loading branch information
dwmw2 authored and Peter Zijlstra committed May 15, 2023
1 parent c7f15dd commit 2b3be65
Showing 1 changed file with 119 additions and 65 deletions.
184 changes: 119 additions & 65 deletions arch/x86/kernel/smpboot.c
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,10 @@ static void smp_callin(void)

wmb();

/*
* This runs the AP through all the cpuhp states to its target
* state CPUHP_ONLINE.
*/
notify_cpu_starting(cpuid);

/*
Expand Down Expand Up @@ -233,12 +237,28 @@ static void notrace start_secondary(void *unused)
load_cr3(swapper_pg_dir);
__flush_tlb_all();
#endif
/*
* Sync point with wait_cpu_initialized(). Before proceeding through
* cpu_init(), the AP will call wait_for_master_cpu() which sets its
* own bit in cpu_initialized_mask and then waits for the BSP to set
* its bit in cpu_callout_mask to release it.
*/
cpu_init_secondary();
rcu_cpu_starting(raw_smp_processor_id());
x86_cpuinit.early_percpu_clock_init();

/*
* Sync point with wait_cpu_callin(). The AP doesn't wait here
* but just sets the bit to let the controlling CPU (BSP) know that
* it's got this far.
*/
smp_callin();

/* Check TSC synchronization with the control CPU: */
/*
* Check TSC synchronization with the control CPU, which will do
* its part of this from wait_cpu_online(), making it an implicit
* synchronization point.
*/
check_tsc_sync_target();

/*
Expand All @@ -257,6 +277,7 @@ static void notrace start_secondary(void *unused)
* half valid vector space.
*/
lock_vector_lock();
/* Sync point with wait_cpu_online() */
set_cpu_online(smp_processor_id(), true);
lapic_online();
unlock_vector_lock();
Expand Down Expand Up @@ -979,17 +1000,13 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
* Returns zero if CPU booted OK, else error code from
* Returns zero if startup was successfully sent, else error code from
* ->wakeup_secondary_cpu.
*/
static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
{
/* start_ip had better be page-aligned! */
unsigned long start_ip = real_mode_header->trampoline_start;

unsigned long boot_error = 0;
unsigned long timeout;

#ifdef CONFIG_X86_64
/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
if (apic->wakeup_secondary_cpu_64)
Expand Down Expand Up @@ -1046,60 +1063,89 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
* - Use an INIT boot APIC message
*/
if (apic->wakeup_secondary_cpu_64)
boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip);
return apic->wakeup_secondary_cpu_64(apicid, start_ip);
else if (apic->wakeup_secondary_cpu)
boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
else
boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
return apic->wakeup_secondary_cpu(apicid, start_ip);

if (!boot_error) {
/*
* Wait 10s total for first sign of life from AP
*/
boot_error = -1;
timeout = jiffies + 10*HZ;
while (time_before(jiffies, timeout)) {
if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
/*
* Tell AP to proceed with initialization
*/
cpumask_set_cpu(cpu, cpu_callout_mask);
boot_error = 0;
break;
}
schedule();
}
}
return wakeup_secondary_cpu_via_init(apicid, start_ip);
}

if (!boot_error) {
/*
* Wait till AP completes initial initialization
*/
while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
/*
* Allow other tasks to run while we wait for the
* AP to come online. This also gives a chance
* for the MTRR work(triggered by the AP coming online)
* to be completed in the stop machine context.
*/
schedule();
}
}
/*
 * Poll @mask until the bit for @cpu shows up in it, calling schedule()
 * between checks so other tasks can run while we wait.
 *
 * Returns 0 as soon as the bit is observed, or -1 if it has not appeared
 * once the 10*HZ jiffies deadline has passed.
 *
 * No warm-reset vector handling is done here; native_cpu_up() restores
 * the vector on its common exit path.
 */
static int wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask)
{
	unsigned long timeout;

	/*
	 * Wait up to 10s for the CPU to report in.
	 */
	timeout = jiffies + 10*HZ;
	while (time_before(jiffies, timeout)) {
		if (cpumask_test_cpu(cpu, mask))
			return 0;

		schedule();
	}
	return -1;
}

return boot_error;
/*
 * Bringup step two: wait until the target AP has reached
 * cpu_init_secondary(), and therefore wait_for_master_cpu(), then set
 * its bit in cpu_callout_mask so that it may carry on. Once released,
 * the AP runs on past setting its 'callin' bit and ends up waiting in
 * check_tsc_sync_target() until the controlling CPU gets to
 * wait_cpu_online() and tends to it.
 *
 * Returns 0 once the AP has been released, or -1 if the AP never set
 * its bit in cpu_initialized_mask before wait_cpu_cpumask() gave up.
 */
static int wait_cpu_initialized(unsigned int cpu)
{
	/* First sign of life from the AP: its bit in cpu_initialized_mask. */
	if (!wait_cpu_cpumask(cpu, cpu_initialized_mask)) {
		/* The AP checked in; let it proceed. */
		cpumask_set_cpu(cpu, cpu_callout_mask);
		return 0;
	}

	return -1;
}

int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
/*
 * Bringup step three: wait until the target AP has reached smp_callin(),
 * i.e. until its bit is set in cpu_callin_mask.
 *
 * The AP does not wait for the controlling CPU at this point, so there
 * is no need to parallelise this step. It is not entirely clear why this
 * wait matters at all, given that TSC synchronization follows directly
 * and is the next sync point with the AP anyway.
 *
 * There is no timeout here: this yields via schedule() until the bit
 * appears.
 */
static void wait_cpu_callin(unsigned int cpu)
{
	for (;;) {
		if (cpumask_test_cpu(cpu, cpu_callin_mask))
			return;

		schedule();
	}
}

/*
 * Bringup step four: Synchronize the TSC and wait for the target AP
 * to reach set_cpu_online() in start_secondary().
 *
 * The AP side of the TSC check is check_tsc_sync_target(), which makes
 * the TSC synchronization an implicit sync point between the two CPUs.
 *
 * There is no timeout on the final wait: this yields via schedule()
 * until cpu_online(@cpu) becomes true.
 */
static void wait_cpu_online(unsigned int cpu)
{
	unsigned long flags;

	/*
	 * Check TSC synchronization with the AP (keep irqs disabled
	 * while doing so):
	 */
	local_irq_save(flags);
	check_tsc_sync_source(cpu);
	local_irq_restore(flags);

	/*
	 * Wait for the AP to mark itself online, so the core caller
	 * can drop sparse_irq_lock.
	 */
	while (!cpu_online(cpu))
		schedule();
}

static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
{
int apicid = apic->cpu_present_to_apicid(cpu);
int err;

lockdep_assert_irqs_enabled();
Expand Down Expand Up @@ -1140,25 +1186,33 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
return err;

err = do_boot_cpu(apicid, cpu, tidle);
if (err) {
if (err)
pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
return err;
}

/*
* Check TSC synchronization with the AP (keep irqs disabled
* while doing so):
*/
local_irq_save(flags);
check_tsc_sync_source(cpu);
local_irq_restore(flags);
return err;
}

while (!cpu_online(cpu)) {
cpu_relax();
touch_nmi_watchdog();
}
/*
 * Bring up @cpu from the controlling CPU by running the bringup phases
 * in order:
 *
 *  1) native_kick_ap():       send the wakeup to the AP.
 *  2) wait_cpu_initialized(): wait for the AP's first sign of life and
 *                             release it via cpu_callout_mask.
 *  3) wait_cpu_callin():      wait for the AP to reach smp_callin().
 *  4) wait_cpu_online():      synchronize the TSC and wait for the AP to
 *                             mark itself online.
 *
 * Returns 0 on success. If kicking the AP fails, the error from
 * native_kick_ap() is returned; if the AP never reports in, the -1 from
 * wait_cpu_initialized() is returned. In both failure cases the
 * remaining phases are skipped.
 */
int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int ret;

	ret = native_kick_ap(cpu, tidle);
	if (ret)
		goto out;

	ret = wait_cpu_initialized(cpu);
	if (ret)
		goto out;

	wait_cpu_callin(cpu);
	wait_cpu_online(cpu);

out:
	/*
	 * Cleanup possible dangling ends... This runs on the success path
	 * as well as on both failure paths.
	 */
	if (x86_platform.legacy.warm_reset)
		smpboot_restore_warm_reset_vector();

	return ret;
}

/**
Expand Down

0 comments on commit 2b3be65

Please sign in to comment.