x86/microcode: Add per CPU result state
The microcode rendezvous acts purely on global state, which does not
allow failures to be analyzed in a coherent way.

Introduce per CPU state into which the results are written, which allows the
return codes of the individual CPUs to be analyzed.

Initialize the state when walking the cpu_present_mask in the online
check to avoid another for_each_cpu() loop.

Enhance the result printout with this information.

The structure is intentionally named ucode_ctrl as it will gain control
fields in subsequent changes.

Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
KAGA-KOKO authored and bp3tk0v committed Oct 24, 2023
1 parent 0772b9a commit 4b75395
Showing 2 changed files with 68 additions and 47 deletions.
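Before reading the diff, it may help to see the per-CPU bookkeeping pattern the change relies on in isolation. The sketch below is a condensed illustration distilled from the hunks that follow, not the patch itself; the helper names record_ucode_result() and tally_ucode_results() are invented for the example, while DEFINE_PER_CPU(), this_cpu_write(), per_cpu() and for_each_online_cpu() are the regular kernel per-CPU primitives and microcode_ops->apply_microcode() is the callback used in core.c.

/* One result slot per CPU; written by each CPU, read later by the control CPU. */
struct microcode_ctrl {
        enum ucode_state result;
};

static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);

/* Runs on every CPU inside the stop_machine() rendezvous. */
static int record_ucode_result(void *unused)
{
        enum ucode_state ret = microcode_ops->apply_microcode(smp_processor_id());

        /* Each CPU stores its own outcome; no shared variable, no locking. */
        this_cpu_write(ucode_ctrl.result, ret);
        return 0;
}

/* Runs on the control CPU afterwards to count the individual outcomes. */
static void tally_ucode_results(unsigned int *updated, unsigned int *failed)
{
        unsigned int cpu;

        for_each_online_cpu(cpu) {
                if (per_cpu(ucode_ctrl.result, cpu) == UCODE_UPDATED)
                        (*updated)++;
                else
                        (*failed)++;
        }
}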
114 changes: 67 additions & 47 deletions arch/x86/kernel/cpu/microcode/core.c
@@ -252,6 +252,11 @@ static struct platform_device *microcode_pdev;
* requirement can be relaxed in the future. Right now, this is conservative
* and good.
*/
struct microcode_ctrl {
enum ucode_state result;
};

static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
static atomic_t late_cpus_in, late_cpus_out;

static bool wait_for_cpus(atomic_t *cnt)
@@ -274,23 +279,19 @@ static bool wait_for_cpus(atomic_t *cnt)
return false;
}

/*
* Returns:
* < 0 - on error
* 0 - success (no update done or microcode was updated)
*/
static int __reload_late(void *info)
static int load_cpus_stopped(void *unused)
{
int cpu = smp_processor_id();
enum ucode_state err;
int ret = 0;
enum ucode_state ret;

/*
* Wait for all CPUs to arrive. A load will not be attempted unless all
* CPUs show up.
* */
if (!wait_for_cpus(&late_cpus_in))
return -1;
if (!wait_for_cpus(&late_cpus_in)) {
this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
return 0;
}

/*
* On an SMT system, it suffices to load the microcode on one sibling of
@@ -299,17 +300,11 @@ static int __reload_late(void *info)
* loading attempts happen on multiple threads of an SMT core. See
* below.
*/
if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu)
err = microcode_ops->apply_microcode(cpu);
else
if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
goto wait_for_siblings;

if (err >= UCODE_NFOUND) {
if (err == UCODE_ERROR) {
pr_warn("Error reloading microcode on CPU %d\n", cpu);
ret = -1;
}
}
ret = microcode_ops->apply_microcode(cpu);
this_cpu_write(ucode_ctrl.result, ret);

wait_for_siblings:
if (!wait_for_cpus(&late_cpus_out))
@@ -321,19 +316,18 @@ static int __reload_late(void *info)
* per-cpu cpuinfo can be updated with right microcode
* revision.
*/
if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
err = microcode_ops->apply_microcode(cpu);
if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu)
return 0;

return ret;
ret = microcode_ops->apply_microcode(cpu);
this_cpu_write(ucode_ctrl.result, ret);
return 0;
}

/*
* Reload microcode late on all CPUs. Wait for a sec until they
* all gather together.
*/
static int microcode_reload_late(void)
static int load_late_stop_cpus(void)
{
int old = boot_cpu_data.microcode, ret;
unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
int old_rev = boot_cpu_data.microcode;
struct cpuinfo_x86 prev_info;

pr_err("Attempting late microcode loading - it is dangerous and taints the kernel.\n");
@@ -348,26 +342,47 @@ static int microcode_reload_late(void)
*/
store_cpu_caps(&prev_info);

ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask);

/* Analyze the results */
for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
switch (per_cpu(ucode_ctrl.result, cpu)) {
case UCODE_UPDATED: updated++; break;
case UCODE_TIMEOUT: timedout++; break;
case UCODE_OK: siblings++; break;
default: failed++; break;
}
}

if (microcode_ops->finalize_late_load)
microcode_ops->finalize_late_load(ret);

if (!ret) {
pr_info("Reload succeeded, microcode revision: 0x%x -> 0x%x\n",
old, boot_cpu_data.microcode);
microcode_check(&prev_info);
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
} else {
pr_info("Reload failed, current microcode revision: 0x%x\n",
boot_cpu_data.microcode);
microcode_ops->finalize_late_load(!updated);

if (!updated) {
/* Nothing changed. */
if (!failed && !timedout)
return 0;
pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
failed, timedout);
return -EIO;
}

add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings);
if (failed || timedout) {
pr_err("load incomplete. %u CPUs timed out or failed\n",
num_online_cpus() - (updated + siblings));
}
return ret;
pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode);
microcode_check(&prev_info);

return updated + siblings == num_online_cpus() ? 0 : -EIO;
}

/*
* Ensure that all required CPUs which are present and have been booted
* once are online.
* This function does two things:
*
* 1) Ensure that all required CPUs which are present and have been booted
* once are online.
*
* To pass this check, all primary threads must be online.
*
@@ -378,9 +393,12 @@
* behaviour is undefined. The default play_dead() implementation on
* modern CPUs uses MWAIT, which is also not guaranteed to be safe
* against a microcode update which affects MWAIT.
*
* 2) Initialize the per CPU control structure
*/
static bool ensure_cpus_are_online(void)
static bool setup_cpus(void)
{
struct microcode_ctrl ctrl = { .result = -1, };
unsigned int cpu;

for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
@@ -390,18 +408,20 @@ static bool ensure_cpus_are_online(void)
return false;
}
}
/* Initialize the per CPU state */
per_cpu(ucode_ctrl, cpu) = ctrl;
}
return true;
}

static int ucode_load_late_locked(void)
static int load_late_locked(void)
{
if (!ensure_cpus_are_online())
if (!setup_cpus())
return -EBUSY;

switch (microcode_ops->request_microcode_fw(0, &microcode_pdev->dev)) {
case UCODE_NEW:
return microcode_reload_late();
return load_late_stop_cpus();
case UCODE_NFOUND:
return -ENOENT;
default:
@@ -421,7 +441,7 @@ static ssize_t reload_store(struct device *dev,
return -EINVAL;

cpus_read_lock();
ret = ucode_load_late_locked();
ret = load_late_locked();
cpus_read_unlock();

return ret ? : size;
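A note on the result analysis in load_late_stop_cpus() above: setup_cpus() presets each CPU's result to -1, which is not a valid enum ucode_state value, so a CPU whose stopper callback never ran falls into the default branch of the tally and is counted as failed rather than silently ignored. A condensed, illustrative view of that decision path follows; the function name tally_and_decide() is invented for the sketch, while the macros and counters are the ones used in the hunk above.

static int tally_and_decide(void)
{
        unsigned int cpu, updated = 0, siblings = 0, failed = 0, timedout = 0;

        for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
                switch (per_cpu(ucode_ctrl.result, cpu)) {
                case UCODE_UPDATED:     updated++;  break;  /* primary thread took the update */
                case UCODE_TIMEOUT:     timedout++; break;  /* rendezvous timed out */
                case UCODE_OK:          siblings++; break;  /* SMT sibling, revision already in sync */
                default:                failed++;   break;  /* apply error, or preset -1: callback never ran */
                }
        }

        /* Nothing updated: success only if nothing failed or timed out either. */
        if (!updated)
                return (failed || timedout) ? -EIO : 0;

        /* Something updated: success only if every online CPU is accounted for. */
        return (updated + siblings == num_online_cpus()) ? 0 : -EIO;
}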
1 change: 1 addition & 0 deletions arch/x86/kernel/cpu/microcode/internal.h
@@ -16,6 +16,7 @@ enum ucode_state {
UCODE_UPDATED,
UCODE_NFOUND,
UCODE_ERROR,
UCODE_TIMEOUT,
};

struct microcode_ops {
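The only change to internal.h is the new UCODE_TIMEOUT enumerator. It is produced solely on the rendezvous-failure path of load_cpus_stopped() in core.c, shown again here as a trimmed excerpt for reference:

        /* Not all CPUs arrived: record a per-CPU timeout instead of aborting the stopper. */
        if (!wait_for_cpus(&late_cpus_in)) {
                this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
                return 0;
        }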
