Skip to content

Commit

Permalink
Merge tag 'powerpc-5.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Browse files Browse the repository at this point in the history

Pull powerpc fixes from Michael Ellerman:

 - Fix KVM "lost kick" race, where an attempt to pull a vcpu out of the
   guest could be lost (or delayed until the next guest exit).

 - Disable SCV (system call vectored) when PR KVM guests could be run.

 - Fix KVM PR guests using SCV, by disallowing AIL != 0 for KVM PR
   guests.

 - Add a new KVM CAP to indicate if AIL == 3 is supported.

 - Fix a regression when hotplugging a CPU to a memoryless/cpuless node.

 - Make virt_addr_valid() stricter for 64-bit Book3E & 32-bit, which
   fixes crashes seen due to hardened usercopy.

 - Revert a change to max_mapnr which broke HIGHMEM.

Thanks to Christophe Leroy, Fabiano Rosas, Kefeng Wang, Nicholas Piggin,
and Srikar Dronamraju.

* tag 'powerpc-5.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  Revert "powerpc: Set max_mapnr correctly"
  powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit
  KVM: PPC: Move kvmhv_on_pseries() into kvm_ppc.h
  powerpc/numa: Handle partially initialized numa nodes
  powerpc/64: Fix build failure with allyesconfig in book3s_64_entry.S
  KVM: PPC: Use KVM_CAP_PPC_AIL_MODE_3
  KVM: PPC: Book3S PR: Disallow AIL != 0
  KVM: PPC: Book3S PR: Disable SCV when AIL could be disabled
  KVM: PPC: Book3S HV P9: Fix "lost kick" race
  • Loading branch information
torvalds committed Apr 10, 2022
2 parents 1519610 + 1ff5c8e commit 4ea3c64
Show file tree
Hide file tree
Showing 15 changed files with 169 additions and 35 deletions.
12 changes: 0 additions & 12 deletions arch/powerpc/include/asm/kvm_book3s_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,6 @@
#include <asm/ppc-opcode.h>
#include <asm/pte-walk.h>

/*
 * Returns true when the kernel is built with CONFIG_PPC_PSERIES and the CPU
 * does not have the CPU_FTR_HVMODE feature; returns false in every other
 * case, including all builds without CONFIG_PPC_PSERIES.
 */
static inline bool kvmhv_on_pseries(void)
{
#ifdef CONFIG_PPC_PSERIES
	return !cpu_has_feature(CPU_FTR_HVMODE);
#else
	return false;
#endif
}

/*
* Structure for a nested guest, that is, for a guest that is managed by
* one of our guests.
Expand Down
12 changes: 12 additions & 0 deletions arch/powerpc/include/asm/kvm_ppc.h
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,18 @@ static inline bool kvm_hv_mode_active(void) { return false; }

#endif

/*
 * kvmhv_on_pseries() - true only if CONFIG_PPC_PSERIES is enabled and the
 * CPU lacks the CPU_FTR_HVMODE feature. Without CONFIG_PPC_PSERIES the
 * result is a compile-time constant false.
 */
static inline bool kvmhv_on_pseries(void)
{
#ifdef CONFIG_PPC_PSERIES
	return !cpu_has_feature(CPU_FTR_HVMODE);
#else
	return false;
#endif
}

#ifdef CONFIG_KVM_XICS
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{
Expand Down
6 changes: 5 additions & 1 deletion arch/powerpc/include/asm/page.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,11 @@ static inline bool pfn_valid(unsigned long pfn)
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)

#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
/*
 * virt_addr_valid(vaddr) - check an address against the bounds used here.
 *
 * Evaluates to true only when the address is at or above PAGE_OFFSET,
 * strictly below high_memory, and its page frame number (virt_to_pfn())
 * passes pfn_valid().
 *
 * The argument is parenthesised before the cast so that an expression such
 * as "p + 1" is converted as a whole; without the parentheses the cast would
 * bind to "p" alone and the addition would be done in bytes on the integer.
 * The argument is evaluated exactly once, into the local _addr.
 */
#define virt_addr_valid(vaddr)	({					\
	unsigned long _addr = (unsigned long)(vaddr);			\
	_addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory &&	\
	pfn_valid(virt_to_pfn(_addr));					\
})

/*
* On Book-E parts we need __va to parse the device tree and we can't
Expand Down
2 changes: 2 additions & 0 deletions arch/powerpc/include/asm/setup.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@ void setup_panic(void);
#define ARCH_PANIC_TIMEOUT 180

#ifdef CONFIG_PPC_PSERIES
extern bool pseries_reloc_on_exception(void);
extern bool pseries_enable_reloc_on_exc(void);
extern void pseries_disable_reloc_on_exc(void);
extern void pseries_big_endian_exceptions(void);
void __init pseries_little_endian_exceptions(void);
#else
static inline bool pseries_reloc_on_exception(void) { return false; }
static inline bool pseries_enable_reloc_on_exc(void) { return false; }
static inline void pseries_disable_reloc_on_exc(void) {}
static inline void pseries_big_endian_exceptions(void) {}
Expand Down
4 changes: 4 additions & 0 deletions arch/powerpc/kernel/exceptions-64s.S
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,10 @@ __start_interrupts:
* - MSR_EE|MSR_RI is clear (no reentrant exceptions)
* - Standard kernel environment is set up (stack, paca, etc)
*
* KVM:
* These interrupts do not elevate HV 0->1, so HV is not involved. PR KVM
* ensures that FSCR[SCV] is disabled whenever it has to force AIL off.
*
* Call convention:
*
* syscall register convention is in Documentation/powerpc/syscall64-abi.rst
Expand Down
28 changes: 28 additions & 0 deletions arch/powerpc/kernel/setup_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,34 @@ static void __init configure_exceptions(void)

/* Under a PAPR hypervisor, we need hypercalls */
if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
/*
* - PR KVM does not support AIL mode interrupts in the host
* while a PR guest is running.
*
* - SCV system call interrupt vectors are only implemented for
* AIL mode interrupts.
*
* - On pseries, AIL mode can only be enabled and disabled
* system-wide so when a PR VM is created on a pseries host,
* all CPUs of the host are set to AIL=0 mode.
*
* - Therefore host CPUs must not execute scv while a PR VM
* exists.
*
* - SCV support can not be disabled dynamically because the
* feature is advertised to host userspace. Disabling the
* facility and emulating it would be possible but is not
* implemented.
*
* - So SCV support is blanket disabled if PR KVM could possibly
* run. That is, PR support compiled in, booting on pseries
* with hash MMU.
*/
if (IS_ENABLED(CONFIG_KVM_BOOK3S_PR_POSSIBLE) && !radix_enabled()) {
init_task.thread.fscr &= ~FSCR_SCV;
cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
}

/* Enable AIL if possible */
if (!pseries_enable_reloc_on_exc()) {
init_task.thread.fscr &= ~FSCR_SCV;
Expand Down
9 changes: 9 additions & 0 deletions arch/powerpc/kvm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,21 @@ config KVM_BOOK3S_64_PR
guest in user mode (problem state) and emulating all
privileged instructions and registers.

This is only available for hash MMU mode and only supports
guests that use hash MMU mode.

This is not as fast as using hypervisor mode, but works on
machines where hypervisor mode is not available or not usable,
and can emulate processors that are different from the host
processor, including emulating 32-bit processors on a 64-bit
host.

Selecting this option will cause the SCV facility to be
disabled when the kernel is booted on the pseries platform in
hash MMU mode (regardless of PR VMs running). When any PR VMs
are running, "AIL" mode is disabled which may slow interrupts
and system calls on the host.

config KVM_BOOK3S_HV_EXIT_TIMING
bool "Detailed timing for hypervisor real-mode code"
depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS
Expand Down
10 changes: 8 additions & 2 deletions arch/powerpc/kvm/book3s_64_entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -414,10 +414,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
*/
ld r10,HSTATE_SCRATCH0(r13)
cmpwi r10,BOOK3S_INTERRUPT_MACHINE_CHECK
beq machine_check_common
beq .Lcall_machine_check_common

cmpwi r10,BOOK3S_INTERRUPT_SYSTEM_RESET
beq system_reset_common
beq .Lcall_system_reset_common

b .

.Lcall_machine_check_common:
b machine_check_common

.Lcall_system_reset_common:
b system_reset_common
#endif
41 changes: 33 additions & 8 deletions arch/powerpc/kvm/book3s_hv.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,13 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
int cpu;
struct rcuwait *waitp;

/*
* rcuwait_wake_up contains smp_mb() which orders prior stores that
* create pending work vs below loads of cpu fields. The other side
* is the barrier in vcpu run that orders setting the cpu fields vs
* testing for pending work.
*/

waitp = kvm_arch_vcpu_get_wait(vcpu);
if (rcuwait_wake_up(waitp))
++vcpu->stat.generic.halt_wakeup;
Expand Down Expand Up @@ -1089,7 +1096,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
break;
}
tvcpu->arch.prodded = 1;
smp_mb();
smp_mb(); /* This orders prodded store vs ceded load */
if (tvcpu->arch.ceded)
kvmppc_fast_vcpu_kick_hv(tvcpu);
break;
Expand Down Expand Up @@ -3766,6 +3773,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
pvc = core_info.vc[sub];
pvc->pcpu = pcpu + thr;
for_each_runnable_thread(i, vcpu, pvc) {
/*
* XXX: is kvmppc_start_thread called too late here?
* It updates vcpu->cpu and vcpu->arch.thread_cpu
* which are used by kvmppc_fast_vcpu_kick_hv(), but
* kick is called after new exceptions become available
* and exceptions are checked earlier than here, by
* kvmppc_core_prepare_to_enter.
*/
kvmppc_start_thread(vcpu, pvc);
kvmppc_create_dtl_entry(vcpu, pvc);
trace_kvm_guest_enter(vcpu);
Expand Down Expand Up @@ -4487,6 +4502,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
if (need_resched() || !kvm->arch.mmu_ready)
goto out;

vcpu->cpu = pcpu;
vcpu->arch.thread_cpu = pcpu;
vc->pcpu = pcpu;
local_paca->kvm_hstate.kvm_vcpu = vcpu;
local_paca->kvm_hstate.ptid = 0;
local_paca->kvm_hstate.fake_suspend = 0;

/*
* Orders set cpu/thread_cpu vs testing for pending interrupts and
* doorbells below. The other side is when these fields are set vs
* kvmppc_fast_vcpu_kick_hv reading the cpu/thread_cpu fields to
* kick a vCPU to notice the pending interrupt.
*/
smp_mb();

if (!nested) {
kvmppc_core_prepare_to_enter(vcpu);
if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
Expand All @@ -4506,13 +4536,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,

tb = mftb();

vcpu->cpu = pcpu;
vcpu->arch.thread_cpu = pcpu;
vc->pcpu = pcpu;
local_paca->kvm_hstate.kvm_vcpu = vcpu;
local_paca->kvm_hstate.ptid = 0;
local_paca->kvm_hstate.fake_suspend = 0;

__kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0);

trace_kvm_guest_enter(vcpu);
Expand Down Expand Up @@ -4614,6 +4637,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
out:
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
powerpc_local_irq_pmu_restore(flags);
preempt_enable();
goto done;
Expand Down
26 changes: 17 additions & 9 deletions arch/powerpc/kvm/book3s_pr.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,15 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
svcpu->in_use = 0;
svcpu_put(svcpu);
#endif

/* Disable AIL if supported */
if (cpu_has_feature(CPU_FTR_HVMODE) &&
cpu_has_feature(CPU_FTR_ARCH_207S))
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL);
if (cpu_has_feature(CPU_FTR_HVMODE)) {
if (cpu_has_feature(CPU_FTR_ARCH_207S))
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL);
if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) & ~FSCR_SCV);
}
#endif

vcpu->cpu = smp_processor_id();
#ifdef CONFIG_PPC_BOOK3S_32
Expand All @@ -165,6 +168,14 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
svcpu_put(svcpu);

/* Enable AIL if supported */
if (cpu_has_feature(CPU_FTR_HVMODE)) {
if (cpu_has_feature(CPU_FTR_ARCH_207S))
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3);
if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) | FSCR_SCV);
}
#endif

if (kvmppc_is_split_real(vcpu))
Expand All @@ -174,11 +185,6 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
kvmppc_save_tm_pr(vcpu);

/* Enable AIL if supported */
if (cpu_has_feature(CPU_FTR_HVMODE) &&
cpu_has_feature(CPU_FTR_ARCH_207S))
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3);

vcpu->cpu = -1;
}

Expand Down Expand Up @@ -1037,6 +1043,8 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)

void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr)
{
if (fscr & FSCR_SCV)
fscr &= ~FSCR_SCV; /* SCV must not be enabled */
if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) {
/* TAR got dropped, drop it in shadow too */
kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
Expand Down
20 changes: 20 additions & 0 deletions arch/powerpc/kvm/book3s_pr_papr.c
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,22 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}

/*
 * Handle the H_SET_MODE hypercall for a PR guest.
 *
 * The mode flags are read from GPR4 and the resource from GPR5. Only the
 * H_SET_MODE_RESOURCE_ADDR_TRANS_MODE resource is handled: the hcall status
 * is written to GPR3 (H_SUCCESS when the flags are zero, otherwise
 * H_UNSUPPORTED_FLAG_START - 63) and EMULATE_DONE is returned. Any other
 * resource returns EMULATE_FAIL without touching GPR3.
 */
static int kvmppc_h_pr_set_mode(struct kvm_vcpu *vcpu)
{
	unsigned long flags = kvmppc_get_gpr(vcpu, 4);
	unsigned long res = kvmppc_get_gpr(vcpu, 5);

	if (res != H_SET_MODE_RESOURCE_ADDR_TRANS_MODE)
		return EMULATE_FAIL;

	/* KVM PR does not provide AIL!=0 to guests */
	if (flags != 0)
		kvmppc_set_gpr(vcpu, 3, H_UNSUPPORTED_FLAG_START - 63);
	else
		kvmppc_set_gpr(vcpu, 3, H_SUCCESS);

	return EMULATE_DONE;
}

#ifdef CONFIG_SPAPR_TCE_IOMMU
static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
{
Expand Down Expand Up @@ -384,6 +400,8 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
return kvmppc_h_pr_logical_ci_load(vcpu);
case H_LOGICAL_CI_STORE:
return kvmppc_h_pr_logical_ci_store(vcpu);
case H_SET_MODE:
return kvmppc_h_pr_set_mode(vcpu);
case H_XIRR:
case H_CPPR:
case H_EOI:
Expand Down Expand Up @@ -421,6 +439,7 @@ int kvmppc_hcall_impl_pr(unsigned long cmd)
case H_CEDE:
case H_LOGICAL_CI_LOAD:
case H_LOGICAL_CI_STORE:
case H_SET_MODE:
#ifdef CONFIG_KVM_XICS
case H_XIRR:
case H_CPPR:
Expand All @@ -447,6 +466,7 @@ static unsigned int default_hcall_list[] = {
H_BULK_REMOVE,
H_PUT_TCE,
H_CEDE,
H_SET_MODE,
#ifdef CONFIG_KVM_XICS
H_XIRR,
H_CPPR,
Expand Down
17 changes: 17 additions & 0 deletions arch/powerpc/kvm/powerpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,23 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
#endif
case KVM_CAP_PPC_AIL_MODE_3:
r = 0;
/*
* KVM PR, POWER7, and some POWER9s don't support AIL=3 mode.
* The POWER9s can support it if the guest runs in hash mode,
* but QEMU doesn't necessarily query the capability in time.
*/
if (hv_enabled) {
if (kvmhv_on_pseries()) {
if (pseries_reloc_on_exception())
r = 1;
} else if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
r = 1;
}
}
break;
default:
r = 0;
break;
Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/mm/mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ void __init mem_init(void)
#endif

high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
set_max_mapnr(max_low_pfn);
set_max_mapnr(max_pfn);

kasan_late_init();

Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/mm/numa.c
Original file line number Diff line number Diff line change
Expand Up @@ -1436,7 +1436,7 @@ int find_and_online_cpu_nid(int cpu)
if (new_nid < 0 || !node_possible(new_nid))
new_nid = first_online_node;

if (NODE_DATA(new_nid) == NULL) {
if (!node_online(new_nid)) {
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Need to ensure that NODE_DATA is initialized for a node from
Expand Down
Loading

0 comments on commit 4ea3c64

Please sign in to comment.