Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini:
 "Bugfixes for x86, PPC and s390"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: PPC: Book3S: Fix race and leak in kvm_vm_ioctl_create_spapr_tce()
  KVM, pkeys: do not use PKRU value in vcpu->arch.guest_fpu.state
  KVM: x86: simplify handling of PKRU
  KVM: x86: block guest protection keys unless the host has them enabled
  KVM: PPC: Book3S HV: Add missing barriers to XIVE code and document them
  KVM: PPC: Book3S HV: Workaround POWER9 DD1.0 bug causing IPB bit loss
  KVM: PPC: Book3S HV: Use msgsync with hypervisor doorbells on POWER9
  KVM: s390: sthyi: fix specification exception detection
  KVM: s390: sthyi: fix sthyi inline assembly
torvalds committed Aug 26, 2017
2 parents 17e34c4 + 47c5310 commit 67a3b5c
Showing 12 changed files with 135 additions and 64 deletions.
56 changes: 34 additions & 22 deletions arch/powerpc/kvm/book3s_64_vio.c
@@ -294,32 +294,26 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 				   struct kvm_create_spapr_tce_64 *args)
 {
 	struct kvmppc_spapr_tce_table *stt = NULL;
+	struct kvmppc_spapr_tce_table *siter;
 	unsigned long npages, size;
 	int ret = -ENOMEM;
 	int i;
+	int fd = -1;
 
 	if (!args->size)
 		return -EINVAL;
 
-	/* Check this LIOBN hasn't been previously allocated */
-	list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
-		if (stt->liobn == args->liobn)
-			return -EBUSY;
-	}
-
 	size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
 	npages = kvmppc_tce_pages(size);
 	ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
-	if (ret) {
-		stt = NULL;
-		goto fail;
-	}
+	if (ret)
+		return ret;
 
 	ret = -ENOMEM;
 	stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
 		      GFP_KERNEL);
 	if (!stt)
-		goto fail;
+		goto fail_acct;
 
 	stt->liobn = args->liobn;
 	stt->page_shift = args->page_shift;
@@ -334,24 +328,42 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 		goto fail;
 	}
 
-	kvm_get_kvm(kvm);
+	ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+				    stt, O_RDWR | O_CLOEXEC);
+	if (ret < 0)
+		goto fail;
 
 	mutex_lock(&kvm->lock);
-	list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
+
+	/* Check this LIOBN hasn't been previously allocated */
+	ret = 0;
+	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
+		if (siter->liobn == args->liobn) {
+			ret = -EBUSY;
+			break;
+		}
+	}
+
+	if (!ret) {
+		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
+		kvm_get_kvm(kvm);
+	}
 
 	mutex_unlock(&kvm->lock);
 
-	return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
-				stt, O_RDWR | O_CLOEXEC);
+	if (!ret)
+		return fd;
 
-fail:
-	if (stt) {
-		for (i = 0; i < npages; i++)
-			if (stt->pages[i])
-				__free_page(stt->pages[i]);
+	put_unused_fd(fd);
 
-		kfree(stt);
-	}
+fail:
+	for (i = 0; i < npages; i++)
+		if (stt->pages[i])
+			__free_page(stt->pages[i]);
+
+	kfree(stt);
+fail_acct:
+	kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
 	return ret;
 }

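The race fixed above: the LIOBN duplicate check used to run before kvm->lock was taken, so two threads creating tables with the same LIOBN could both pass the check and both end up on the list; the old error path also leaked the memory-limit accounting taken by kvmppc_account_memlimit(). The rewritten function allocates everything first, then re-checks and publishes atomically under the lock, unwinding the fd, the pages, and the accounting on failure. A minimal user-space sketch of the check-then-publish idiom follows; all names in it (table, registry, find_locked) are hypothetical, not kernel code:

/*
 * Sketch of the bug class: checking for a duplicate before taking
 * the lock lets two threads both pass the check and both publish.
 * The fix is to allocate first, then re-check and publish in one
 * critical section, undoing the allocation on a duplicate.
 */
#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

struct table { long liobn; struct table *next; };

static struct table *registry;
static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

/* Caller must hold registry_lock. */
static struct table *find_locked(long liobn)
{
	struct table *t;

	for (t = registry; t; t = t->next)
		if (t->liobn == liobn)
			return t;
	return NULL;
}

static int create_table(long liobn)
{
	struct table *t = calloc(1, sizeof(*t));

	if (!t)
		return -ENOMEM;
	t->liobn = liobn;

	pthread_mutex_lock(&registry_lock);
	if (find_locked(liobn)) {	/* duplicate: undo, don't publish */
		pthread_mutex_unlock(&registry_lock);
		free(t);
		return -EBUSY;
	}
	t->next = registry;		/* publish under the lock */
	registry = t;
	pthread_mutex_unlock(&registry_lock);
	return 0;
}

Because the duplicate check and the insertion sit in one critical section, no second creator can slip in between them.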
3 changes: 3 additions & 0 deletions arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1291,6 +1291,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	/* Hypervisor doorbell - exit only if host IPI flag set */
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
 	bne	3f
+BEGIN_FTR_SECTION
+	PPC_MSGSYNC
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	lbz	r0, HSTATE_HOST_IPI(r13)
 	cmpwi	r0, 0
 	beq	4f
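On POWER9 a hypervisor doorbell can be observed by the target thread before the sender's earlier stores (here, the HSTATE_HOST_IPI flag) are visible to it; msgsync orders the received doorbell against those stores, so the lbz of HSTATE_HOST_IPI that follows cannot read a stale zero and miss the host IPI. A rough C11-atomics model of the same pairing, offered as an illustration rather than the actual PPC semantics:

/*
 * The sender sets a flag and then rings a doorbell; the receiver
 * must read the flag with at least acquire ordering relative to the
 * doorbell it observed, or it can miss the flag the way the
 * pre-msgsync code could miss HSTATE_HOST_IPI.
 */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool host_ipi;	/* plays the role of HSTATE_HOST_IPI */
static atomic_bool doorbell;	/* plays the role of the msgsnd doorbell */

void sender(void)
{
	atomic_store_explicit(&host_ipi, true, memory_order_relaxed);
	/* release: everything above is visible once the doorbell is seen */
	atomic_store_explicit(&doorbell, true, memory_order_release);
}

bool receiver_saw_ipi(void)
{
	/* acquire pairs with the release above, as msgsync does for msgsnd */
	if (!atomic_load_explicit(&doorbell, memory_order_acquire))
		return false;
	return atomic_load_explicit(&host_ipi, memory_order_relaxed);
}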
68 changes: 65 additions & 3 deletions arch/powerpc/kvm/book3s_xive_template.c
@@ -16,7 +16,22 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
 	u8 cppr;
 	u16 ack;
 
-	/* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! */
+	/*
+	 * Ensure any previous store to CPPR is ordered vs.
+	 * the subsequent loads from PIPR or ACK.
+	 */
+	eieio();
+
+	/*
+	 * DD1 bug workaround: If PIPR is less favored than CPPR
+	 * ignore the interrupt or we might incorrectly lose an IPB
+	 * bit.
+	 */
+	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+		u8 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR);
+		if (pipr >= xc->hw_cppr)
+			return;
+	}
 
 	/* Perform the acknowledge OS to register cycle. */
 	ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
@@ -235,6 +250,11 @@ static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
 	/*
 	 * If we found an interrupt, adjust what the guest CPPR should
 	 * be as if we had just fetched that interrupt from HW.
+	 *
+	 * Note: This can only make xc->cppr smaller as the previous
+	 * loop will only exit with hirq != 0 if prio is lower than
+	 * the current xc->cppr. Thus we don't need to re-check xc->mfrr
+	 * for pending IPIs.
 	 */
 	if (hirq)
 		xc->cppr = prio;
@@ -380,6 +400,12 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
 	old_cppr = xc->cppr;
 	xc->cppr = cppr;
 
+	/*
+	 * Order the above update of xc->cppr with the subsequent
+	 * read of xc->mfrr inside push_pending_to_hw()
+	 */
+	smp_mb();
+
 	/*
 	 * We are masking less, we need to look for pending things
 	 * to deliver and set VP pending bits accordingly to trigger
@@ -420,21 +446,37 @@ X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
 	 * used to signal MFRR changes is EOId when fetched from
 	 * the queue.
 	 */
-	if (irq == XICS_IPI || irq == 0)
+	if (irq == XICS_IPI || irq == 0) {
+		/*
+		 * This barrier orders the setting of xc->cppr vs.
+		 * subsequent test of xc->mfrr done inside
+		 * scan_interrupts and push_pending_to_hw
+		 */
+		smp_mb();
 		goto bail;
+	}
 
 	/* Find interrupt source */
 	sb = kvmppc_xive_find_source(xive, irq, &src);
 	if (!sb) {
 		pr_devel(" source not found !\n");
 		rc = H_PARAMETER;
+		/* Same as above */
+		smp_mb();
 		goto bail;
 	}
 	state = &sb->irq_state[src];
 	kvmppc_xive_select_irq(state, &hw_num, &xd);
 
 	state->in_eoi = true;
-	mb();
+
+	/*
+	 * This barrier orders both setting of in_eoi above vs.
+	 * subsequent test of guest_priority, and the setting
+	 * of xc->cppr vs. subsequent test of xc->mfrr done inside
+	 * scan_interrupts and push_pending_to_hw
+	 */
+	smp_mb();
 
 again:
 	if (state->guest_priority == MASKED) {
@@ -461,6 +503,14 @@ X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
 
 	}
 
+	/*
+	 * This barrier orders the above guest_priority check
+	 * and spin_lock/unlock with clearing in_eoi below.
+	 *
+	 * It also has to be a full mb() as it must ensure
+	 * the MMIOs done in source_eoi() are completed before
+	 * state->in_eoi is visible.
+	 */
 	mb();
 	state->in_eoi = false;
 bail:
@@ -495,6 +545,18 @@ X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
 	/* Locklessly write over MFRR */
 	xc->mfrr = mfrr;
 
+	/*
+	 * The load of xc->cppr below and the subsequent MMIO store
+	 * to the IPI must happen after the above mfrr update is
+	 * globally visible so that:
+	 *
+	 * - Synchronize with another CPU doing an H_EOI or a H_CPPR
+	 *   updating xc->cppr then reading xc->mfrr.
+	 *
+	 * - The target of the IPI sees the xc->mfrr update
+	 */
+	mb();
+
 	/* Shoot the IPI if more favored than target cppr */
 	if (mfrr < xc->cppr)
 		__x_writeq(0, __x_trig_page(&xc->vp_ipi_data));
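All of the barriers added above pair a store on one vCPU with a load on another: h_cppr stores xc->cppr and then reads xc->mfrr, h_ipi stores xc->mfrr and then reads xc->cppr, and h_eoi plays the same game with state->in_eoi against guest_priority. Without a full barrier between "store mine" and "load theirs", both sides can read the stale value and each conclude there is nothing to push — the classic store-buffering outcome, which here would lose an interrupt. A C11 sketch of the pattern (an illustration, not the XIVE code):

/*
 * Each CPU stores its own field and then loads the other's. With
 * only relaxed ordering, both loads may see the old 0xff values and
 * both sides conclude there is nothing to deliver.
 */
#include <stdatomic.h>

static atomic_uchar cppr = 0xff;
static atomic_uchar mfrr = 0xff;

unsigned char cpu0_set_cppr(unsigned char new_cppr)
{
	atomic_store_explicit(&cppr, new_cppr, memory_order_relaxed);
	/* full barrier, standing in for the smp_mb()/mb() in the patch */
	atomic_thread_fence(memory_order_seq_cst);
	return atomic_load_explicit(&mfrr, memory_order_relaxed);
}

unsigned char cpu1_set_mfrr(unsigned char new_mfrr)
{
	atomic_store_explicit(&mfrr, new_mfrr, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);
	return atomic_load_explicit(&cppr, memory_order_relaxed);
}

With the two seq_cst fences in place, at least one side is guaranteed to observe the other's update, so the pending interrupt is noticed by someone.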
7 changes: 5 additions & 2 deletions arch/s390/kvm/sthyi.c
@@ -394,7 +394,7 @@ static int sthyi(u64 vaddr)
 		"srl	%[cc],28\n"
 		: [cc] "=d" (cc)
 		: [code] "d" (code), [addr] "a" (addr)
-		: "memory", "cc");
+		: "3", "memory", "cc");
 	return cc;
 }

@@ -425,14 +425,17 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
 	VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
 	trace_kvm_s390_handle_sthyi(vcpu, code, addr);
 
-	if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK)
+	if (reg1 == reg2 || reg1 & 1 || reg2 & 1)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	if (code & 0xffff) {
 		cc = 3;
 		goto out;
 	}
 
+	if (addr & ~PAGE_MASK)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
 	/*
 	 * If the page has not yet been faulted in, we want to do that
 	 * now and not after all the expensive calculations.
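Two separate bugs here. First, STHYI writes its return code into the odd register of its register pair (GPR 3 in this function), which the inline assembly never declared, so the compiler was free to keep a live value there. Second, the buffer-alignment specification exception was raised before the function code was validated, while the architecture wants an invalid function code to yield cc 3 regardless of alignment, hence the reordered checks. The clobber bug is a general inline-asm hazard; below is a hypothetical x86-64 illustration of the same class (not s390 code):

/*
 * An asm statement that modifies a register the compiler was not
 * told about. Without "rcx" in the clobber list, the compiler may
 * keep a live value in %rcx across the asm and read back garbage.
 */
static inline unsigned long count_shifts(unsigned long v)
{
	unsigned long n;

	asm("xor %%rcx, %%rcx\n\t"	/* uses %rcx as a scratch counter */
	    "1: test %[v], %[v]\n\t"
	    "jz 2f\n\t"
	    "shr $1, %[v]\n\t"
	    "inc %%rcx\n\t"
	    "jmp 1b\n\t"
	    "2: mov %%rcx, %[n]"
	    : [n] "=r" (n), [v] "+r" (v)
	    :
	    : "rcx", "cc");		/* the fix: declare %rcx and the flags */
	return n;
}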
6 changes: 3 additions & 3 deletions arch/x86/include/asm/fpu/internal.h
@@ -450,10 +450,10 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
 	return 0;
 }
 
-static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate)
+static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
 {
 	if (use_xsave()) {
-		copy_kernel_to_xregs(&fpstate->xsave, -1);
+		copy_kernel_to_xregs(&fpstate->xsave, mask);
 	} else {
 		if (use_fxsr())
 			copy_kernel_to_fxregs(&fpstate->fxsave);
@@ -477,7 +477,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
 		: : [addr] "m" (fpstate));
 	}
 
-	__copy_kernel_to_fpregs(fpstate);
+	__copy_kernel_to_fpregs(fpstate, -1);
 }
 
 extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
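The new mask argument is the XRSTOR requested-feature bitmap: -1 keeps the old restore-everything behaviour, while a cleared bit leaves that state component's registers untouched. This is what lets KVM's guest-FPU load path (in the x86.c part of this pull, which did not render above) reload the guest FPU image without clobbering the PKRU value it now tracks separately in vcpu->arch.pkru. A small sketch of building such a mask; it mirrors the kernel's constants but is purely illustrative, and restore_fp_state is a hypothetical stand-in for __copy_kernel_to_fpregs:

#include <stdint.h>

#define XFEATURE_PKRU		9		/* PKRU is xsave component 9 */
#define XFEATURE_MASK_PKRU	(1ULL << XFEATURE_PKRU)

/* Hypothetical stand-in for __copy_kernel_to_fpregs(fpstate, mask). */
void restore_fp_state(const void *fpstate, uint64_t mask);

void restore_all(const void *fpstate)
{
	restore_fp_state(fpstate, ~0ULL);		/* old behaviour: mask = -1 */
}

void restore_except_pkru(const void *fpstate)
{
	restore_fp_state(fpstate, ~XFEATURE_MASK_PKRU);	/* leave PKRU alone */
}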
1 change: 1 addition & 0 deletions arch/x86/include/asm/kvm_host.h
@@ -492,6 +492,7 @@ struct kvm_vcpu_arch {
 	unsigned long cr4;
 	unsigned long cr4_guest_owned_bits;
 	unsigned long cr8;
+	u32 pkru;
 	u32 hflags;
 	u64 efer;
 	u64 apic_base;
2 changes: 1 addition & 1 deletion arch/x86/kvm/cpuid.c
@@ -469,7 +469,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
 		cpuid_mask(&entry->ecx, CPUID_7_ECX);
 		/* PKU is not yet implemented for shadow paging. */
-		if (!tdp_enabled)
+		if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
 			entry->ecx &= ~F(PKU);
 		entry->edx &= kvm_cpuid_7_0_edx_x86_features;
 		entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX);
5 changes: 0 additions & 5 deletions arch/x86/kvm/kvm_cache_regs.h
@@ -84,11 +84,6 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
 		| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
 }
 
-static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu)
-{
-	return kvm_x86_ops->get_pkru(vcpu);
-}
-
 static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hflags |= HF_GUEST_MASK;
2 changes: 1 addition & 1 deletion arch/x86/kvm/mmu.h
@@ -185,7 +185,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 		* index of the protection domain, so pte_pkey * 2 is
 		* the index of the first bit for the domain.
 		*/
-		pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
+		pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;
 
 		/* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
 		offset = (pfec & ~1) +
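For reference, PKRU packs two bits per protection key — bit 2k is Access-Disable and bit 2k+1 is Write-Disable for key k — so the patched line's shift-and-mask extracts exactly the pair belonging to pte_pkey. A standalone worked example:

/*
 * For protection key 5, AD is PKRU bit 10 and WD is bit 11.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t pkru = 1u << 11;	/* write-disable key 5 */
	unsigned int pte_pkey = 5;
	uint32_t pkru_bits = (pkru >> (pte_pkey * 2)) & 3;

	assert(pkru_bits == 2);		/* bit 0 = AD (clear), bit 1 = WD (set) */
	return 0;
}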
7 changes: 0 additions & 7 deletions arch/x86/kvm/svm.c
@@ -1777,11 +1777,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	to_svm(vcpu)->vmcb->save.rflags = rflags;
 }
 
-static u32 svm_get_pkru(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 {
 	switch (reg) {
@@ -5413,8 +5408,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.get_rflags = svm_get_rflags,
 	.set_rflags = svm_set_rflags,
 
-	.get_pkru = svm_get_pkru,
-
 	.tlb_flush = svm_flush_tlb,
 
 	.run = svm_vcpu_run,
25 changes: 8 additions & 17 deletions arch/x86/kvm/vmx.c
@@ -636,8 +636,6 @@ struct vcpu_vmx {
 
 	u64 current_tsc_ratio;
 
-	bool guest_pkru_valid;
-	u32 guest_pkru;
 	u32 host_pkru;
 
 	/*
@@ -2383,11 +2381,6 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
 }
 
-static u32 vmx_get_pkru(struct kvm_vcpu *vcpu)
-{
-	return to_vmx(vcpu)->guest_pkru;
-}
-
 static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
 {
 	u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
@@ -9020,8 +9013,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 		vmx_set_interrupt_shadow(vcpu, 0);
 
-	if (vmx->guest_pkru_valid)
-		__write_pkru(vmx->guest_pkru);
+	if (static_cpu_has(X86_FEATURE_PKU) &&
+	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
+	    vcpu->arch.pkru != vmx->host_pkru)
+		__write_pkru(vcpu->arch.pkru);
 
 	atomic_switch_perf_msrs(vmx);
 	debugctlmsr = get_debugctlmsr();
@@ -9169,13 +9164,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	 * back on host, so it is safe to read guest PKRU from current
 	 * XSAVE.
 	 */
-	if (boot_cpu_has(X86_FEATURE_OSPKE)) {
-		vmx->guest_pkru = __read_pkru();
-		if (vmx->guest_pkru != vmx->host_pkru) {
-			vmx->guest_pkru_valid = true;
+	if (static_cpu_has(X86_FEATURE_PKU) &&
+	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
+		vcpu->arch.pkru = __read_pkru();
+		if (vcpu->arch.pkru != vmx->host_pkru)
 			__write_pkru(vmx->host_pkru);
-		} else
-			vmx->guest_pkru_valid = false;
 	}
 
 	/*
@@ -11682,8 +11675,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.get_rflags = vmx_get_rflags,
 	.set_rflags = vmx_set_rflags,
 
-	.get_pkru = vmx_get_pkru,
-
 	.tlb_flush = vmx_flush_tlb,
 
 	.run = vmx_vcpu_run,
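With guest_pkru_valid gone, the guest's PKRU lives in vcpu->arch.pkru and is swapped around VM entry and exit only when the guest actually has PKU enabled (CR4.PKE) and the values differ, since an unconditional WRPKRU on every entry would be wasted work. A user-space sketch of that lazy swap; read_pkru and write_pkru here are assumed stand-ins, not the kernel's __read_pkru/__write_pkru:

#include <stdint.h>

struct pkru_ctx {
	uint32_t host_pkru;
	uint32_t guest_pkru;
};

/* Stand-ins for the hardware accessors; assumed, not the kernel API. */
extern uint32_t read_pkru(void);
extern void write_pkru(uint32_t val);

static void enter_guest(struct pkru_ctx *ctx)
{
	if (ctx->guest_pkru != ctx->host_pkru)
		write_pkru(ctx->guest_pkru);	/* load guest rights */
}

static void exit_guest(struct pkru_ctx *ctx)
{
	ctx->guest_pkru = read_pkru();		/* guest may have changed it */
	if (ctx->guest_pkru != ctx->host_pkru)
		write_pkru(ctx->host_pkru);	/* restore host rights */
}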