Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:

 - Allow again loading KVM on 32-bit non-PAE builds

 - Fixes for host SMIs on AMD

 - Fixes for guest SMIs on AMD

 - Fixes for selftests on s390 and ARM

 - Fix memory leak

 - Enforce no-instrumentation area on vmentry when hardware breakpoints
   are in use.

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (25 commits)
  KVM: selftests: smm_test: Test SMM enter from L2
  KVM: nSVM: Restore nested control upon leaving SMM
  KVM: nSVM: Fix L1 state corruption upon return from SMM
  KVM: nSVM: Introduce svm_copy_vmrun_state()
  KVM: nSVM: Check that VM_HSAVE_PA MSR was set before VMRUN
  KVM: nSVM: Check the value written to MSR_VM_HSAVE_PA
  KVM: SVM: Fix sev_pin_memory() error checks in SEV migration utilities
  KVM: SVM: Return -EFAULT if copy_to_user() for SEV mig packet header fails
  KVM: SVM: add module param to control the #SMI interception
  KVM: SVM: remove INIT intercept handler
  KVM: SVM: #SMI interception must not skip the instruction
  KVM: VMX: Remove vmx_msr_index from vmx.h
  KVM: X86: Disable hardware breakpoints unconditionally before kvm_x86->run()
  KVM: selftests: Address extra memslot parameters in vm_vaddr_alloc
  kvm: debugfs: fix memory leak in kvm_create_vm_debugfs
  KVM: x86/pmu: Clear anythread deprecated bit when 0xa leaf is unsupported on the SVM
  KVM: mmio: Fix use-after-free Read in kvm_vm_ioctl_unregister_coalesced_mmio
  KVM: SVM: Revert clearing of C-bit on GPA in #NPF handler
  KVM: x86/mmu: Do not apply HPA (memory encryption) mask to GPAs
  KVM: x86: Use kernel's x86_phys_bits to handle reduced MAXPHYADDR
  ...
torvalds committed Jul 15, 2021
2 parents f3523a2 + d951b22 commit 405386b
Showing 21 changed files with 255 additions and 64 deletions.
30 changes: 25 additions & 5 deletions arch/x86/kvm/cpuid.c
@@ -765,7 +765,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)

edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS);
edx.split.bit_width_fixed = cap.bit_width_fixed;
edx.split.anythread_deprecated = 1;
if (cap.version)
edx.split.anythread_deprecated = 1;
edx.split.reserved1 = 0;
edx.split.reserved2 = 0;

@@ -940,8 +941,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
unsigned phys_as = entry->eax & 0xff;

if (!g_phys_as)
/*
* If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
* the guest operates in the same PA space as the host, i.e.
* reductions in MAXPHYADDR for memory encryption affect shadow
* paging, too.
*
* If TDP is enabled but an explicit guest MAXPHYADDR is not
* provided, use the raw bare metal MAXPHYADDR as reductions to
* the HPAs do not affect GPAs.
*/
if (!tdp_enabled)
g_phys_as = boot_cpu_data.x86_phys_bits;
else if (!g_phys_as)
g_phys_as = phys_as;

entry->eax = g_phys_as | (virt_as << 8);
entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0008_EBX);
@@ -964,12 +978,18 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
case 0x8000001a:
case 0x8000001e:
break;
/* Support memory encryption cpuid if host supports it */
case 0x8000001F:
if (!kvm_cpu_cap_has(X86_FEATURE_SEV))
if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
else
} else {
cpuid_entry_override(entry, CPUID_8000_001F_EAX);

/*
* Enumerate '0' for "PA bits reduction", the adjusted
* MAXPHYADDR is enumerated directly (see 0x80000008).
*/
entry->ebx &= ~GENMASK(11, 6);
}
break;
/*Add support for Centaur's CPUID instruction*/
case 0xC0000000:
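
Aside (editor's illustration, not part of the diff): the comments above describe two different sources for the guest's MAXPHYADDR. A minimal sketch, assuming a hypothetical host whose bare-metal MAXPHYADDR is 48 bits and whose host MAXPHYADDR is reduced to 43 bits by memory encryption (names and numbers here are illustrative, not KVM code):

/* Hypothetical helper, for illustration only. */
static unsigned int guest_maxphyaddr(bool tdp_enabled,
				     unsigned int raw_phys_as,      /* bare-metal CPUID 0x80000008 EAX[7:0], e.g. 48 */
				     unsigned int adjusted_phys_as) /* boot_cpu_data.x86_phys_bits after the reduction, e.g. 43 */
{
	if (!tdp_enabled)
		/* Shadow paging: guest PAs live in the host PA space, so honor the reduction. */
		return adjusted_phys_as;

	/* NPT: reductions to HPAs do not affect GPAs, so report the raw value. */
	return raw_phys_as;
}

With these numbers a guest would see MAXPHYADDR = 43 under shadow paging and 48 under NPT (the real code additionally honors an explicitly provided guest MAXPHYADDR when TDP is on), and in either case CPUID 0x8000001F EBX[11:6] ("PA bits reduction") ends up as 0, per the last hunk above.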
2 changes: 2 additions & 0 deletions arch/x86/kvm/mmu/mmu.c
@@ -53,6 +53,8 @@
#include <asm/kvm_page_track.h>
#include "trace.h"

#include "paging.h"

extern bool itlb_multihit_kvm_mitigation;

int __read_mostly nx_huge_pages = -1;
14 changes: 14 additions & 0 deletions arch/x86/kvm/mmu/paging.h
@@ -0,0 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* Shadow paging constants/helpers that don't need to be #undef'd. */
#ifndef __KVM_X86_PAGING_H
#define __KVM_X86_PAGING_H

#define GUEST_PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
#define PT64_LVL_ADDR_MASK(level) \
(GUEST_PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
* PT64_LEVEL_BITS))) - 1))
#define PT64_LVL_OFFSET_MASK(level) \
(GUEST_PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
* PT64_LEVEL_BITS))) - 1))
#endif /* __KVM_X86_PAGING_H */
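
Aside (editor's worked example, not from the patch): with 4 KiB pages, PAGE_SHIFT == 12 and PT64_LEVEL_BITS == 9, so for a level-2 (2 MiB) guest mapping the shift is 12 + (2 - 1) * 9 = 21 and the masks evaluate to:

/* Illustration only; variable names and values are hypothetical. */
/* PT64_LVL_ADDR_MASK(2)   == 0x000fffffffe00000  (bits 51:21) */
/* PT64_LVL_OFFSET_MASK(2) == 0x00000000001ff000  (bits 20:12) */
u64 gpde = 0x0000000123456000ULL;		/* guest PDE, hypothetical value */
u64 gva  = 0x00000000abcdef12ULL;		/* guest virtual address, hypothetical value */
u64 base = gpde & PT64_LVL_ADDR_MASK(2);	/* 0x0000000123400000: 2 MiB-aligned base GPA */
u64 sub  = gva  & PT64_LVL_OFFSET_MASK(2);	/* 0x00000000000de000: 4 KiB page within the 2 MiB region */

These guest-PTE masks were apparently split out of spte.h so that host-side address adjustments (such as the memory-encryption mask) are no longer applied to GPAs; see "KVM: x86/mmu: Do not apply HPA (memory encryption) mask to GPAs" in the commit list above.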

4 changes: 2 additions & 2 deletions arch/x86/kvm/mmu/paging_tmpl.h
@@ -24,7 +24,7 @@
#define pt_element_t u64
#define guest_walker guest_walker64
#define FNAME(name) paging##64_##name
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
@@ -57,7 +57,7 @@
#define pt_element_t u64
#define guest_walker guest_walkerEPT
#define FNAME(name) ept_##name
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
6 changes: 0 additions & 6 deletions arch/x86/kvm/mmu/spte.h
@@ -38,12 +38,6 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
#else
#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
#endif
#define PT64_LVL_ADDR_MASK(level) \
(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
* PT64_LEVEL_BITS))) - 1))
#define PT64_LVL_OFFSET_MASK(level) \
(PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
* PT64_LEVEL_BITS))) - 1))

#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \
| shadow_x_mask | shadow_nx_mask | shadow_me_mask)
53 changes: 33 additions & 20 deletions arch/x86/kvm/svm/nested.c
@@ -154,6 +154,10 @@ void recalc_intercepts(struct vcpu_svm *svm)

for (i = 0; i < MAX_INTERCEPT; i++)
c->intercepts[i] |= g->intercepts[i];

/* If SMI is not intercepted, ignore guest SMI intercept as well */
if (!intercept_smi)
vmcb_clr_intercept(c, INTERCEPT_SMI);
}

static void copy_vmcb_control_area(struct vmcb_control_area *dst,
@@ -304,8 +308,8 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
return true;
}

static void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
struct vmcb_control_area *control)
void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
struct vmcb_control_area *control)
{
copy_vmcb_control_area(&svm->nested.ctl, control);

@@ -618,6 +622,11 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
struct kvm_host_map map;
u64 vmcb12_gpa;

if (!svm->nested.hsave_msr) {
kvm_inject_gp(vcpu, 0);
return 1;
}

if (is_smm(vcpu)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
@@ -692,6 +701,27 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
return ret;
}

/* Copy state save area fields which are handled by VMRUN */
void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
struct vmcb_save_area *to_save)
{
to_save->es = from_save->es;
to_save->cs = from_save->cs;
to_save->ss = from_save->ss;
to_save->ds = from_save->ds;
to_save->gdtr = from_save->gdtr;
to_save->idtr = from_save->idtr;
to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED;
to_save->efer = from_save->efer;
to_save->cr0 = from_save->cr0;
to_save->cr3 = from_save->cr3;
to_save->cr4 = from_save->cr4;
to_save->rax = from_save->rax;
to_save->rsp = from_save->rsp;
to_save->rip = from_save->rip;
to_save->cpl = 0;
}

void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
to_vmcb->save.fs = from_vmcb->save.fs;
@@ -1355,28 +1385,11 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,

svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;

svm->vmcb01.ptr->save.es = save->es;
svm->vmcb01.ptr->save.cs = save->cs;
svm->vmcb01.ptr->save.ss = save->ss;
svm->vmcb01.ptr->save.ds = save->ds;
svm->vmcb01.ptr->save.gdtr = save->gdtr;
svm->vmcb01.ptr->save.idtr = save->idtr;
svm->vmcb01.ptr->save.rflags = save->rflags | X86_EFLAGS_FIXED;
svm->vmcb01.ptr->save.efer = save->efer;
svm->vmcb01.ptr->save.cr0 = save->cr0;
svm->vmcb01.ptr->save.cr3 = save->cr3;
svm->vmcb01.ptr->save.cr4 = save->cr4;
svm->vmcb01.ptr->save.rax = save->rax;
svm->vmcb01.ptr->save.rsp = save->rsp;
svm->vmcb01.ptr->save.rip = save->rip;
svm->vmcb01.ptr->save.cpl = 0;

svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save);
nested_load_control_from_vmcb12(svm, ctl);

svm_switch_vmcb(svm, &svm->nested.vmcb02);

nested_vmcb02_prepare_control(svm);

kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
ret = 0;
out_free:
14 changes: 8 additions & 6 deletions arch/x86/kvm/svm/sev.c
@@ -1272,8 +1272,8 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Pin guest memory */
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
PAGE_SIZE, &n, 0);
if (!guest_page)
return -EFAULT;
if (IS_ERR(guest_page))
return PTR_ERR(guest_page);

/* allocate memory for header and transport buffer */
ret = -ENOMEM;
@@ -1310,8 +1310,9 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
}

/* Copy packet header to userspace. */
ret = copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
params.hdr_len);
if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
params.hdr_len))
ret = -EFAULT;

e_free_trans_data:
kfree(trans_data);
@@ -1463,11 +1464,12 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
data.trans_len = params.trans_len;

/* Pin guest memory */
ret = -EFAULT;
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
PAGE_SIZE, &n, 0);
if (!guest_page)
if (IS_ERR(guest_page)) {
ret = PTR_ERR(guest_page);
goto e_free_trans;
}

/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
77 changes: 70 additions & 7 deletions arch/x86/kvm/svm/svm.c
@@ -198,6 +198,11 @@ module_param(avic, bool, 0444);
bool __read_mostly dump_invalid_vmcb;
module_param(dump_invalid_vmcb, bool, 0644);


bool intercept_smi = true;
module_param(intercept_smi, bool, 0444);


static bool svm_gp_erratum_intercept = true;

static u8 rsm_ins_bytes[] = "\x0f\xaa";
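
Usage note (editor's aside, not from the patch): intercept_smi is registered read-only (0444), so it is set at module load time rather than at runtime. Assuming the usual kvm_amd module name, something like "modprobe kvm_amd intercept_smi=0" (or kvm_amd.intercept_smi=0 on the kernel command line) would disable SMI interception so that host SMIs arriving while a guest runs are handled by the host's SMM handler on bare metal.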
@@ -1185,7 +1190,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu)

svm_set_intercept(svm, INTERCEPT_INTR);
svm_set_intercept(svm, INTERCEPT_NMI);
svm_set_intercept(svm, INTERCEPT_SMI);

if (intercept_smi)
svm_set_intercept(svm, INTERCEPT_SMI);

svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
svm_set_intercept(svm, INTERCEPT_RDPMC);
svm_set_intercept(svm, INTERCEPT_CPUID);
@@ -1923,7 +1931,7 @@ static int npf_interception(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);

u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
u64 fault_address = svm->vmcb->control.exit_info_2;
u64 error_code = svm->vmcb->control.exit_info_1;

trace_kvm_page_fault(fault_address, error_code);
@@ -2106,6 +2114,11 @@ static int nmi_interception(struct kvm_vcpu *vcpu)
return 1;
}

static int smi_interception(struct kvm_vcpu *vcpu)
{
return 1;
}

static int intr_interception(struct kvm_vcpu *vcpu)
{
++vcpu->stat.irq_exits;
@@ -2941,7 +2954,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm_disable_lbrv(vcpu);
break;
case MSR_VM_HSAVE_PA:
svm->nested.hsave_msr = data;
/*
* Old kernels did not validate the value written to
* MSR_VM_HSAVE_PA. Allow KVM_SET_MSR to set an invalid
* value so that buggy or malicious guests originating from
* those kernels can still be live migrated.
*/
if (!msr->host_initiated && !page_address_valid(vcpu, data))
return 1;

svm->nested.hsave_msr = data & PAGE_MASK;
break;
case MSR_VM_CR:
return svm_set_vm_cr(vcpu, data);
@@ -3080,8 +3102,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
[SVM_EXIT_INTR] = intr_interception,
[SVM_EXIT_NMI] = nmi_interception,
[SVM_EXIT_SMI] = kvm_emulate_as_nop,
[SVM_EXIT_INIT] = kvm_emulate_as_nop,
[SVM_EXIT_SMI] = smi_interception,
[SVM_EXIT_VINTR] = interrupt_window_interception,
[SVM_EXIT_RDPMC] = kvm_emulate_rdpmc,
[SVM_EXIT_CPUID] = kvm_emulate_cpuid,
@@ -4288,6 +4309,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map_save;
int ret;

if (is_guest_mode(vcpu)) {
@@ -4303,20 +4325,44 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
ret = nested_svm_vmexit(svm);
if (ret)
return ret;

/*
* KVM uses VMCB01 to store L1 host state while L2 runs but
* VMCB01 is going to be used during SMM and thus the state will
* be lost. Temporarily save the non-VMLOAD/VMSAVE state to the host
* save area pointed to by MSR_VM_HSAVE_PA. The APM guarantees that the
* format of the area is identical to the guest save area offset
* by 0x400 (matching the offset of 'struct vmcb_save_area'
* within 'struct vmcb'). Note: the HSAVE area may also be used by
* the L1 hypervisor to save additional host context (e.g. KVM does
* that, see svm_prepare_guest_switch()) which must be
* preserved.
*/
if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
&map_save) == -EINVAL)
return 1;

BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);

svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
map_save.hva + 0x400);

kvm_vcpu_unmap(vcpu, &map_save, true);
}
return 0;
}

static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map;
struct kvm_host_map map, map_save;
int ret = 0;

if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
struct vmcb *vmcb12;

if (guest) {
if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
@@ -4332,8 +4378,25 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
if (svm_allocate_nested(svm))
return 1;

ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, map.hva);
vmcb12 = map.hva;

nested_load_control_from_vmcb12(svm, &vmcb12->control);

ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
kvm_vcpu_unmap(vcpu, &map, true);

/*
* Restore L1 host state from L1 HSAVE area as VMCB01 was
* used during SMM (see svm_enter_smm())
*/
if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
&map_save) == -EINVAL)
return 1;

svm_copy_vmrun_state(map_save.hva + 0x400,
&svm->vmcb01.ptr->save);

kvm_vcpu_unmap(vcpu, &map_save, true);
}
}
