Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "More x86 fixes:

   - Logic bugs in CR0 writes and Hyper-V hypercalls

   - Don't use Enlightened MSR Bitmap for L3

   - Remove user-triggerable WARN

  Plus a few selftest fixes and a regression test for the
  user-triggerable WARN"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  selftests: KVM: Add test to verify KVM doesn't explode on "bad" I/O
  KVM: x86: Don't WARN if userspace mucks with RCX during string I/O exit
  KVM: X86: Raise #GP when clearing CR0_PG in 64 bit mode
  selftests: KVM: avoid failures due to reserved HyperTransport region
  KVM: x86: Ignore sparse banks size for an "all CPUs", non-sparse IPI req
  KVM: x86: Wait for IPIs to be delivered when handling Hyper-V TLB flush hypercall
  KVM: x86: selftests: svm_int_ctl_test: fix intercept calculation
  KVM: nVMX: Don't use Enlightened MSR Bitmap for L3
torvalds committed Dec 10, 2021
2 parents b8a98b6 + 10e7a09 commit b9172f9
Showing 11 changed files with 223 additions and 17 deletions.
2 changes: 1 addition & 1 deletion arch/x86/include/asm/kvm_host.h
@@ -97,7 +97,7 @@
 	KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_TLB_FLUSH_CURRENT	KVM_ARCH_REQ(26)
 #define KVM_REQ_TLB_FLUSH_GUEST \
-	KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
+	KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_APF_READY		KVM_ARCH_REQ(28)
 #define KVM_REQ_MSR_FILTER_CHANGED	KVM_ARCH_REQ(29)
 #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
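For context on what the added KVM_REQUEST_WAIT flag buys: without it, the requester merely posts the request and kicks the target vCPUs; with it, the requester also waits until each target has acknowledged the IPI, so the TLB flush is guaranteed to have been noticed before the hypercall returns. A toy userspace model of the two behaviors, using C11 atomics and hypothetical names, not KVM's actual code:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy model: a "vCPU" thread consumes a posted request; the requester either
 * returns immediately (no KVM_REQUEST_WAIT) or spins until the request has
 * been acknowledged (KVM_REQUEST_WAIT semantics, simplified). */
static atomic_bool request_pending;
static atomic_bool acked;

static void *vcpu_thread(void *arg)
{
	(void)arg;
	while (!atomic_load(&request_pending))
		;				/* wait for the "IPI" */
	/* ... perform the TLB flush here ... */
	atomic_store(&acked, true);		/* acknowledge */
	return NULL;
}

static void make_request(bool wait)
{
	atomic_store(&request_pending, true);
	if (wait)				/* KVM_REQUEST_WAIT */
		while (!atomic_load(&acked))
			;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, vcpu_thread, NULL);
	make_request(true);			/* returns only after the ack */
	puts("flush observed by target before the hypercall returns");
	pthread_join(t, NULL);
	return 0;
}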
7 changes: 5 additions & 2 deletions arch/x86/kvm/hyperv.c
@@ -1922,18 +1922,21 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex)
 
 		all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
 
+		if (all_cpus)
+			goto check_and_send_ipi;
+
 		if (!sparse_banks_len)
 			goto ret_success;
 
-		if (!all_cpus &&
-		    kvm_read_guest(kvm,
+		if (kvm_read_guest(kvm,
 				   hc->ingpa + offsetof(struct hv_send_ipi_ex,
							vp_set.bank_contents),
 				   sparse_banks,
 				   sparse_banks_len))
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
 	}
 
+check_and_send_ipi:
 	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
 		return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
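Read as plain control flow, the fix makes an "all CPUs" request (HV_GENERIC_SET_ALL) bypass both the zero-length early-out, which previously returned success without sending any IPI, and the read of the sparse bank contents from guest memory. A minimal compilable sketch of the corrected flow, with hypothetical stand-ins for the KVM helpers and Hyper-V status values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for illustration only. */
enum hv_status { HV_SUCCESS, HV_INVALID_HYPERCALL_INPUT };

static bool read_banks_ok(void)	{ return true; }
static void send_ipi(void)	{ puts("IPI sent"); }

static enum hv_status hv_send_ipi_ex(bool all_cpus, uint32_t sparse_banks_len)
{
	if (all_cpus)
		goto check_and_send_ipi;	/* bank list is irrelevant, skip the read */

	if (!sparse_banks_len)
		return HV_SUCCESS;		/* empty target set: nothing to do */

	if (!read_banks_ok())			/* read guest memory only when needed */
		return HV_INVALID_HYPERCALL_INPUT;

check_and_send_ipi:
	send_ipi();
	return HV_SUCCESS;
}

int main(void)
{
	/* The old code hit the !sparse_banks_len early-out here and
	 * "succeeded" without sending anything; the fixed flow sends. */
	return hv_send_ipi_ex(true, 0) == HV_SUCCESS ? 0 : 1;
}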
22 changes: 13 additions & 9 deletions arch/x86/kvm/vmx/vmx.c
@@ -2646,15 +2646,6 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 		if (!loaded_vmcs->msr_bitmap)
 			goto out_vmcs;
 		memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
-
-		if (IS_ENABLED(CONFIG_HYPERV) &&
-		    static_branch_unlikely(&enable_evmcs) &&
-		    (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
-			struct hv_enlightened_vmcs *evmcs =
-				(struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
-
-			evmcs->hv_enlightenments_control.msr_bitmap = 1;
-		}
 	}
 
 	memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
@@ -6842,6 +6833,19 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 	if (err < 0)
 		goto free_pml;
 
+	/*
+	 * Use Hyper-V 'Enlightened MSR Bitmap' feature when KVM runs as a
+	 * nested (L1) hypervisor and Hyper-V in L0 supports it. Enable the
+	 * feature only for vmcs01, KVM currently isn't equipped to realize any
+	 * performance benefits from enabling it for vmcs02.
+	 */
+	if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs) &&
+	    (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
+		struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;
+
+		evmcs->hv_enlightenments_control.msr_bitmap = 1;
+	}
+
 	/* The MSR bitmap starts with all ones */
 	bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
 	bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
12 changes: 9 additions & 3 deletions arch/x86/kvm/x86.c
@@ -890,7 +890,8 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	    !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)))
 		return 1;
 
-	if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
+	if (!(cr0 & X86_CR0_PG) &&
+	    (is_64_bit_mode(vcpu) || kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)))
 		return 1;
 
 	static_call(kvm_x86_set_cr0)(vcpu, cr0);
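The rule being enforced: clearing CR0.PG is illegal while the vCPU is in 64-bit mode, just as it already was with CR4.PCIDE=1, and must raise #GP rather than be silently absorbed. A reduced model of the new check, with the vCPU state collapsed to two booleans for illustration:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define X86_CR0_PG	(1UL << 31)

/*
 * Reduced model of the kvm_set_cr0() check above: "long_mode" stands in for
 * is_64_bit_mode(vcpu) and "pcide" for kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE).
 * Returns true when the write must be rejected with #GP.
 */
static bool cr0_write_is_illegal(uint64_t new_cr0, bool long_mode, bool pcide)
{
	return !(new_cr0 & X86_CR0_PG) && (long_mode || pcide);
}

int main(void)
{
	/* Clearing PG in 64-bit mode now faults even with PCIDE clear. */
	assert(cr0_write_is_illegal(0, true, false));
	/* Clearing PG outside 64-bit mode with PCIDE clear remains legal. */
	assert(!cr0_write_is_illegal(0, false, false));
	return 0;
}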
@@ -7121,7 +7122,13 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
 			   unsigned short port, void *val, unsigned int count)
 {
 	if (vcpu->arch.pio.count) {
-		/* Complete previous iteration. */
+		/*
+		 * Complete a previous iteration that required userspace I/O.
+		 * Note, @count isn't guaranteed to match pio.count as userspace
+		 * can modify ECX before rerunning the vCPU. Ignore any such
+		 * shenanigans as KVM doesn't support modifying the rep count,
+		 * and the emulator ensures @count doesn't overflow the buffer.
+		 */
 	} else {
 		int r = __emulator_pio_in(vcpu, size, port, count);
 		if (!r)
@@ -7130,7 +7137,6 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
 		/* Results already available, fall through. */
 	}
 
-	WARN_ON(count != vcpu->arch.pio.count);
 	complete_emulator_pio_in(vcpu, val);
 	return 1;
 }
1 change: 1 addition & 0 deletions tools/testing/selftests/kvm/.gitignore
@@ -30,6 +30,7 @@
 /x86_64/svm_int_ctl_test
 /x86_64/sync_regs_test
 /x86_64/tsc_msrs_test
+/x86_64/userspace_io_test
 /x86_64/userspace_msr_exit_test
 /x86_64/vmx_apic_access_test
 /x86_64/vmx_close_while_nested_test
1 change: 1 addition & 0 deletions tools/testing/selftests/kvm/Makefile
@@ -59,6 +59,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
 TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
9 changes: 9 additions & 0 deletions tools/testing/selftests/kvm/include/kvm_util.h
@@ -71,6 +71,15 @@ enum vm_guest_mode {
 
 #endif
 
+#if defined(__x86_64__)
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
+#else
+static inline unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
+{
+	return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+}
+#endif
+
 #define MIN_PAGE_SIZE		(1U << MIN_PAGE_SHIFT)
 #define PTES_PER_MIN_PAGE	ptes_per_page(MIN_PAGE_SIZE)
 
2 changes: 1 addition & 1 deletion tools/testing/selftests/kvm/lib/kvm_util.c
@@ -302,7 +302,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 			  (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
 
 	/* Limit physical addresses to PA-bits. */
-	vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+	vm->max_gfn = vm_compute_max_gfn(vm);
 
 	/* Allocate and setup memory for guest. */
 	vm->vpages_mapped = sparsebit_alloc();
68 changes: 68 additions & 0 deletions tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1431,3 +1431,71 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
 
 	return cpuid;
 }
+
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
+
+static inline unsigned x86_family(unsigned int eax)
+{
+	unsigned int x86;
+
+	x86 = (eax >> 8) & 0xf;
+
+	if (x86 == 0xf)
+		x86 += (eax >> 20) & 0xff;
+
+	return x86;
+}
+
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
+{
+	const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
+	unsigned long ht_gfn, max_gfn, max_pfn;
+	uint32_t eax, ebx, ecx, edx, max_ext_leaf;
+
+	max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
+
+	/* Avoid reserved HyperTransport region on AMD processors. */
+	eax = ecx = 0;
+	cpuid(&eax, &ebx, &ecx, &edx);
+	if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx ||
+	    ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx ||
+	    edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
+		return max_gfn;
+
+	/* On parts with <40 physical address bits, the area is fully hidden */
+	if (vm->pa_bits < 40)
+		return max_gfn;
+
+	/* Before family 17h, the HyperTransport area is just below 1T. */
+	ht_gfn = (1 << 28) - num_ht_pages;
+	eax = 1;
+	cpuid(&eax, &ebx, &ecx, &edx);
+	if (x86_family(eax) < 0x17)
+		goto done;
+
+	/*
+	 * Otherwise it's at the top of the physical address space, possibly
+	 * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use
+	 * the old conservative value if MAXPHYADDR is not enumerated.
+	 */
+	eax = 0x80000000;
+	cpuid(&eax, &ebx, &ecx, &edx);
+	max_ext_leaf = eax;
+	if (max_ext_leaf < 0x80000008)
+		goto done;
+
+	eax = 0x80000008;
+	cpuid(&eax, &ebx, &ecx, &edx);
+	max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1;
+	if (max_ext_leaf >= 0x8000001f) {
+		eax = 0x8000001f;
+		cpuid(&eax, &ebx, &ecx, &edx);
+		max_pfn >>= (ebx >> 6) & 0x3f;
+	}
+
+	ht_gfn = max_pfn - num_ht_pages;
+done:
+	return min(max_gfn, ht_gfn - 1);
+}
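The vendor check above can be reproduced on the host with the <cpuid.h> helpers shipped by GCC and Clang; the magic numbers are simply "AuthenticAMD" spelled across EBX/EDX/ECX. An illustrative stand-alone probe:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 0: max basic leaf in EAX, vendor string in EBX/EDX/ECX. */
	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return 1;

	/* "AuthenticAMD": "Auth" in EBX, "enti" in EDX, "cAMD" in ECX. */
	if (ebx == 0x68747541 && edx == 0x69746e65 && ecx == 0x444d4163)
		puts("AMD host: a HyperTransport hole is carved out of max_gfn");
	else
		puts("non-AMD host: the full PA-bits GFN range is usable");
	return 0;
}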
2 changes: 1 addition & 1 deletion tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
@@ -75,7 +75,7 @@ static void l1_guest_code(struct svm_test_data *svm)
 	vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
 
 	/* No intercepts for real and virtual interrupts */
-	vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR | INTERCEPT_VINTR);
+	vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR));
 
 	/* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
 	vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
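The bug fixed here is operator precedence: '<<' binds tighter than '|', so the old expression ORed in the raw index INTERCEPT_VINTR instead of its bit, and the VINTR intercept was never actually cleared. A stand-alone demonstration, with the intercept indices assumed to be 0 and 4 as in the kernel's SVM intercept enum:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Index values assumed for illustration (INTERCEPT_INTR = 0,
 * INTERCEPT_VINTR = 4 in the kernel's enum). */
#define INTERCEPT_INTR	0
#define INTERCEPT_VINTR	4
#define BIT(n)		(1ULL << (n))

int main(void)
{
	/* '<<' binds tighter than '|': this ORs in the plain number 4 ... */
	uint64_t buggy = 1ULL << INTERCEPT_INTR | INTERCEPT_VINTR;
	/* ... while the fix builds a mask from both bit positions. */
	uint64_t fixed = BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR);

	printf("buggy = 0x%llx (bits 0 and 2), fixed = 0x%llx (bits 0 and 4)\n",
	       (unsigned long long)buggy, (unsigned long long)fixed);
	assert(buggy == 0x5 && fixed == 0x11);

	/* Clearing with ~buggy would leave the VINTR intercept (bit 4) set. */
	return 0;
}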
114 changes: 114 additions & 0 deletions tools/testing/selftests/kvm/x86_64/userspace_io_test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// SPDX-License-Identifier: GPL-2.0
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include "test_util.h"

#include "kvm_util.h"
#include "processor.h"

#define VCPU_ID 1

static void guest_ins_port80(uint8_t *buffer, unsigned int count)
{
	unsigned long end;

	if (count == 2)
		end = (unsigned long)buffer + 1;
	else
		end = (unsigned long)buffer + 8192;

	asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
	GUEST_ASSERT_1(count == 0, count);
	GUEST_ASSERT_2((unsigned long)buffer == end, buffer, end);
}

static void guest_code(void)
{
	uint8_t buffer[8192];
	int i;

	/*
	 * Special case tests. main() will adjust RCX 2 => 1 and 3 => 8192 to
	 * test that KVM doesn't explode when userspace modifies the "count" on
	 * a userspace I/O exit. KVM isn't required to play nice with the I/O
	 * itself as KVM doesn't support manipulating the count, it just needs
	 * to not explode or overflow a buffer.
	 */
	guest_ins_port80(buffer, 2);
	guest_ins_port80(buffer, 3);

	/* Verify KVM fills the buffer correctly when not stuffing RCX. */
	memset(buffer, 0, sizeof(buffer));
	guest_ins_port80(buffer, 8192);
	for (i = 0; i < 8192; i++)
		GUEST_ASSERT_2(buffer[i] == 0xaa, i, buffer[i]);

	GUEST_DONE();
}

int main(int argc, char *argv[])
{
	struct kvm_regs regs;
	struct kvm_run *run;
	struct kvm_vm *vm;
	struct ucall uc;
	int rc;

	/* Tell stdout not to buffer its content */
	setbuf(stdout, NULL);

	/* Create VM */
	vm = vm_create_default(VCPU_ID, 0, guest_code);
	run = vcpu_state(vm, VCPU_ID);

	memset(&regs, 0, sizeof(regs));

	while (1) {
		rc = _vcpu_run(vm, VCPU_ID);

		TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
			    "Unexpected exit reason: %u (%s),\n",
			    run->exit_reason,
			    exit_reason_str(run->exit_reason));

		if (get_ucall(vm, VCPU_ID, &uc))
			break;

		TEST_ASSERT(run->io.port == 0x80,
			    "Expected I/O at port 0x80, got port 0x%x\n", run->io.port);

		/*
		 * Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
		 * Note, this abuses KVM's batching of rep string I/O to avoid
		 * getting stuck in an infinite loop. That behavior isn't in
		 * scope from a testing perspective as it's not ABI in any way,
		 * i.e. it really is abusing internal KVM knowledge.
		 */
		vcpu_regs_get(vm, VCPU_ID, &regs);
		if (regs.rcx == 2)
			regs.rcx = 1;
		if (regs.rcx == 3)
			regs.rcx = 8192;
		memset((void *)run + run->io.data_offset, 0xaa, 4096);
		vcpu_regs_set(vm, VCPU_ID, &regs);
	}

	switch (uc.cmd) {
	case UCALL_DONE:
		break;
	case UCALL_ABORT:
		TEST_FAIL("%s at %s:%ld : argN+1 = 0x%lx, argN+2 = 0x%lx",
			  (const char *)uc.args[0], __FILE__, uc.args[1],
			  uc.args[2], uc.args[3]);
	default:
		TEST_FAIL("Unknown ucall %lu", uc.cmd);
	}

	kvm_vm_free(vm);
	return 0;
}
