Skip to content

Commit

Permalink
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Browse files Browse the repository at this point in the history
Pull kvm fixes from Paolo Bonzini:

 - Lots of bug fixes.

 - Fix virtualization of RDPID

 - Virtualization of DR6_BUS_LOCK, which on bare metal is new to this
   release

 - More nested virtualization migration fixes (nSVM and eVMCS)

 - Fix for KVM guest hibernation

 - Fix for warning in SEV-ES SRCU usage

 - Block KVM from loading on AMD machines with 5-level page tables,
   because the APM does not specify exactly how host CR4.LA57 affects
   the guest.

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (48 commits)
  KVM: SVM: Move GHCB unmapping to fix RCU warning
  KVM: SVM: Invert user pointer casting in SEV {en,de}crypt helpers
  kvm: Cap halt polling at kvm->max_halt_poll_ns
  tools/kvm_stat: Fix documentation typo
  KVM: x86: Prevent deadlock against tk_core.seq
  KVM: x86: Cancel pvclock_gtod_work on module removal
  KVM: x86: Prevent KVM SVM from loading on kernels with 5-level paging
  KVM: X86: Expose bus lock debug exception to guest
  KVM: X86: Add support for the emulation of DR6_BUS_LOCK bit
  KVM: PPC: Book3S HV: Fix conversion to gfn-based MMU notifier callbacks
  KVM: x86: Hide RDTSCP and RDPID if MSR_TSC_AUX probing failed
  KVM: x86: Tie Intel and AMD behavior for MSR_TSC_AUX to guest CPU model
  KVM: x86: Move uret MSR slot management to common x86
  KVM: x86: Export the number of uret MSRs to vendor modules
  KVM: VMX: Disable loading of TSX_CTRL MSR the more conventional way
  KVM: VMX: Use common x86's uret MSR list as the one true list
  KVM: VMX: Use flag to indicate "active" uret MSRs instead of sorting list
  KVM: VMX: Configure list of user return MSRs at module init
  KVM: x86: Add support for RDPID without RDTSCP
  KVM: SVM: Probe and load MSR_TSC_AUX regardless of RDTSCP support in host
  ...
  • Loading branch information
torvalds committed May 10, 2021
2 parents 6efb943 + ce7ea0c commit 0aa099a
Show file tree
Hide file tree
Showing 25 changed files with 542 additions and 342 deletions.
4 changes: 2 additions & 2 deletions Documentation/virt/kvm/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4803,7 +4803,7 @@ KVM_PV_VM_VERIFY
4.126 KVM_X86_SET_MSR_FILTER
----------------------------

:Capability: KVM_X86_SET_MSR_FILTER
:Capability: KVM_CAP_X86_MSR_FILTER
:Architectures: x86
:Type: vm ioctl
:Parameters: struct kvm_msr_filter
Expand Down Expand Up @@ -6715,7 +6715,7 @@ accesses that would usually trigger a #GP by KVM into the guest will
instead get bounced to user space through the KVM_EXIT_X86_RDMSR and
KVM_EXIT_X86_WRMSR exit notifications.

8.27 KVM_X86_SET_MSR_FILTER
8.27 KVM_CAP_X86_MSR_FILTER
---------------------------

:Architectures: x86
Expand Down
15 changes: 12 additions & 3 deletions arch/x86/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@
#define VALID_PAGE(x) ((x) != INVALID_PAGE)

#define UNMAPPED_GVA (~(gpa_t)0)
#define INVALID_GPA (~(gpa_t)0)

/* KVM Hugepage definitions for x86 */
#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G
Expand Down Expand Up @@ -199,6 +200,7 @@ enum x86_intercept_stage;

#define KVM_NR_DB_REGS 4

#define DR6_BUS_LOCK (1 << 11)
#define DR6_BD (1 << 13)
#define DR6_BS (1 << 14)
#define DR6_BT (1 << 15)
Expand All @@ -212,7 +214,7 @@ enum x86_intercept_stage;
* DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
*/
#define DR6_ACTIVE_LOW 0xffff0ff0
#define DR6_VOLATILE 0x0001e00f
#define DR6_VOLATILE 0x0001e80f
#define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE)

#define DR7_BP_EN_MASK 0x000000ff
Expand Down Expand Up @@ -407,7 +409,7 @@ struct kvm_mmu {
u32 pkru_mask;

u64 *pae_root;
u64 *lm_root;
u64 *pml4_root;

/*
* check zero bits on shadow page table entries, these
Expand Down Expand Up @@ -1417,6 +1419,7 @@ struct kvm_arch_async_pf {
bool direct_map;
};

extern u32 __read_mostly kvm_nr_uret_msrs;
extern u64 __read_mostly host_efer;
extern bool __read_mostly allow_smaller_maxphyaddr;
extern struct kvm_x86_ops kvm_x86_ops;
Expand Down Expand Up @@ -1775,9 +1778,15 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);

void kvm_define_user_return_msr(unsigned index, u32 msr);
int kvm_add_user_return_msr(u32 msr);
int kvm_find_user_return_msr(u32 msr);
int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);

/*
 * Returns true when kvm_find_user_return_msr() reports a non-negative
 * result for @msr, false otherwise.
 */
static inline bool kvm_is_supported_user_return_msr(u32 msr)
{
	int ret = kvm_find_user_return_msr(msr);

	return !(ret < 0);
}

u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);

Expand Down
10 changes: 2 additions & 8 deletions arch/x86/include/asm/kvm_para.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
#include <linux/interrupt.h>
#include <uapi/asm/kvm_para.h>

extern void kvmclock_init(void);

#ifdef CONFIG_KVM_GUEST
bool kvm_check_and_clear_guest_paused(void);
#else
Expand Down Expand Up @@ -86,13 +84,14 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
}

#ifdef CONFIG_KVM_GUEST
void kvmclock_init(void);
void kvmclock_disable(void);
bool kvm_para_available(void);
unsigned int kvm_arch_para_features(void);
unsigned int kvm_arch_para_hints(void);
void kvm_async_pf_task_wait_schedule(u32 token);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_apf_flags(void);
void kvm_disable_steal_time(void);
bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);

DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
Expand Down Expand Up @@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf_flags(void)
return 0;
}

static inline void kvm_disable_steal_time(void)
{
return;
}

static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
return false;
Expand Down
2 changes: 2 additions & 0 deletions arch/x86/include/uapi/asm/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
__u16 flags;
} smm;

__u16 pad;

__u32 flags;
__u64 preemption_timer_deadline;
};
Expand Down
129 changes: 83 additions & 46 deletions arch/x86/kernel/kvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <linux/kprobes.h>
#include <linux/nmi.h>
#include <linux/swait.h>
#include <linux/syscore_ops.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
Expand All @@ -37,6 +38,7 @@
#include <asm/tlb.h>
#include <asm/cpuidle_haltpoll.h>
#include <asm/ptrace.h>
#include <asm/reboot.h>
#include <asm/svm.h>

DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
Expand Down Expand Up @@ -345,7 +347,7 @@ static void kvm_guest_cpu_init(void)

wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
pr_info("setup async PF for cpu %d\n", smp_processor_id());
}

if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
Expand All @@ -371,34 +373,17 @@ static void kvm_pv_disable_apf(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
__this_cpu_write(apf_reason.enabled, 0);

pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
pr_info("disable async PF for cpu %d\n", smp_processor_id());
}

static void kvm_pv_guest_cpu_reboot(void *unused)
static void kvm_disable_steal_time(void)
{
/*
* We disable PV EOI before we load a new kernel by kexec,
* since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
* New kernel can re-enable when it boots.
*/
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
kvm_pv_disable_apf();
kvm_disable_steal_time();
}
if (!has_steal_clock)
return;

static int kvm_pv_reboot_notify(struct notifier_block *nb,
unsigned long code, void *unused)
{
if (code == SYS_RESTART)
on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
return NOTIFY_DONE;
wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
}

static struct notifier_block kvm_pv_reboot_nb = {
.notifier_call = kvm_pv_reboot_notify,
};

static u64 kvm_steal_clock(int cpu)
{
u64 steal;
Expand All @@ -416,14 +401,6 @@ static u64 kvm_steal_clock(int cpu)
return steal;
}

void kvm_disable_steal_time(void)
{
if (!has_steal_clock)
return;

wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
}

static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
{
early_set_memory_decrypted((unsigned long) ptr, size);
Expand Down Expand Up @@ -451,6 +428,27 @@ static void __init sev_map_percpu_data(void)
}
}

/*
 * Turn off the paravirtual features in a fixed order: steal time, PV EOI
 * (only when KVM_FEATURE_PV_EOI is advertised), async page faults and
 * finally kvmclock.
 *
 * @shutdown: when false, apf_task_wake_all() is additionally called after
 *            async PF has been disabled; when true that step is skipped.
 */
static void kvm_guest_cpu_offline(bool shutdown)
{
kvm_disable_steal_time();
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
kvm_pv_disable_apf();
/* CPU-down and suspend callers pass false; reboot and crash callers pass true. */
if (!shutdown)
apf_task_wake_all();
kvmclock_disable();
}

/*
 * Run kvm_guest_cpu_init() with local interrupts disabled, then restore the
 * interrupt state that was saved on entry. Also invoked from kvm_resume().
 *
 * @cpu: not used by this function.
 *
 * Always returns 0.
 */
static int kvm_cpu_online(unsigned int cpu)
{
unsigned long flags;

/* save/restore rather than disable/enable: the caller's IRQ state is preserved */
local_irq_save(flags);
kvm_guest_cpu_init();
local_irq_restore(flags);
return 0;
}

#ifdef CONFIG_SMP

static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
Expand Down Expand Up @@ -635,31 +633,64 @@ static void __init kvm_smp_prepare_boot_cpu(void)
kvm_spinlock_init();
}

static void kvm_guest_cpu_offline(void)
static int kvm_cpu_down_prepare(unsigned int cpu)
{
kvm_disable_steal_time();
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
kvm_pv_disable_apf();
apf_task_wake_all();
unsigned long flags;

local_irq_save(flags);
kvm_guest_cpu_offline(false);
local_irq_restore(flags);
return 0;
}

static int kvm_cpu_online(unsigned int cpu)
#endif

static int kvm_suspend(void)
{
local_irq_disable();
kvm_guest_cpu_init();
local_irq_enable();
kvm_guest_cpu_offline(false);

return 0;
}

static int kvm_cpu_down_prepare(unsigned int cpu)
static void kvm_resume(void)
{
local_irq_disable();
kvm_guest_cpu_offline();
local_irq_enable();
return 0;
kvm_cpu_online(raw_smp_processor_id());
}

/*
 * Syscore callbacks: kvm_suspend() on suspend, kvm_resume() on resume.
 * Registered from kvm_guest_init() via register_syscore_ops().
 */
static struct syscore_ops kvm_syscore_ops = {
.suspend = kvm_suspend,
.resume = kvm_resume,
};

/*
 * Callback handed to on_each_cpu() by kvm_pv_reboot_notify(); runs the PV
 * feature teardown with shutdown == true. @unused is ignored.
 */
static void kvm_pv_guest_cpu_reboot(void *unused)
{
kvm_guest_cpu_offline(true);
}

/*
 * Notifier callback used by kvm_pv_reboot_nb.
 *
 * For SYS_RESTART, kvm_pv_guest_cpu_reboot() is dispatched through
 * on_each_cpu() with a NULL argument and the last parameter set to 1;
 * every other @code is ignored. @nb and @unused are not used.
 *
 * Always returns NOTIFY_DONE.
 */
static int kvm_pv_reboot_notify(struct notifier_block *nb,
				unsigned long code, void *unused)
{
	if (code != SYS_RESTART)
		return NOTIFY_DONE;

	on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
	return NOTIFY_DONE;
}

/* Notifier block whose callback is kvm_pv_reboot_notify(). */
static struct notifier_block kvm_pv_reboot_nb = {
.notifier_call = kvm_pv_reboot_notify,
};

/*
 * After a PV feature is registered, the host will keep writing to the
 * registered memory location. If the guest happens to shut down, this memory
 * won't be valid. In cases like kexec, in which you install a new kernel, this
 * means the host will keep writing to a random memory location.
 */
#ifdef CONFIG_KEXEC_CORE
/*
 * Crash-shutdown hook, installed as machine_ops.crash_shutdown in
 * kvm_guest_init(). Runs the PV feature teardown with shutdown == true and
 * then hands @regs to native_machine_crash_shutdown().
 */
static void kvm_crash_shutdown(struct pt_regs *regs)
{
/* PV teardown comes first, before the native crash path takes over */
kvm_guest_cpu_offline(true);
native_machine_crash_shutdown(regs);
}
#endif

static void __init kvm_guest_init(void)
Expand Down Expand Up @@ -704,6 +735,12 @@ static void __init kvm_guest_init(void)
kvm_guest_cpu_init();
#endif

#ifdef CONFIG_KEXEC_CORE
machine_ops.crash_shutdown = kvm_crash_shutdown;
#endif

register_syscore_ops(&kvm_syscore_ops);

/*
* Hard lockup detection is enabled by default. Disable it, as guests
* can get false positives too easily, for example if the host is
Expand Down
26 changes: 1 addition & 25 deletions arch/x86/kernel/kvmclock.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <asm/hypervisor.h>
#include <asm/mem_encrypt.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
#include <asm/kvmclock.h>

static int kvmclock __initdata = 1;
Expand Down Expand Up @@ -203,28 +202,9 @@ static void kvm_setup_secondary_clock(void)
}
#endif

/*
* After the clock is registered, the host will keep writing to the
* registered memory location. If the guest happens to shutdown, this memory
* won't be valid. In cases like kexec, in which you install a new kernel, this
* means a random memory location will be kept being written. So before any
* kind of shutdown from our side, we unregister the clock by writing anything
* that does not have the 'enable' bit set in the msr
*/
#ifdef CONFIG_KEXEC_CORE
static void kvm_crash_shutdown(struct pt_regs *regs)
{
native_write_msr(msr_kvm_system_time, 0, 0);
kvm_disable_steal_time();
native_machine_crash_shutdown(regs);
}
#endif

static void kvm_shutdown(void)
void kvmclock_disable(void)
{
native_write_msr(msr_kvm_system_time, 0, 0);
kvm_disable_steal_time();
native_machine_shutdown();
}

static void __init kvmclock_init_mem(void)
Expand Down Expand Up @@ -351,10 +331,6 @@ void __init kvmclock_init(void)
#endif
x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
machine_ops.shutdown = kvm_shutdown;
#ifdef CONFIG_KEXEC_CORE
machine_ops.crash_shutdown = kvm_crash_shutdown;
#endif
kvm_get_preset_lpj();

/*
Expand Down
Loading

0 comments on commit 0aa099a

Please sign in to comment.