Skip to content

Commit

Permalink
Merge tag 'kvm-x86-misc-6.4' of https://github.com/kvm-x86/linux into…
Browse files Browse the repository at this point in the history
… HEAD

KVM x86 changes for 6.4:

 - Optimize CR0.WP toggling by avoiding an MMU reload when TDP is enabled,
   and by giving the guest control of CR0.WP when EPT is enabled on VMX
   (VMX-only because SVM doesn't support per-bit controls)

 - Add CR0/CR4 helpers to query single bits, and clean up related code
   where KVM was interpreting kvm_read_cr4_bits()'s "unsigned long" return
   as a bool

 - Move AMD_PSFD to cpufeatures.h and purge KVM's definition

 - Misc cleanups
  • Loading branch information
bonzini committed Apr 26, 2023
2 parents e1a6d5c + cf9f4c0 commit a1c288f
Show file tree
Hide file tree
Showing 13 changed files with 139 additions and 52 deletions.
1 change: 1 addition & 0 deletions arch/x86/include/asm/cpufeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */
#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */

Expand Down
12 changes: 3 additions & 9 deletions arch/x86/kvm/cpuid.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,6 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
return ret;
}

/*
* This one is tied to SSB in the user API, and not
* visible in /proc/cpuinfo.
*/
#define KVM_X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */

#define F feature_bit

/* Scattered Flag - For features that are scattered by cpufeatures.h. */
Expand Down Expand Up @@ -266,7 +260,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
/* Update OSXSAVE bit */
if (boot_cpu_has(X86_FEATURE_XSAVE))
cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE));
kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));

cpuid_entry_change(best, X86_FEATURE_APIC,
vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
Expand All @@ -275,7 +269,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
best = cpuid_entry2_find(entries, nent, 7, 0);
if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
cpuid_entry_change(best, X86_FEATURE_OSPKE,
kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));

best = cpuid_entry2_find(entries, nent, 0xD, 0);
if (best)
Expand Down Expand Up @@ -715,7 +709,7 @@ void kvm_set_cpu_caps(void)
F(CLZERO) | F(XSAVEERPTR) |
F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) |
__feature_bit(KVM_X86_FEATURE_AMD_PSFD)
F(AMD_PSFD)
);

/*
Expand Down
8 changes: 8 additions & 0 deletions arch/x86/kvm/emulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1640,6 +1640,14 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
goto exception;
break;
case VCPU_SREG_CS:
/*
* KVM uses "none" when loading CS as part of emulating Real
* Mode exceptions and IRET (handled above). In all other
* cases, loading CS without a control transfer is a KVM bug.
*/
if (WARN_ON_ONCE(transfer == X86_TRANSFER_NONE))
goto exception;

if (!(seg_desc.type & 8))
goto exception;

Expand Down
18 changes: 17 additions & 1 deletion arch/x86/kvm/kvm_cache_regs.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include <linux/kvm_host.h>

#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
#define KVM_POSSIBLE_CR0_GUEST_BITS (X86_CR0_TS | X86_CR0_WP)
#define KVM_POSSIBLE_CR4_GUEST_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
Expand Down Expand Up @@ -157,6 +157,14 @@ static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
return vcpu->arch.cr0 & mask;
}

static __always_inline bool kvm_is_cr0_bit_set(struct kvm_vcpu *vcpu,
unsigned long cr0_bit)
{
BUILD_BUG_ON(!is_power_of_2(cr0_bit));

return !!kvm_read_cr0_bits(vcpu, cr0_bit);
}

static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu)
{
return kvm_read_cr0_bits(vcpu, ~0UL);
Expand All @@ -171,6 +179,14 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
return vcpu->arch.cr4 & mask;
}

static __always_inline bool kvm_is_cr4_bit_set(struct kvm_vcpu *vcpu,
unsigned long cr4_bit)
{
BUILD_BUG_ON(!is_power_of_2(cr4_bit));

return !!kvm_read_cr4_bits(vcpu, cr4_bit);
}

static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu)
{
if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
Expand Down
28 changes: 26 additions & 2 deletions arch/x86/kvm/mmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu);

int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
Expand All @@ -132,7 +134,7 @@ static inline unsigned long kvm_get_pcid(struct kvm_vcpu *vcpu, gpa_t cr3)
{
BUILD_BUG_ON((X86_CR3_PCID_MASK & PAGE_MASK) != 0);

return kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)
return kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE)
? cr3 & X86_CR3_PCID_MASK
: 0;
}
Expand All @@ -153,6 +155,24 @@ static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
vcpu->arch.mmu->root_role.level);
}

static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu)
{
/*
* When EPT is enabled, KVM may passthrough CR0.WP to the guest, i.e.
* @mmu's snapshot of CR0.WP and thus all related paging metadata may
* be stale. Refresh CR0.WP and the metadata on-demand when checking
* for permission faults. Exempt nested MMUs, i.e. MMUs for shadowing
* nEPT and nNPT, as CR0.WP is ignored in both cases. Note, KVM does
* need to refresh nested_mmu, a.k.a. the walker used to translate L2
* GVAs to GPAs, as that "MMU" needs to honor L2's CR0.WP.
*/
if (!tdp_enabled || mmu == &vcpu->arch.guest_mmu)
return;

__kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
}

/*
* Check if a given access (described through the I/D, W/R and U/S bits of a
* page fault error code pfec) causes a permission fault with the given PTE
Expand Down Expand Up @@ -184,8 +204,12 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
u64 implicit_access = access & PFERR_IMPLICIT_ACCESS;
bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC;
int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1;
bool fault = (mmu->permissions[index] >> pte_access) & 1;
u32 errcode = PFERR_PRESENT_MASK;
bool fault;

kvm_mmu_refresh_passthrough_bits(vcpu, mmu);

fault = (mmu->permissions[index] >> pte_access) & 1;

WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
if (unlikely(mmu->pkru_mask)) {
Expand Down
15 changes: 15 additions & 0 deletions arch/x86/kvm/mmu/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -5112,6 +5112,21 @@ kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs)
return role;
}

void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu)
{
const bool cr0_wp = kvm_is_cr0_bit_set(vcpu, X86_CR0_WP);

BUILD_BUG_ON((KVM_MMU_CR0_ROLE_BITS & KVM_POSSIBLE_CR0_GUEST_BITS) != X86_CR0_WP);
BUILD_BUG_ON((KVM_MMU_CR4_ROLE_BITS & KVM_POSSIBLE_CR4_GUEST_BITS));

if (is_cr0_wp(mmu) == cr0_wp)
return;

mmu->cpu_role.base.cr0_wp = cr0_wp;
reset_guest_paging_metadata(vcpu, mmu);
}

static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
{
/* tdp_root_level is architecture forced level, use it if nonzero */
Expand Down
4 changes: 2 additions & 2 deletions arch/x86/kvm/pmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -540,9 +540,9 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
if (!pmc)
return 1;

if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_PCE) &&
(static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
(kvm_read_cr0(vcpu) & X86_CR0_PE))
kvm_is_cr0_bit_set(vcpu, X86_CR0_PE))
return 1;

*data = pmc_read_counter(pmc) & mask;
Expand Down
6 changes: 2 additions & 4 deletions arch/x86/kvm/svm/svm.c
Original file line number Diff line number Diff line change
Expand Up @@ -4522,7 +4522,6 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len)
{
bool smep, smap, is_user;
unsigned long cr4;
u64 error_code;

/* Emulation is always possible when KVM has access to all guest state. */
Expand Down Expand Up @@ -4614,9 +4613,8 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK))
goto resume_guest;

cr4 = kvm_read_cr4(vcpu);
smep = cr4 & X86_CR4_SMEP;
smap = cr4 & X86_CR4_SMAP;
smep = kvm_is_cr4_bit_set(vcpu, X86_CR4_SMEP);
smap = kvm_is_cr4_bit_set(vcpu, X86_CR4_SMAP);
is_user = svm_get_cpl(vcpu) == 3;
if (smap && (!smep || is_user)) {
pr_err_ratelimited("SEV Guest triggered AMD Erratum 1096\n");
Expand Down
6 changes: 3 additions & 3 deletions arch/x86/kvm/vmx/nested.c
Original file line number Diff line number Diff line change
Expand Up @@ -4481,7 +4481,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* CR0_GUEST_HOST_MASK is already set in the original vmcs01
* (KVM doesn't change it);
*/
vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmx_set_cr0(vcpu, vmcs12->host_cr0);

/* Same as above - no reason to call set_cr4_guest_host_mask(). */
Expand Down Expand Up @@ -4632,7 +4632,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
*/
vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));

vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));

vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
Expand Down Expand Up @@ -5154,7 +5154,7 @@ static int handle_vmxon(struct kvm_vcpu *vcpu)
* does force CR0.PE=1, but only to also force VM86 in order to emulate
* Real Mode, and so there's no need to check CR0.PE manually.
*/
if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_VMXE)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
Expand Down
8 changes: 4 additions & 4 deletions arch/x86/kvm/vmx/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -4746,7 +4746,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
/* 22.2.1, 20.8.1 */
vm_entry_controls_set(vmx, vmx_vmentry_ctrl());

vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);

set_cr4_guest_host_mask(vmx);
Expand Down Expand Up @@ -5136,7 +5136,7 @@ bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
return true;

return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
return vmx_get_cpl(vcpu) == 3 && kvm_is_cr0_bit_set(vcpu, X86_CR0_AM) &&
(kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
}

Expand Down Expand Up @@ -5473,7 +5473,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
break;
case 3: /* lmsw */
val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
trace_kvm_cr_write(0, (kvm_read_cr0_bits(vcpu, ~0xful) | val));
kvm_lmsw(vcpu, val);

return kvm_skip_emulated_instruction(vcpu);
Expand Down Expand Up @@ -7531,7 +7531,7 @@ static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;

if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) {
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
cache = MTRR_TYPE_WRBACK;
else
Expand Down
18 changes: 18 additions & 0 deletions arch/x86/kvm/vmx/vmx.h
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,24 @@ BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
(1 << VCPU_EXREG_EXIT_INFO_1) | \
(1 << VCPU_EXREG_EXIT_INFO_2))

static inline unsigned long vmx_l1_guest_owned_cr0_bits(void)
{
unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS;

/*
* CR0.WP needs to be intercepted when KVM is shadowing legacy paging
* in order to construct shadow PTEs with the correct protections.
* Note! CR0.WP technically can be passed through to the guest if
* paging is disabled, but checking CR0.PG would generate a cyclical
* dependency of sorts due to forcing the caller to ensure CR0 holds
* the correct value prior to determining which CR0 bits can be owned
* by L1. Keep it simple and limit the optimization to EPT.
*/
if (!enable_ept)
bits &= ~X86_CR0_WP;
return bits;
}

static __always_inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
return container_of(kvm, struct kvm_vmx, kvm);
Expand Down
Loading

0 comments on commit a1c288f

Please sign in to comment.