Skip to content

Commit

Permalink
KVM: SEV: add cache flush to solve SEV cache incoherency issues
Browse files Browse the repository at this point in the history
Flush the CPU caches when memory is reclaimed from an SEV guest (where
reclaim also includes it being unmapped from KVM's memslots).  Due to lack
of coherency for SEV encrypted memory, failure to flush results in silent
data corruption if userspace is malicious/broken and doesn't ensure SEV
guest memory is properly pinned and unpinned.

Cache coherency is not enforced across the VM boundary in SEV (AMD APM
vol.2 Section 15.34.7). Confidential cachelines, generated by confidential
VM guests have to be explicitly flushed on the host side. If a memory page
containing dirty confidential cachelines was released by VM and reallocated
to another user, the cachelines may corrupt the new user at a later time.

KVM takes a shortcut by assuming all confidential memory remain pinned
until the end of VM lifetime. Therefore, KVM does not flush cache at
mmu_notifier invalidation events. Because of this incorrect assumption and
the lack of cache flushing, malicous userspace can crash the host kernel:
creating a malicious VM and continuously allocates/releases unpinned
confidential memory pages when the VM is running.

Add cache flush operations to mmu_notifier operations to ensure that any
physical memory leaving the guest VM get flushed. In particular, hook
mmu_notifier_invalidate_range_start and mmu_notifier_release events and
flush cache accordingly. The hook after releasing the mmu lock to avoid
contention with other vCPUs.

Cc: [email protected]
Suggested-by: Sean Christpherson <[email protected]>
Reported-by: Mingwei Zhang <[email protected]>
Signed-off-by: Mingwei Zhang <[email protected]>
Message-Id: <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
  • Loading branch information
mzhang3579 authored and bonzini committed Apr 21, 2022
1 parent d45829b commit 683412c
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 3 deletions.
1 change: 1 addition & 0 deletions arch/x86/include/asm/kvm-x86-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ KVM_X86_OP_OPTIONAL(mem_enc_register_region)
KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
KVM_X86_OP_OPTIONAL(vm_move_enc_context_from)
KVM_X86_OP_OPTIONAL(guest_memory_reclaimed)
KVM_X86_OP(get_msr_feature)
KVM_X86_OP(can_emulate_instruction)
KVM_X86_OP(apic_init_signal_blocked)
Expand Down
1 change: 1 addition & 0 deletions arch/x86/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -1484,6 +1484,7 @@ struct kvm_x86_ops {
int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
void (*guest_memory_reclaimed)(struct kvm *kvm);

int (*get_msr_feature)(struct kvm_msr_entry *entry);

Expand Down
8 changes: 8 additions & 0 deletions arch/x86/kvm/svm/sev.c
Original file line number Diff line number Diff line change
Expand Up @@ -2262,6 +2262,14 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
wbinvd_on_all_cpus();
}

void sev_guest_memory_reclaimed(struct kvm *kvm)
{
if (!sev_guest(kvm))
return;

wbinvd_on_all_cpus();
}

void sev_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm;
Expand Down
1 change: 1 addition & 0 deletions arch/x86/kvm/svm/svm.c
Original file line number Diff line number Diff line change
Expand Up @@ -4620,6 +4620,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.mem_enc_ioctl = sev_mem_enc_ioctl,
.mem_enc_register_region = sev_mem_enc_register_region,
.mem_enc_unregister_region = sev_mem_enc_unregister_region,
.guest_memory_reclaimed = sev_guest_memory_reclaimed,

.vm_copy_enc_context_from = sev_vm_copy_enc_context_from,
.vm_move_enc_context_from = sev_vm_move_enc_context_from,
Expand Down
2 changes: 2 additions & 0 deletions arch/x86/kvm/svm/svm.h
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,8 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
struct kvm_enc_region *range);
int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd);
int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd);
void sev_guest_memory_reclaimed(struct kvm *kvm);

void pre_sev_run(struct vcpu_svm *svm, int cpu);
void __init sev_set_cpu_caps(void);
void __init sev_hardware_setup(void);
Expand Down
5 changes: 5 additions & 0 deletions arch/x86/kvm/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -9889,6 +9889,11 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
}

void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
{
static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm);
}

static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
if (!lapic_in_kernel(vcpu))
Expand Down
2 changes: 2 additions & 0 deletions include/linux/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -2219,6 +2219,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
unsigned long start, unsigned long end);

void kvm_arch_guest_memory_reclaimed(struct kvm *kvm);

#ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
#else
Expand Down
27 changes: 24 additions & 3 deletions virt/kvm/kvm_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,10 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
{
}

__weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
{
}

bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
{
/*
Expand Down Expand Up @@ -357,6 +361,12 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
#endif

static void kvm_flush_shadow_all(struct kvm *kvm)
{
kvm_arch_flush_shadow_all(kvm);
kvm_arch_guest_memory_reclaimed(kvm);
}

#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
gfp_t gfp_flags)
Expand Down Expand Up @@ -485,12 +495,15 @@ typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
unsigned long end);

typedef void (*on_unlock_fn_t)(struct kvm *kvm);

struct kvm_hva_range {
unsigned long start;
unsigned long end;
pte_t pte;
hva_handler_t handler;
on_lock_fn_t on_lock;
on_unlock_fn_t on_unlock;
bool flush_on_ret;
bool may_block;
};
Expand Down Expand Up @@ -578,8 +591,11 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
if (range->flush_on_ret && ret)
kvm_flush_remote_tlbs(kvm);

if (locked)
if (locked) {
KVM_MMU_UNLOCK(kvm);
if (!IS_KVM_NULL_FN(range->on_unlock))
range->on_unlock(kvm);
}

srcu_read_unlock(&kvm->srcu, idx);

Expand All @@ -600,6 +616,7 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
.pte = pte,
.handler = handler,
.on_lock = (void *)kvm_null_fn,
.on_unlock = (void *)kvm_null_fn,
.flush_on_ret = true,
.may_block = false,
};
Expand All @@ -619,6 +636,7 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
.pte = __pte(0),
.handler = handler,
.on_lock = (void *)kvm_null_fn,
.on_unlock = (void *)kvm_null_fn,
.flush_on_ret = false,
.may_block = false,
};
Expand Down Expand Up @@ -687,6 +705,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
.pte = __pte(0),
.handler = kvm_unmap_gfn_range,
.on_lock = kvm_inc_notifier_count,
.on_unlock = kvm_arch_guest_memory_reclaimed,
.flush_on_ret = true,
.may_block = mmu_notifier_range_blockable(range),
};
Expand Down Expand Up @@ -741,6 +760,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
.pte = __pte(0),
.handler = (void *)kvm_null_fn,
.on_lock = kvm_dec_notifier_count,
.on_unlock = (void *)kvm_null_fn,
.flush_on_ret = false,
.may_block = mmu_notifier_range_blockable(range),
};
Expand Down Expand Up @@ -813,7 +833,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
int idx;

idx = srcu_read_lock(&kvm->srcu);
kvm_arch_flush_shadow_all(kvm);
kvm_flush_shadow_all(kvm);
srcu_read_unlock(&kvm->srcu, idx);
}

Expand Down Expand Up @@ -1225,7 +1245,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait));
kvm->mn_active_invalidate_count = 0;
#else
kvm_arch_flush_shadow_all(kvm);
kvm_flush_shadow_all(kvm);
#endif
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
Expand Down Expand Up @@ -1652,6 +1672,7 @@ static void kvm_invalidate_memslot(struct kvm *kvm,
* - kvm_is_visible_gfn (mmu_check_root)
*/
kvm_arch_flush_shadow_memslot(kvm, old);
kvm_arch_guest_memory_reclaimed(kvm);

/* Was released by kvm_swap_active_memslots, reacquire. */
mutex_lock(&kvm->slots_arch_lock);
Expand Down

0 comments on commit 683412c

Please sign in to comment.