KVM: x86: hyper-v: L2 TLB flush

Handle L2 TLB flush requests by going through all vCPUs and checking
whether any vCPU is running the same VM_ID with a VP_ID specified in
the request. Upon finish, perform a synthetic VM-exit from L2 when the
TLB lock count read from the Partition Assist Page is non-zero.

Note, while checking the VM_ID/VP_ID of running vCPUs seems a bit
racy, we count on the fact that KVM flushes the whole L2 VPID upon
transition. Also, a KVM_REQ_HV_TLB_FLUSH request needs to be made upon
the transition between L1 and L2 to make sure all pending requests are
always processed.

For reference, the Hyper-V TLFS refers to the feature as "Direct
Virtual Flush".

Note, nVMX/nSVM code does not handle VMCALL/VMMCALL from L2 yet.
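
For illustration only, here is a minimal, self-contained sketch (not the
kernel code itself) of the vCPU selection described above; the types and
helper names below (struct nested_ids, vp_in_sparse_set(),
select_l2_flush_targets()) are hypothetical stand-ins for the real KVM
structures:

/*
 * Sketch: for an L2-originated flush request, every vCPU whose nested
 * vm_id matches the caller's and whose nested vp_id is covered by the
 * request gets flagged for a TLB flush.
 */
#include <stdbool.h>
#include <stdint.h>

struct nested_ids {
	bool     valid;   /* vCPU currently carries nested (L2) state */
	uint32_t vm_id;   /* L2 partition (VM) ID */
	uint32_t vp_id;   /* L2 virtual processor ID */
};

/* Stand-in for hv_is_vp_in_sparse_set(): Hyper-V sparse VP-set lookup. */
static bool vp_in_sparse_set(uint32_t vp_id, uint64_t valid_bank_mask,
			     const uint64_t sparse_banks[])
{
	uint32_t bank = vp_id / 64, bit = vp_id % 64;
	int idx;

	if (bank >= 64 || !(valid_bank_mask & (1ULL << bank)))
		return false;

	/* sparse_banks[] holds only the banks whose bit is set in the mask. */
	idx = __builtin_popcountll(valid_bank_mask & ((1ULL << bank) - 1));
	return sparse_banks[idx] & (1ULL << bit);
}

/* Flag every vCPU of the caller's L2 VM that the request targets. */
static void select_l2_flush_targets(const struct nested_ids *vcpus,
				    int nr_vcpus, uint32_t caller_vm_id,
				    bool all_cpus, uint64_t valid_bank_mask,
				    const uint64_t sparse_banks[],
				    bool *flush_mask)
{
	for (int i = 0; i < nr_vcpus; i++)
		flush_mask[i] = vcpus[i].valid &&
				vcpus[i].vm_id == caller_vm_id &&
				(all_cpus ||
				 vp_in_sparse_set(vcpus[i].vp_id,
						  valid_bank_mask,
						  sparse_banks));
}

In KVM proper, the equivalent work is done by the kvm_for_each_vcpu()
loop added to kvm_hv_flush_tlb() in the hyperv.c hunk below.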

Reviewed-by: Sean Christopherson <[email protected]>
Signed-off-by: Vitaly Kuznetsov <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
Message-Id: <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
vittyvk authored and bonzini committed Nov 18, 2022
1 parent 3c9eb06 commit c58a318
Showing 3 changed files with 80 additions and 24 deletions.
80 changes: 67 additions & 13 deletions arch/x86/kvm/hyperv.c
@@ -34,6 +34,7 @@
#include <linux/eventfd.h>

#include <asm/apicdef.h>
#include <asm/mshyperv.h>
#include <trace/events/kvm.h>

#include "trace.h"
@@ -1832,18 +1833,16 @@ static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc
entries, consumed_xmm_halves, offset);
}

static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu, u64 *entries, int count)
static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu,
struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo,
u64 *entries, int count)
{
struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
u64 flush_all_entry = KVM_HV_TLB_FLUSHALL_ENTRY;

if (!hv_vcpu)
return;

/* kvm_hv_flush_tlb() is not ready to handle requests for L2s yet */
tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo[HV_L1_TLB_FLUSH_FIFO];

spin_lock(&tlb_flush_fifo->write_lock);

/*
@@ -1912,6 +1911,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
struct hv_tlb_flush_ex flush_ex;
struct hv_tlb_flush flush;
DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
/*
* Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE'
* entries on the TLB flush fifo. The last entry, however, needs to be
@@ -1962,7 +1962,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
}

trace_kvm_hv_flush_tlb(flush.processor_mask,
flush.address_space, flush.flags);
flush.address_space, flush.flags,
is_guest_mode(vcpu));

valid_bank_mask = BIT_ULL(0);
sparse_banks[0] = flush.processor_mask;
@@ -1993,7 +1994,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
flush_ex.hv_vp_set.format,
flush_ex.address_space,
flush_ex.flags);
flush_ex.flags, is_guest_mode(vcpu));

valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
all_cpus = flush_ex.hv_vp_set.format !=
@@ -2037,19 +2038,57 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
* vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
* analyze it here, flush TLB regardless of the specified address space.
*/
if (all_cpus) {
kvm_for_each_vcpu(i, v, kvm)
hv_tlb_flush_enqueue(v, tlb_flush_entries, hc->rep_cnt);
if (all_cpus && !is_guest_mode(vcpu)) {
kvm_for_each_vcpu(i, v, kvm) {
tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, false);
hv_tlb_flush_enqueue(v, tlb_flush_fifo,
tlb_flush_entries, hc->rep_cnt);
}

kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH);
} else {
} else if (!is_guest_mode(vcpu)) {
sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);

for_each_set_bit(i, vcpu_mask, KVM_MAX_VCPUS) {
v = kvm_get_vcpu(kvm, i);
if (!v)
continue;
hv_tlb_flush_enqueue(v, tlb_flush_entries, hc->rep_cnt);
tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, false);
hv_tlb_flush_enqueue(v, tlb_flush_fifo,
tlb_flush_entries, hc->rep_cnt);
}

kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
} else {
struct kvm_vcpu_hv *hv_v;

bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);

kvm_for_each_vcpu(i, v, kvm) {
hv_v = to_hv_vcpu(v);

/*
* The following check races with nested vCPUs entering/exiting
* and/or migrating between L1's vCPUs, however the only case when
* KVM *must* flush the TLB is when the target L2 vCPU keeps
* running on the same L1 vCPU from the moment of the request until
* kvm_hv_flush_tlb() returns. TLB is fully flushed in all other
* cases, e.g. when the target L2 vCPU migrates to a different L1
vCPU or when the corresponding L1 vCPU temporarily switches to a
* different L2 vCPU while the request is being processed.
*/
if (!hv_v || hv_v->nested.vm_id != hv_vcpu->nested.vm_id)
continue;

if (!all_cpus &&
!hv_is_vp_in_sparse_set(hv_v->nested.vp_id, valid_bank_mask,
sparse_banks))
continue;

__set_bit(i, vcpu_mask);
tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, true);
hv_tlb_flush_enqueue(v, tlb_flush_fifo,
tlb_flush_entries, hc->rep_cnt);
}

kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
@@ -2239,10 +2278,25 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)

static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
u32 tlb_lock_count = 0;
int ret;

if (hv_result_success(result) && is_guest_mode(vcpu) &&
kvm_hv_is_tlb_flush_hcall(vcpu) &&
kvm_read_guest(vcpu->kvm, to_hv_vcpu(vcpu)->nested.pa_page_gpa,
&tlb_lock_count, sizeof(tlb_lock_count)))
result = HV_STATUS_INVALID_HYPERCALL_INPUT;

trace_kvm_hv_hypercall_done(result);
kvm_hv_hypercall_set_result(vcpu, result);
++vcpu->stat.hypercalls;
return kvm_skip_emulated_instruction(vcpu);

ret = kvm_skip_emulated_instruction(vcpu);

if (tlb_lock_count)
kvm_x86_ops.nested_ops->hv_inject_synthetic_vmexit_post_tlb_flush(vcpu);

return ret;
}

static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
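
A side note on the kvm_hv_hypercall_complete() change above: whether KVM
simply resumes the guest or injects a synthetic VM-exit after a
successful L2 flush hinges on the TlbLockCount value read from the
Partition Assist Page. Below is a hedged, stand-alone sketch of that
decision; complete_l2_tlb_flush() and its arguments are illustrative,
not kernel APIs:

#include <stdbool.h>
#include <stdint.h>

enum post_flush_action {
	RESUME_GUEST,            /* no lock held, simply resume L2 */
	INJECT_SYNTHETIC_VMEXIT, /* TlbLockCount != 0, exit to L1 */
	FAIL_HYPERCALL,          /* Partition Assist Page unreadable */
};

/*
 * Sketch of the completion logic for an L2 TLB flush hypercall: read_ok
 * models whether the guest-memory read of the Partition Assist Page
 * succeeded, tlb_lock_count is the TlbLockCount field it returned.
 */
static enum post_flush_action complete_l2_tlb_flush(bool read_ok,
						    uint32_t tlb_lock_count)
{
	if (!read_ok)
		return FAIL_HYPERCALL; /* -> HV_STATUS_INVALID_HYPERCALL_INPUT */

	return tlb_lock_count ? INJECT_SYNTHETIC_VMEXIT : RESUME_GUEST;
}
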
3 changes: 0 additions & 3 deletions arch/x86/kvm/hyperv.h
@@ -159,9 +159,6 @@ static inline struct kvm_vcpu_hv_tlb_flush_fifo *kvm_hv_get_tlb_flush_fifo(struc
int i = is_guest_mode ? HV_L2_TLB_FLUSH_FIFO :
HV_L1_TLB_FLUSH_FIFO;

/* KVM does not handle L2 TLB flush requests yet */
WARN_ON_ONCE(i != HV_L1_TLB_FLUSH_FIFO);

return &hv_vcpu->tlb_flush_fifo[i];
}

21 changes: 13 additions & 8 deletions arch/x86/kvm/trace.h
@@ -1547,51 +1547,56 @@ TRACE_EVENT(kvm_hv_timer_state,
* Tracepoint for kvm_hv_flush_tlb.
*/
TRACE_EVENT(kvm_hv_flush_tlb,
TP_PROTO(u64 processor_mask, u64 address_space, u64 flags),
TP_ARGS(processor_mask, address_space, flags),
TP_PROTO(u64 processor_mask, u64 address_space, u64 flags, bool guest_mode),
TP_ARGS(processor_mask, address_space, flags, guest_mode),

TP_STRUCT__entry(
__field(u64, processor_mask)
__field(u64, address_space)
__field(u64, flags)
__field(bool, guest_mode)
),

TP_fast_assign(
__entry->processor_mask = processor_mask;
__entry->address_space = address_space;
__entry->flags = flags;
__entry->guest_mode = guest_mode;
),

TP_printk("processor_mask 0x%llx address_space 0x%llx flags 0x%llx",
TP_printk("processor_mask 0x%llx address_space 0x%llx flags 0x%llx %s",
__entry->processor_mask, __entry->address_space,
__entry->flags)
__entry->flags, __entry->guest_mode ? "(L2)" : "")
);

/*
* Tracepoint for kvm_hv_flush_tlb_ex.
*/
TRACE_EVENT(kvm_hv_flush_tlb_ex,
TP_PROTO(u64 valid_bank_mask, u64 format, u64 address_space, u64 flags),
TP_ARGS(valid_bank_mask, format, address_space, flags),
TP_PROTO(u64 valid_bank_mask, u64 format, u64 address_space, u64 flags, bool guest_mode),
TP_ARGS(valid_bank_mask, format, address_space, flags, guest_mode),

TP_STRUCT__entry(
__field(u64, valid_bank_mask)
__field(u64, format)
__field(u64, address_space)
__field(u64, flags)
__field(bool, guest_mode)
),

TP_fast_assign(
__entry->valid_bank_mask = valid_bank_mask;
__entry->format = format;
__entry->address_space = address_space;
__entry->flags = flags;
__entry->guest_mode = guest_mode;
),

TP_printk("valid_bank_mask 0x%llx format 0x%llx "
"address_space 0x%llx flags 0x%llx",
"address_space 0x%llx flags 0x%llx %s",
__entry->valid_bank_mask, __entry->format,
__entry->address_space, __entry->flags)
__entry->address_space, __entry->flags,
__entry->guest_mode ? "(L2)" : "")
);

/*
