Skip to content

Commit

Permalink
KVM: x86: acknowledgment mechanism for async pf page ready notifications
Browse files Browse the repository at this point in the history
If two page ready notifications happen back to back the second one is not
delivered and the only mechanism we currently have is
kvm_check_async_pf_completion() check in vcpu_run() loop. The check will
only be performed with the next vmexit when it happens and in some cases
it may take a while. With interrupt based page ready notification delivery
the situation is even worse: unlike exceptions, interrupts are not handled
immediately so we must check if the slot is empty. This is slow and
unnecessary. Introduce dedicated MSR_KVM_ASYNC_PF_ACK MSR to communicate
the fact that the slot is free and host should check its notification
queue. Mandate using it for interrupt based 'page ready' APF event
delivery.

As kvm_check_async_pf_completion() is going away from vcpu_run() we need
a way to communicate the fact that vcpu->async_pf.done queue has
transitioned from empty to non-empty state. Introduce
kvm_arch_async_page_present_queued() and KVM_REQ_APF_READY to do the job.

Signed-off-by: Vitaly Kuznetsov <[email protected]>
Message-Id: <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
  • Loading branch information
vittyvk authored and bonzini committed Jun 1, 2020
1 parent 2635b5c commit 557a961
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 5 deletions.
15 changes: 14 additions & 1 deletion Documentation/virt/kvm/msr.rst
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,9 @@ data:
of these bytes is a token which was previously delivered as 'page not
present' event. The event indicates the page in now available. Guest is
supposed to write '0' to 'token' when it is done handling 'page ready'
event so the next one can be delivered.
event and to write 1' to MSR_KVM_ASYNC_PF_ACK after clearing the location;
writing to the MSR forces KVM to re-scan its queue and deliver the next
pending notification.

Note, MSR_KVM_ASYNC_PF_INT MSR specifying the interrupt vector for 'page
ready' APF delivery needs to be written to before enabling APF mechanism
Expand Down Expand Up @@ -359,3 +361,14 @@ data:
Interrupt vector for asynchnonous 'page ready' notifications delivery.
The vector has to be set up before asynchronous page fault mechanism
is enabled in MSR_KVM_ASYNC_PF_EN.

MSR_KVM_ASYNC_PF_ACK:
0x4b564d07

data:
Asynchronous page fault (APF) acknowledgment.

When the guest is done processing 'page ready' APF event and 'token'
field in 'struct kvm_vcpu_pv_apf_data' is cleared it is supposed to
write '1' to bit 0 of the MSR, this causes the host to re-scan its queue
and check if there are more notifications pending.
2 changes: 2 additions & 0 deletions arch/s390/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -984,6 +984,8 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);

static inline void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu) {}

void kvm_arch_crypto_clear_masks(struct kvm *kvm);
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
unsigned long *aqm, unsigned long *adm);
Expand Down
3 changes: 3 additions & 0 deletions arch/x86/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26)
#define KVM_REQ_HV_TLB_FLUSH \
KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)

#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
Expand Down Expand Up @@ -775,6 +776,7 @@ struct kvm_vcpu_arch {
u32 host_apf_flags;
unsigned long nested_apf_token;
bool delivery_as_pf_vmexit;
bool pageready_pending;
} apf;

/* OSVW MSRs (AMD only) */
Expand Down Expand Up @@ -1662,6 +1664,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu);
bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);

Expand Down
1 change: 1 addition & 0 deletions arch/x86/include/uapi/asm/kvm_para.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#define MSR_KVM_PV_EOI_EN 0x4b564d04
#define MSR_KVM_POLL_CONTROL 0x4b564d05
#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07

struct kvm_steal_time {
__u64 steal;
Expand Down
26 changes: 22 additions & 4 deletions arch/x86/kvm/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -1248,7 +1248,7 @@ static const u32 emulated_msrs_all[] = {
HV_X64_MSR_TSC_EMULATION_STATUS,

MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT,
MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,

MSR_IA32_TSC_ADJUST,
MSR_IA32_TSCDEADLINE,
Expand Down Expand Up @@ -2946,6 +2946,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (kvm_pv_enable_async_pf_int(vcpu, data))
return 1;
break;
case MSR_KVM_ASYNC_PF_ACK:
if (data & 0x1) {
vcpu->arch.apf.pageready_pending = false;
kvm_check_async_pf_completion(vcpu);
}
break;
case MSR_KVM_STEAL_TIME:

if (unlikely(!sched_info_on()))
Expand Down Expand Up @@ -3225,6 +3231,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_KVM_ASYNC_PF_INT:
msr_info->data = vcpu->arch.apf.msr_int_val;
break;
case MSR_KVM_ASYNC_PF_ACK:
msr_info->data = 0;
break;
case MSR_KVM_STEAL_TIME:
msr_info->data = vcpu->arch.st.msr_val;
break;
Expand Down Expand Up @@ -8413,6 +8422,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_hv_process_stimers(vcpu);
if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
kvm_vcpu_update_apicv(vcpu);
if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
kvm_check_async_pf_completion(vcpu);
}

if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
Expand Down Expand Up @@ -8664,8 +8675,6 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
break;
}

kvm_check_async_pf_completion(vcpu);

if (signal_pending(current)) {
r = -EINTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
Expand Down Expand Up @@ -10555,13 +10564,22 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);

if (kvm_pv_async_pf_enabled(vcpu) &&
!apf_put_user_ready(vcpu, work->arch.token))
!apf_put_user_ready(vcpu, work->arch.token)) {
vcpu->arch.apf.pageready_pending = true;
kvm_apic_set_irq(vcpu, &irq, NULL);
}

vcpu->arch.apf.halted = false;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}

void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu)
{
kvm_make_request(KVM_REQ_APF_READY, vcpu);
if (!vcpu->arch.apf.pageready_pending)
kvm_vcpu_kick(vcpu);
}

bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
if (!kvm_pv_async_pf_enabled(vcpu))
Expand Down
10 changes: 10 additions & 0 deletions virt/kvm/async_pf.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ static void async_pf_execute(struct work_struct *work)
unsigned long addr = apf->addr;
gpa_t cr2_or_gpa = apf->cr2_or_gpa;
int locked = 1;
bool first;

might_sleep();

Expand All @@ -69,10 +70,14 @@ static void async_pf_execute(struct work_struct *work)
kvm_arch_async_page_present(vcpu, apf);

spin_lock(&vcpu->async_pf.lock);
first = list_empty(&vcpu->async_pf.done);
list_add_tail(&apf->link, &vcpu->async_pf.done);
apf->vcpu = NULL;
spin_unlock(&vcpu->async_pf.lock);

if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
kvm_arch_async_page_present_queued(vcpu);

/*
* apf may be freed by kvm_check_async_pf_completion() after
* this point
Expand Down Expand Up @@ -201,6 +206,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
{
struct kvm_async_pf *work;
bool first;

if (!list_empty_careful(&vcpu->async_pf.done))
return 0;
Expand All @@ -213,9 +219,13 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(&work->queue); /* for list_del to work */

spin_lock(&vcpu->async_pf.lock);
first = list_empty(&vcpu->async_pf.done);
list_add_tail(&work->link, &vcpu->async_pf.done);
spin_unlock(&vcpu->async_pf.lock);

if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
kvm_arch_async_page_present_queued(vcpu);

vcpu->async_pf.queued++;
return 0;
}

0 comments on commit 557a961

Please sign in to comment.