Skip to content

Commit

Permalink
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Browse files Browse the repository at this point in the history
Pull KVM fixes from Radim Krčmář:
 "ARM:
   - Fix handling of the 32bit cycle counter
   - Fix cycle counter filtering

  x86:
   - Fix a race leading to double unregistering of user notifiers
   - Amend oversight in kvm_arch_set_irq that turned Hyper-V code dead
   - Use SRCU around kvm_lapic_set_vapic_addr
   - Avoid recursive flushing of asynchronous page faults
   - Do not rely on deferred update in KVM_GET_CLOCK, which fixes #GP
   - Let userspace know that KVM_GET_CLOCK is useful with master clock;
     4.9 changed the return value to better match the guest clock, but
     didn't provide means to let guests take advantage of it"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: x86: merge kvm_arch_set_irq and kvm_arch_set_irq_inatomic
  KVM: x86: fix missed SRCU usage in kvm_lapic_set_vapic_addr
  KVM: async_pf: avoid recursive flushing of work items
  kvm: kvmclock: let KVM_GET_CLOCK return whether the master clock is in use
  KVM: Disable irq while unregistering user notifier
  KVM: x86: do not go through vcpu in __get_kvmclock_ns
  KVM: arm64: Fix the issues when guest PMCCFILTR is configured
  arm64: KVM: pmu: Fix AArch32 cycle counter access
  • Loading branch information
torvalds committed Nov 19, 2016
2 parents f691838 + a2b0773 commit dce9ce3
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 60 deletions.
11 changes: 11 additions & 0 deletions Documentation/virtual/kvm/api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,17 @@ Gets the current timestamp of kvmclock as seen by the current guest. In
conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios
such as migration.

When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the
set of bits that KVM can return in struct kvm_clock_data's flags member.

The only flag defined now is KVM_CLOCK_TSC_STABLE. If set, the returned
value is the exact kvmclock value seen by all VCPUs at the instant
when KVM_GET_CLOCK was called. If clear, the returned value is simply
CLOCK_MONOTONIC plus a constant offset; the offset can be modified
with KVM_SET_CLOCK. KVM will try to make all VCPUs follow this clock,
but the exact value read by each VCPU could differ, because the host
TSC is not stable.

struct kvm_clock_data {
__u64 clock; /* kvmclock current value */
__u32 flags;
Expand Down
10 changes: 9 additions & 1 deletion arch/arm64/include/asm/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,15 @@
#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */

#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0 /* Software increment event */
/*
* PMUv3 event types: required events
*/
#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12

/*
* Event filters for PMUv3
Expand Down
10 changes: 1 addition & 9 deletions arch/arm64/kernel/perf_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,9 @@

/*
* ARMv8 PMUv3 Performance Events handling code.
* Common event types.
* Common event types (some are defined in asm/perf_event.h).
*/

/* Required events. */
#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12

/* At least one of the following is required. */
#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08
#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B
Expand Down
10 changes: 8 additions & 2 deletions arch/arm64/kvm/sys_regs.c
Original file line number Diff line number Diff line change
Expand Up @@ -597,16 +597,22 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,

idx = ARMV8_PMU_CYCLE_IDX;
} else {
BUG();
return false;
}
} else if (r->CRn == 0 && r->CRm == 9) {
/* PMCCNTR */
if (pmu_access_event_counter_el0_disabled(vcpu))
return false;

idx = ARMV8_PMU_CYCLE_IDX;
} else if (r->CRn == 14 && (r->CRm & 12) == 8) {
/* PMEVCNTRn_EL0 */
if (pmu_access_event_counter_el0_disabled(vcpu))
return false;

idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
} else {
BUG();
return false;
}

if (!pmu_counter_idx_valid(vcpu, idx))
Expand Down
58 changes: 27 additions & 31 deletions arch/x86/kvm/irq_comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,25 +156,43 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
}


static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
{
if (!level)
return -1;

return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
}

int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
{
struct kvm_lapic_irq irq;
int r;

if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
return -EWOULDBLOCK;
switch (e->type) {
case KVM_IRQ_ROUTING_HV_SINT:
return kvm_hv_set_sint(e, kvm, irq_source_id, level,
line_status);

if (kvm_msi_route_invalid(kvm, e))
return -EINVAL;
case KVM_IRQ_ROUTING_MSI:
if (kvm_msi_route_invalid(kvm, e))
return -EINVAL;

kvm_set_msi_irq(kvm, e, &irq);
kvm_set_msi_irq(kvm, e, &irq);

if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
return r;
else
return -EWOULDBLOCK;
if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
return r;
break;

default:
break;
}

return -EWOULDBLOCK;
}

int kvm_request_irq_source_id(struct kvm *kvm)
Expand Down Expand Up @@ -254,16 +272,6 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
srcu_read_unlock(&kvm->irq_srcu, idx);
}

static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
{
if (!level)
return -1;

return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
}

int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
Expand Down Expand Up @@ -423,18 +431,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
srcu_read_unlock(&kvm->irq_srcu, idx);
}

int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm,
int irq_source_id, int level, bool line_status)
{
switch (irq->type) {
case KVM_IRQ_ROUTING_HV_SINT:
return kvm_hv_set_sint(irq, kvm, irq_source_id, level,
line_status);
default:
return -EWOULDBLOCK;
}
}

void kvm_arch_irq_routing_update(struct kvm *kvm)
{
kvm_hv_irq_routing_update(kvm);
Expand Down
47 changes: 34 additions & 13 deletions arch/x86/kvm/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,16 +210,25 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
struct kvm_shared_msrs *locals
= container_of(urn, struct kvm_shared_msrs, urn);
struct kvm_shared_msr_values *values;
unsigned long flags;

/*
* Disabling irqs at this point since the following code could be
* interrupted and executed through kvm_arch_hardware_disable()
*/
local_irq_save(flags);
if (locals->registered) {
locals->registered = false;
user_return_notifier_unregister(urn);
}
local_irq_restore(flags);
for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
values = &locals->values[slot];
if (values->host != values->curr) {
wrmsrl(shared_msrs_global.msrs[slot], values->host);
values->curr = values->host;
}
}
locals->registered = false;
user_return_notifier_unregister(urn);
}

static void shared_msr_update(unsigned slot, u32 msr)
Expand Down Expand Up @@ -1724,18 +1733,23 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)

static u64 __get_kvmclock_ns(struct kvm *kvm)
{
struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0);
struct kvm_arch *ka = &kvm->arch;
s64 ns;
struct pvclock_vcpu_time_info hv_clock;

if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) {
u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc);
} else {
ns = ktime_get_boot_ns() + ka->kvmclock_offset;
spin_lock(&ka->pvclock_gtod_sync_lock);
if (!ka->use_master_clock) {
spin_unlock(&ka->pvclock_gtod_sync_lock);
return ktime_get_boot_ns() + ka->kvmclock_offset;
}

return ns;
hv_clock.tsc_timestamp = ka->master_cycle_now;
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
spin_unlock(&ka->pvclock_gtod_sync_lock);

kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
&hv_clock.tsc_shift,
&hv_clock.tsc_to_system_mul);
return __pvclock_read_cycles(&hv_clock, rdtsc());
}

u64 get_kvmclock_ns(struct kvm *kvm)
Expand Down Expand Up @@ -2596,7 +2610,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PIT_STATE2:
case KVM_CAP_SET_IDENTITY_MAP_ADDR:
case KVM_CAP_XEN_HVM:
case KVM_CAP_ADJUST_CLOCK:
case KVM_CAP_VCPU_EVENTS:
case KVM_CAP_HYPERV:
case KVM_CAP_HYPERV_VAPIC:
Expand All @@ -2623,6 +2636,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#endif
r = 1;
break;
case KVM_CAP_ADJUST_CLOCK:
r = KVM_CLOCK_TSC_STABLE;
break;
case KVM_CAP_X86_SMM:
/* SMBASE is usually relocated above 1M on modern chipsets,
* and SMM handlers might indeed rely on 4G segment limits,
Expand Down Expand Up @@ -3415,14 +3431,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
};
case KVM_SET_VAPIC_ADDR: {
struct kvm_vapic_addr va;
int idx;

r = -EINVAL;
if (!lapic_in_kernel(vcpu))
goto out;
r = -EFAULT;
if (copy_from_user(&va, argp, sizeof va))
goto out;
idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
case KVM_X86_SETUP_MCE: {
Expand Down Expand Up @@ -4103,9 +4122,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
struct kvm_clock_data user_ns;
u64 now_ns;

now_ns = get_kvmclock_ns(kvm);
local_irq_disable();
now_ns = __get_kvmclock_ns(kvm);
user_ns.clock = now_ns;
user_ns.flags = 0;
user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
local_irq_enable();
memset(&user_ns.pad, 0, sizeof(user_ns.pad));

r = -EFAULT;
Expand Down
7 changes: 7 additions & 0 deletions include/uapi/linux/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -972,12 +972,19 @@ struct kvm_irqfd {
__u8 pad[16];
};

/* For KVM_CAP_ADJUST_CLOCK */

/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */
#define KVM_CLOCK_TSC_STABLE 2

struct kvm_clock_data {
__u64 clock;
__u32 flags;
__u32 pad[9];
};

/* For KVM_CAP_SW_TLB */

#define KVM_MMU_FSL_BOOKE_NOHV 0
#define KVM_MMU_FSL_BOOKE_HV 1

Expand Down
8 changes: 5 additions & 3 deletions virt/kvm/arm/pmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
continue;
type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
& ARMV8_PMU_EVTYPE_EVENT;
if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
&& (enable & BIT(i))) {
reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
reg = lower_32_bits(reg);
Expand Down Expand Up @@ -379,7 +379,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
eventsel = data & ARMV8_PMU_EVTYPE_EVENT;

/* Software increment event doesn't need to be backed by a perf event */
if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
select_idx != ARMV8_PMU_CYCLE_IDX)
return;

memset(&attr, 0, sizeof(struct perf_event_attr));
Expand All @@ -391,7 +392,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
attr.exclude_hv = 1; /* Don't count EL2 events */
attr.exclude_host = 1; /* Don't count host events */
attr.config = eventsel;
attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;

counter = kvm_pmu_get_counter_value(vcpu, select_idx);
/* The initial sample period (overflow count) of an event. */
Expand Down
13 changes: 12 additions & 1 deletion virt/kvm/async_pf.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ static void async_pf_execute(struct work_struct *work)

spin_lock(&vcpu->async_pf.lock);
list_add_tail(&apf->link, &vcpu->async_pf.done);
apf->vcpu = NULL;
spin_unlock(&vcpu->async_pf.lock);

/*
Expand All @@ -113,13 +114,23 @@ static void async_pf_execute(struct work_struct *work)

void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->async_pf.lock);

/* cancel outstanding work queue item */
while (!list_empty(&vcpu->async_pf.queue)) {
struct kvm_async_pf *work =
list_first_entry(&vcpu->async_pf.queue,
typeof(*work), queue);
list_del(&work->queue);

/*
* We know it's present in vcpu->async_pf.done, do
* nothing here.
*/
if (!work->vcpu)
continue;

spin_unlock(&vcpu->async_pf.lock);
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
flush_work(&work->work);
#else
Expand All @@ -129,9 +140,9 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
kmem_cache_free(async_pf_cache, work);
}
#endif
spin_lock(&vcpu->async_pf.lock);
}

spin_lock(&vcpu->async_pf.lock);
while (!list_empty(&vcpu->async_pf.done)) {
struct kvm_async_pf *work =
list_first_entry(&vcpu->async_pf.done,
Expand Down

0 comments on commit dce9ce3

Please sign in to comment.