Skip to content

Commit

Permalink
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Browse files Browse the repository at this point in the history
Pull KVM fixes from Radim Krčmář:
 "ARM:
   - Fix a problem with GICv3 userspace save/restore
   - Clarify GICv2 userspace save/restore ABI
   - Be more careful in clearing GIC LRs
   - Add missing synchronization primitive to our MMU handling code

  PPC:
   - Check for a NULL return from kzalloc

  s390:
   - Prevent translation exception errors on valid page tables for the
     instruction-exection-protection support

  x86:
   - Fix Page-Modification Logging when running a nested guest"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: PPC: Book3S HV: Check for kmalloc errors in ioctl
  KVM: nVMX: initialize PML fields in vmcs02
  KVM: nVMX: do not leak PML full vmexit to L1
  KVM: arm/arm64: vgic: Fix GICC_PMR uaccess on GICv3 and clarify ABI
  KVM: arm64: Ensure LRs are clear when they should be
  kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd
  KVM: s390: remove change-recording override support
  arm/arm64: KVM: Take mmap_sem in kvm_arch_prepare_memory_region
  arm/arm64: KVM: Take mmap_sem in stage2_unmap_vm
  • Loading branch information
torvalds committed Apr 8, 2017
2 parents 62fedca + 8786fa6 commit 542380a
Show file tree
Hide file tree
Showing 12 changed files with 120 additions and 15 deletions.
6 changes: 6 additions & 0 deletions Documentation/virtual/kvm/devices/arm-vgic.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ Groups:

Bits for undefined preemption levels are RAZ/WI.

For historical reasons and to provide ABI compatibility with userspace we
export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask
field in the lower 5 bits of a word, meaning that userspace must always
use the lower 5 bits to communicate with the KVM device and must shift the
value left by 3 places to obtain the actual priority mask level.

Limitations:
- Priorities are not implemented, and registers are RAZ/WI
- Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
Expand Down
3 changes: 3 additions & 0 deletions arch/arm/kvm/arm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1124,6 +1124,9 @@ static void cpu_hyp_reinit(void)
if (__hyp_get_vectors() == hyp_default_vectors)
cpu_init_hyp_mode(NULL);
}

if (vgic_present)
kvm_vgic_init_cpu_hardware();
}

static void cpu_hyp_reset(void)
Expand Down
23 changes: 20 additions & 3 deletions arch/arm/kvm/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -292,11 +292,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
phys_addr_t addr = start, end = start + size;
phys_addr_t next;

assert_spin_locked(&kvm->mmu_lock);
pgd = kvm->arch.pgd + stage2_pgd_index(addr);
do {
next = stage2_pgd_addr_end(addr, end);
if (!stage2_pgd_none(*pgd))
unmap_stage2_puds(kvm, pgd, addr, next);
/*
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
if (next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}

Expand Down Expand Up @@ -803,13 +810,15 @@ void stage2_unmap_vm(struct kvm *kvm)
int idx;

idx = srcu_read_lock(&kvm->srcu);
down_read(&current->mm->mmap_sem);
spin_lock(&kvm->mmu_lock);

slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots)
stage2_unmap_memslot(kvm, memslot);

spin_unlock(&kvm->mmu_lock);
up_read(&current->mm->mmap_sem);
srcu_read_unlock(&kvm->srcu, idx);
}

Expand All @@ -829,7 +838,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
if (kvm->arch.pgd == NULL)
return;

spin_lock(&kvm->mmu_lock);
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
spin_unlock(&kvm->mmu_lock);

/* Free the HW pgd, one page at a time */
free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
kvm->arch.pgd = NULL;
Expand Down Expand Up @@ -1801,6 +1813,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
(KVM_PHYS_SIZE >> PAGE_SHIFT))
return -EFAULT;

down_read(&current->mm->mmap_sem);
/*
* A memory region could potentially cover multiple VMAs, and any holes
* between them, so iterate over all of them to find out if we can map
Expand Down Expand Up @@ -1844,8 +1857,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
pa += vm_start - vma->vm_start;

/* IO region dirty page logging not allowed */
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
return -EINVAL;
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
goto out;
}

ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
vm_end - vm_start,
Expand All @@ -1857,14 +1872,16 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
} while (hva < reg_end);

if (change == KVM_MR_FLAGS_ONLY)
return ret;
goto out;

spin_lock(&kvm->mmu_lock);
if (ret)
unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
else
stage2_flush_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
out:
up_read(&current->mm->mmap_sem);
return ret;
}

Expand Down
4 changes: 4 additions & 0 deletions arch/powerpc/kvm/book3s_64_mmu_hv.c
Original file line number Diff line number Diff line change
Expand Up @@ -1487,6 +1487,10 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
/* start new resize */

resize = kzalloc(sizeof(*resize), GFP_KERNEL);
if (!resize) {
ret = -ENOMEM;
goto out;
}
resize->order = shift;
resize->kvm = kvm;
INIT_WORK(&resize->work, resize_hpt_prepare_work);
Expand Down
7 changes: 2 additions & 5 deletions arch/s390/kvm/gaccess.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,7 @@ union page_table_entry {
unsigned long z : 1; /* Zero Bit */
unsigned long i : 1; /* Page-Invalid Bit */
unsigned long p : 1; /* DAT-Protection Bit */
unsigned long co : 1; /* Change-Recording Override */
unsigned long : 8;
unsigned long : 9;
};
};

Expand Down Expand Up @@ -745,8 +744,6 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_PAGE_TRANSLATION;
if (pte.z)
return PGM_TRANSLATION_SPEC;
if (pte.co && !edat1)
return PGM_TRANSLATION_SPEC;
dat_protection |= pte.p;
raddr.pfra = pte.pfra;
real_address:
Expand Down Expand Up @@ -1182,7 +1179,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
if (!rc && pte.i)
rc = PGM_PAGE_TRANSLATION;
if (!rc && (pte.z || (pte.co && sg->edat_level < 1)))
if (!rc && pte.z)
rc = PGM_TRANSLATION_SPEC;
shadow_page:
pte.p |= dat_protection;
Expand Down
15 changes: 15 additions & 0 deletions arch/x86/kvm/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -8198,6 +8198,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
case EXIT_REASON_PREEMPTION_TIMER:
return false;
case EXIT_REASON_PML_FULL:
/* We don't expose PML support to L1. */
return false;
default:
return true;
}
Expand Down Expand Up @@ -10267,6 +10270,18 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,

}

if (enable_pml) {
/*
* Conceptually we want to copy the PML address and index from
* vmcs01 here, and then back to vmcs01 on nested vmexit. But,
* since we always flush the log on each vmexit, this happens
* to be equivalent to simply resetting the fields in vmcs02.
*/
ASSERT(vmx->pml_pg);
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
}

if (nested_cpu_has_ept(vmcs12)) {
kvm_mmu_unload(vcpu);
nested_ept_init_mmu_context(vcpu);
Expand Down
1 change: 1 addition & 0 deletions include/kvm/arm_vgic.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
int kvm_vgic_map_resources(struct kvm *kvm);
int kvm_vgic_hyp_init(void);
void kvm_vgic_init_cpu_hardware(void);

int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
bool level);
Expand Down
3 changes: 3 additions & 0 deletions include/linux/irqchip/arm-gic.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@
#define GICH_MISR_EOI (1 << 0)
#define GICH_MISR_U (1 << 1)

#define GICV_PMR_PRIORITY_SHIFT 3
#define GICV_PMR_PRIORITY_MASK (0x1f << GICV_PMR_PRIORITY_SHIFT)

#ifndef __ASSEMBLY__

#include <linux/irqdomain.h>
Expand Down
19 changes: 19 additions & 0 deletions virt/kvm/arm/vgic/vgic-init.c
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,25 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
return IRQ_HANDLED;
}

/**
* kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
*
* For a specific CPU, initialize the GIC VE hardware.
*/
void kvm_vgic_init_cpu_hardware(void)
{
BUG_ON(preemptible());

/*
* We want to make sure the list registers start out clear so that we
* only have the program the used registers.
*/
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_init_lrs();
else
kvm_call_hyp(__vgic_v3_init_lrs);
}

/**
* kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
* according to the host GIC model. Accordingly calls either
Expand Down
20 changes: 18 additions & 2 deletions virt/kvm/arm/vgic/vgic-mmio-v2.c
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,15 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
val = vmcr.ctlr;
break;
case GIC_CPU_PRIMASK:
val = vmcr.pmr;
/*
* Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
* the PMR field as GICH_VMCR.VMPriMask rather than
* GICC_PMR.Priority, so we expose the upper five bits of
* priority mask to userspace using the lower bits in the
* unsigned long.
*/
val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >>
GICV_PMR_PRIORITY_SHIFT;
break;
case GIC_CPU_BINPOINT:
val = vmcr.bpr;
Expand Down Expand Up @@ -262,7 +270,15 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
vmcr.ctlr = val;
break;
case GIC_CPU_PRIMASK:
vmcr.pmr = val;
/*
* Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
* the PMR field as GICH_VMCR.VMPriMask rather than
* GICC_PMR.Priority, so we expose the upper five bits of
* priority mask to userspace using the lower bits in the
* unsigned long.
*/
vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) &
GICV_PMR_PRIORITY_MASK;
break;
case GIC_CPU_BINPOINT:
vmcr.bpr = val;
Expand Down
23 changes: 19 additions & 4 deletions virt/kvm/arm/vgic/vgic-v2.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,21 @@ static unsigned long *u64_to_bitmask(u64 *val)
return (unsigned long *)val;
}

static inline void vgic_v2_write_lr(int lr, u32 val)
{
void __iomem *base = kvm_vgic_global_state.vctrl_base;

writel_relaxed(val, base + GICH_LR0 + (lr * 4));
}

void vgic_v2_init_lrs(void)
{
int i;

for (i = 0; i < kvm_vgic_global_state.nr_lr; i++)
vgic_v2_write_lr(i, 0);
}

void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
Expand Down Expand Up @@ -191,8 +206,8 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
GICH_VMCR_ALIAS_BINPOINT_MASK;
vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
GICH_VMCR_BINPOINT_MASK;
vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) &
GICH_VMCR_PRIMASK_MASK;
vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) <<
GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;

vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
}
Expand All @@ -207,8 +222,8 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
GICH_VMCR_ALIAS_BINPOINT_SHIFT;
vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
GICH_VMCR_BINPOINT_SHIFT;
vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >>
GICH_VMCR_PRIMASK_SHIFT;
vmcrp->pmr = ((vmcr & GICH_VMCR_PRIMASK_MASK) >>
GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT;
}

void vgic_v2_enable(struct kvm_vcpu *vcpu)
Expand Down
11 changes: 10 additions & 1 deletion virt/kvm/arm/vgic/vgic.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq)
return irq->pending_latch || irq->line_level;
}

/*
* This struct provides an intermediate representation of the fields contained
* in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
* state to userspace can generate either GICv2 or GICv3 CPU interface
* registers regardless of the hardware backed GIC used.
*/
struct vgic_vmcr {
u32 ctlr;
u32 abpr;
u32 bpr;
u32 pmr;
u32 pmr; /* Priority mask field in the GICC_PMR and
* ICC_PMR_EL1 priority field format */
/* Below member variable are valid only for GICv3 */
u32 grpen0;
u32 grpen1;
Expand Down Expand Up @@ -130,6 +137,8 @@ int vgic_v2_map_resources(struct kvm *kvm);
int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
enum vgic_type);

void vgic_v2_init_lrs(void);

static inline void vgic_get_irq_kref(struct vgic_irq *irq)
{
if (irq->intid < VGIC_MIN_LPI)
Expand Down

0 comments on commit 542380a

Please sign in to comment.