From ec6449a9c2296b1c04f6219f7473e0c2fedecfed Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 20 Nov 2017 12:10:15 +0100 Subject: [PATCH 01/28] KVM: arm/arm64: Don't enable/disable physical timer access on VHE After the timer optimization rework we accidentally end up calling physical timer enable/disable functions on VHE systems, which is neither needed nor correct, since the CNTHCTL_EL2 register format is different when HCR_EL2.E2H is set. The CNTHCTL_EL2 is initialized when CPUs become online in kvm_timer_init_vhe() and we don't have to call these functions on VHE systems, which also allows us to inline the non-VHE functionality. Reported-by: Jintack Lim Signed-off-by: Christoffer Dall --- include/kvm/arm_arch_timer.h | 3 --- virt/kvm/arm/arch_timer.c | 6 ----- virt/kvm/arm/hyp/timer-sr.c | 48 +++++++++++++++--------------------- 3 files changed, 20 insertions(+), 37 deletions(-) diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 01ee473517e25b..6e45608b239981 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -93,7 +93,4 @@ void kvm_timer_init_vhe(void); #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer) #define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer) -void enable_el1_phys_timer_access(void); -void disable_el1_phys_timer_access(void); - #endif diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 4151250ce8da94..190c99ed1b7366 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -479,9 +479,6 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) vtimer_restore_state(vcpu); - if (has_vhe()) - disable_el1_phys_timer_access(); - /* Set the background timer for the physical timer emulation. */ phys_timer_emulate(vcpu); } @@ -510,9 +507,6 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) if (unlikely(!timer->enabled)) return; - if (has_vhe()) - enable_el1_phys_timer_access(); - vtimer_save_state(vcpu); /* diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index f39861639f08f8..f24404b3c8dff8 100644 --- a/virt/kvm/arm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c @@ -27,42 +27,34 @@ void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high) write_sysreg(cntvoff, cntvoff_el2); } -void __hyp_text enable_el1_phys_timer_access(void) -{ - u64 val; - - /* Allow physical timer/counter access for the host */ - val = read_sysreg(cnthctl_el2); - val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; - write_sysreg(val, cnthctl_el2); -} - -void __hyp_text disable_el1_phys_timer_access(void) -{ - u64 val; - - /* - * Disallow physical timer access for the guest - * Physical counter access is allowed - */ - val = read_sysreg(cnthctl_el2); - val &= ~CNTHCTL_EL1PCEN; - val |= CNTHCTL_EL1PCTEN; - write_sysreg(val, cnthctl_el2); -} - void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) { /* * We don't need to do this for VHE since the host kernel runs in EL2 * with HCR_EL2.TGE ==1, which makes those bits have no impact. */ - if (!has_vhe()) - enable_el1_phys_timer_access(); + if (!has_vhe()) { + u64 val; + + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); + } } void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) { - if (!has_vhe()) - disable_el1_phys_timer_access(); + if (!has_vhe()) { + u64 val; + + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); + } } From 285a90e36b138b707c4a9850f2500774b7191c99 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 17 Nov 2017 17:58:21 +0000 Subject: [PATCH 02/28] KVM: arm/arm64: VGIC: extend !vgic_is_initialized guard Commit f39d16cbabf9 ("KVM: arm/arm64: Guard kvm_vgic_map_is_active against !vgic_initialized") introduced a check whether the VGIC has been initialized before accessing the spinlock and the VGIC data structure. However the vgic_get_irq() call in the variable declaration sneaked through the net, so lets make sure that this also gets called only after we actually allocated the arrays this function accesses. Reviewed-by: Eric Auger Signed-off-by: Andre Przywara Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index b168a328a9e074..786cce7bd2ec62 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -823,13 +823,14 @@ void vgic_kick_vcpus(struct kvm *kvm) bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + struct vgic_irq *irq; bool map_is_active; unsigned long flags; if (!vgic_initialized(vcpu->kvm)) return false; + irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); spin_lock_irqsave(&irq->irq_lock, flags); map_is_active = irq->hw && irq->active; spin_unlock_irqrestore(&irq->irq_lock, flags); From 150009e2c70cc3c6e97f00e7595055765d32fb85 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Nov 2017 17:58:15 +0000 Subject: [PATCH 03/28] KVM: arm/arm64: vgic-irqfd: Fix MSI entry allocation Using the size of the structure we're allocating is a good idea and avoids any surprise... In this case, we're happilly confusing kvm_kernel_irq_routing_entry and kvm_irq_routing_entry... Fixes: 95b110ab9a09 ("KVM: arm/arm64: Enable irqchip routing") Cc: stable@vger.kernel.org # 4.8 Reported-by: AKASHI Takahiro Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-irqfd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index b7baf581611ae8..99e026d2dade9b 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -112,8 +112,7 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) u32 nr = dist->nr_spis; int i, ret; - entries = kcalloc(nr, sizeof(struct kvm_kernel_irq_routing_entry), - GFP_KERNEL); + entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL); if (!entries) return -ENOMEM; From ddb4b0102cb9cdd2398d98b3e1e024e08a2f4239 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Nov 2017 17:58:16 +0000 Subject: [PATCH 04/28] KVM: arm/arm64: vgic: Preserve the revious read from the pending table The current pending table parsing code assumes that we keep the previous read of the pending bits, but keep that variable in the current block, making sure it is discarded on each loop. We end-up using whatever is on the stack. Who knows, it might just be the right thing... Fixes: 280771252c1ba ("KVM: arm64: vgic-v3: KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES") Cc: stable@vger.kernel.org # 4.12 Reported-by: AKASHI Takahiro Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 2f05f732d3fd46..f47e8481fa452d 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -327,13 +327,13 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) int last_byte_offset = -1; struct vgic_irq *irq; int ret; + u8 val; list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { int byte_offset, bit_nr; struct kvm_vcpu *vcpu; gpa_t pendbase, ptr; bool stored; - u8 val; vcpu = irq->target_vcpu; if (!vcpu) From 64afe6e9eb4841f35317da4393de21a047a883b3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Nov 2017 17:58:17 +0000 Subject: [PATCH 05/28] KVM: arm/arm64: vgic-its: Preserve the revious read from the pending table The current pending table parsing code assumes that we keep the previous read of the pending bits, but keep that variable in the current block, making sure it is discarded on each loop. We end-up using whatever is on the stack. Who knows, it might just be the right thing... Fixes: 33d3bc9556a7d ("KVM: arm64: vgic-its: Read initial LPI pending table") Cc: stable@vger.kernel.org # 4.8 Reported-by: AKASHI Takahiro Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 1f761a9991e7d6..cb2d0a2dbe5aff 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -421,6 +421,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) u32 *intids; int nr_irqs, i; unsigned long flags; + u8 pendmask; nr_irqs = vgic_copy_lpi_list(vcpu, &intids); if (nr_irqs < 0) @@ -428,7 +429,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) for (i = 0; i < nr_irqs; i++) { int byte_offset, bit_nr; - u8 pendmask; byte_offset = intids[i] / BITS_PER_BYTE; bit_nr = intids[i] % BITS_PER_BYTE; From 686f294f2f1ae40705283dd413ca1e4c14f20f93 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Nov 2017 17:58:18 +0000 Subject: [PATCH 06/28] KVM: arm/arm64: vgic-its: Check result of allocation before use We miss a test against NULL after allocation. Fixes: 6d03a68f8054 ("KVM: arm64: vgic-its: Turn device_id validation into generic ID validation") Cc: stable@vger.kernel.org # 4.8 Reported-by: AKASHI Takahiro Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index cb2d0a2dbe5aff..8e633bd9cc1e74 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -821,6 +821,8 @@ static int vgic_its_alloc_collection(struct vgic_its *its, return E_ITS_MAPC_COLLECTION_OOR; collection = kzalloc(sizeof(*collection), GFP_KERNEL); + if (!collection) + return -ENOMEM; collection->collection_id = coll_id; collection->target_addr = COLLECTION_NOT_MAPPED; From a05d1c0d03fd60ed487991e73850421e735c0135 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Nov 2017 17:58:19 +0000 Subject: [PATCH 07/28] KVM: arm/arm64: vgic-v4: Only perform an unmap for valid vLPIs Before performing an unmap, let's check that what we have was really mapped the first place. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-v4.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c index 53c324aa44efa7..4a37292855bc7f 100644 --- a/virt/kvm/arm/vgic/vgic-v4.c +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -337,8 +337,10 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq, goto out; WARN_ON(!(irq->hw && irq->host_irq == virq)); - irq->hw = false; - ret = its_unmap_vlpi(virq); + if (irq->hw) { + irq->hw = false; + ret = its_unmap_vlpi(virq); + } out: mutex_unlock(&its->its_lock); From 26aa7b3b1c0fb3f1a6176a0c1847204ef4355693 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Thu, 16 Nov 2017 17:58:20 +0000 Subject: [PATCH 08/28] arm64: KVM: fix VTTBR_BADDR_MASK BUG_ON off-by-one VTTBR_BADDR_MASK is used to sanity check the size and alignment of the VTTBR address. It seems to currently be off by one, thereby only allowing up to 47-bit addresses (instead of 48-bit) and also insufficiently checking the alignment. This patch fixes it. As an example, with 4k pages, before this patch we have: PHYS_MASK_SHIFT = 48 VTTBR_X = 37 - 24 = 13 VTTBR_BADDR_SHIFT = 13 - 1 = 12 VTTBR_BADDR_MASK = ((1 << 35) - 1) << 12 = 0x00007ffffffff000 Which is wrong, because the mask doesn't allow bit 47 of the VTTBR address to be set, and only requires the address to be 12-bit (4k) aligned, while it actually needs to be 13-bit (8k) aligned because we concatenate two 4k tables. With this patch, the mask becomes 0x0000ffffffffe000, which is what we want. Fixes: 0369f6a34b9f ("arm64: KVM: EL2 register definitions") Cc: # 3.11.x Reviewed-by: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Kristina Martsenko Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 7f069ff37f06cf..715d395ef45bb2 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -170,8 +170,7 @@ #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) #define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) -#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT (UL(48)) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) From 5553b142be11e794ebc0805950b2e8313f93d718 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Nov 2017 17:58:21 +0000 Subject: [PATCH 09/28] arm: KVM: Fix VTTBR_BADDR_MASK BUG_ON off-by-one VTTBR_BADDR_MASK is used to sanity check the size and alignment of the VTTBR address. It seems to currently be off by one, thereby only allowing up to 39-bit addresses (instead of 40-bit) and also insufficiently checking the alignment. This patch fixes it. This patch is the 32bit pendent of Kristina's arm64 fix, and she deserves the actual kudos for pinpointing that one. Fixes: f7ed45be3ba52 ("KVM: ARM: World-switch implementation") Cc: # 3.9 Reported-by: Kristina Martsenko Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_arm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index c8781450905be9..3ab8b3781bfeca 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -161,8 +161,7 @@ #else #define VTTBR_X (5 - KVM_T0SZ) #endif -#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT _AC(48, ULL) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) From 696673d192f52c2c5a702224ee21f005318a844b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Thu, 16 Nov 2017 15:39:19 +0000 Subject: [PATCH 10/28] KVM: arm/arm64: debug: Introduce helper for single-step MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After emulating instructions we may want return to user-space to handle single-step debugging. Introduce a helper function, which, if single-step is enabled, sets the run structure for return and returns true. Signed-off-by: Alex Bennée Reviewed-by: Julien Thierry Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 5 +++++ arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/debug.c | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 242151ea69087a..a9f7d3f47134a9 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -285,6 +285,11 @@ static inline void kvm_arm_init_debug(void) {} static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} +static inline bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, + struct kvm_run *run) +{ + return false; +} int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 674912d7a57194..ea6cb5b24258be 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -370,6 +370,7 @@ void kvm_arm_init_debug(void); void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); +bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index dbadfaf850a7d0..fa63b28c65e08a 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -221,3 +221,24 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) } } } + + +/* + * After successfully emulating an instruction, we might want to + * return to user space with a KVM_EXIT_DEBUG. We can only do this + * once the emulation is complete, though, so for userspace emulations + * we have to wait until we have re-entered KVM before calling this + * helper. + * + * Return true (and set exit_reason) to return to userspace or false + * if no further action is required. + */ +bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.hsr = ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT; + return true; + } + return false; +} From 7226bc2e126dfb6d81e85a75f373ea4cc3619406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Thu, 16 Nov 2017 15:39:20 +0000 Subject: [PATCH 11/28] kvm: arm64: handle single-stepping trapped instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we are using guest debug to single-step the guest, we need to ensure that we exit after emulating the instruction. This only affects instructions completely emulated by the kernel. For instructions emulated in userspace, we need to exit and return to complete the emulation. The kvm_arm_handle_step_debug() helper sets up the necessary exit state if needed. Signed-off-by: Alex Bennée Reviewed-by: Julien Thierry Signed-off-by: Christoffer Dall --- arch/arm64/kvm/handle_exit.c | 49 +++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index b712479954692f..029c28dd25e969 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -186,6 +186,40 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) return arm_exit_handlers[hsr_ec]; } +/* + * We may be single-stepping an emulated instruction. If the emulation + * has been completed in the kernel, we can return to userspace with a + * KVM_EXIT_DEBUG, otherwise userspace needs to complete its + * emulation first. + */ +static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + int handled; + + /* + * See ARM ARM B1.14.1: "Hyp traps on instructions + * that fail their condition code check" + */ + if (!kvm_condition_valid(vcpu)) { + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + handled = 1; + } else { + exit_handle_fn exit_handler; + + exit_handler = kvm_get_exit_handler(vcpu); + handled = exit_handler(vcpu, run); + } + + /* + * kvm_arm_handle_step_debug() sets the exit_reason on the kvm_run + * structure if we need to return to userspace. + */ + if (handled > 0 && kvm_arm_handle_step_debug(vcpu, run)) + handled = 0; + + return handled; +} + /* * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on * proper exit to userspace. @@ -193,8 +227,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index) { - exit_handle_fn exit_handler; - if (ARM_SERROR_PENDING(exception_index)) { u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); @@ -222,18 +254,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, kvm_inject_vabt(vcpu); return 1; case ARM_EXCEPTION_TRAP: - /* - * See ARM ARM B1.14.1: "Hyp traps on instructions - * that fail their condition code check" - */ - if (!kvm_condition_valid(vcpu)) { - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - return 1; - } - - exit_handler = kvm_get_exit_handler(vcpu); - - return exit_handler(vcpu, run); + return handle_trap_exceptions(vcpu, run); case ARM_EXCEPTION_HYP_GONE: /* * EL2 has been reset to the hyp-stub. This happens when a guest From 1eb591288b956bdd75e464e69b6b8207ffa6e5e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Thu, 16 Nov 2017 15:39:21 +0000 Subject: [PATCH 12/28] kvm: arm64: handle single-step of userspace mmio instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The system state of KVM when using userspace emulation is not complete until we return into KVM_RUN. To handle mmio related updates we wait until they have been committed and then schedule our KVM_EXIT_DEBUG. The kvm_arm_handle_step_debug() helper tells us if we need to return and sets up the exit_reason for us. Signed-off-by: Alex Bennée Signed-off-by: Christoffer Dall --- virt/kvm/arm/arm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index a6524ff27de495..322c570d211e90 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -628,6 +628,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_handle_mmio_return(vcpu, vcpu->run); if (ret) return ret; + if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) + return 0; + } if (run->immediate_exit) From e70dce73befcf96607bc6e24c2c8f84229d6721e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Thu, 23 Nov 2017 12:11:33 +0000 Subject: [PATCH 13/28] kvm: arm64: handle single-step during SError exceptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an SError arrives during single-step both the SError and debug exceptions may be pending when the step is completed, and the architecture doesn't define the ordering of the two. This means that we can observe en SError even though we've just completed a step, without receiving a debug exception. In that case the DBG_SPSR_SS bit will have flipped as the instruction executed. After handling the abort in handle_exit() we test to see if the bit is clear and we were single-stepping before deciding if we need to exit to user space. Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Alex Bennée Signed-off-by: Christoffer Dall --- arch/arm64/kvm/handle_exit.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 029c28dd25e969..304203fa9e3307 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -28,6 +28,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "trace.h" @@ -252,7 +253,12 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, return 1; case ARM_EXCEPTION_EL1_SERROR: kvm_inject_vabt(vcpu); - return 1; + /* We may still need to return for single-step */ + if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS) + && kvm_arm_handle_step_debug(vcpu, run)) + return 0; + else + return 1; case ARM_EXCEPTION_TRAP: return handle_trap_exceptions(vcpu, run); case ARM_EXCEPTION_HYP_GONE: From e3feebf81744acd8b581e5eb58a93e8fdcf042a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Thu, 23 Nov 2017 12:11:34 +0000 Subject: [PATCH 14/28] kvm: arm64: handle single-step of hyp emulated mmio instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a fast-path of MMIO emulation inside hyp mode. The handling of single-step is broadly the same as kvm_arm_handle_step_debug() except we just setup ESR/HSR so handle_exit() does the correct thing as we exit. For the case of an emulated illegal access causing an SError we will exit via the ARM_EXCEPTION_EL1_SERROR path in handle_exit(). We behave as we would during a real SError and clear the DBG_SPSR_SS bit for the emulated instruction. Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Alex Bennée Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp/switch.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 525c01f4886780..f7c651f3a8c0e8 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -22,6 +22,7 @@ #include #include #include +#include static bool __hyp_text __fpsimd_enabled_nvhe(void) { @@ -269,7 +270,11 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) return true; } -static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu) +/* Skip an instruction which has been emulated. Returns true if + * execution can continue or false if we need to exit hyp mode because + * single-step was in effect. + */ +static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu) { *vcpu_pc(vcpu) = read_sysreg_el2(elr); @@ -282,6 +287,14 @@ static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu) } write_sysreg_el2(*vcpu_pc(vcpu), elr); + + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + vcpu->arch.fault.esr_el2 = + (ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT) | 0x22; + return false; + } else { + return true; + } } int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) @@ -342,13 +355,21 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) int ret = __vgic_v2_perform_cpuif_access(vcpu); if (ret == 1) { - __skip_instr(vcpu); - goto again; + if (__skip_instr(vcpu)) + goto again; + else + exit_code = ARM_EXCEPTION_TRAP; } if (ret == -1) { - /* Promote an illegal access to an SError */ - __skip_instr(vcpu); + /* Promote an illegal access to an + * SError. If we would be returning + * due to single-step clear the SS + * bit so handle_exit knows what to + * do after dealing with the error. + */ + if (!__skip_instr(vcpu)) + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; exit_code = ARM_EXCEPTION_EL1_SERROR; } @@ -363,8 +384,10 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) int ret = __vgic_v3_perform_cpuif_access(vcpu); if (ret == 1) { - __skip_instr(vcpu); - goto again; + if (__skip_instr(vcpu)) + goto again; + else + exit_code = ARM_EXCEPTION_TRAP; } /* 0 falls through to be handled out of EL2 */ From 22601127c0faa5db70ab88f23af11cb23c8f6cdf Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 29 Nov 2017 17:05:16 +0100 Subject: [PATCH 15/28] KVM: arm/arm64: Avoid attempting to load timer vgic state without a vgic The timer optimization patches inadvertendly changed the logic to always load the timer state as if we have a vgic, even if we don't have a vgic. Fix this by doing the usual irqchip_in_kernel() check and call the appropriate load function. Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 190c99ed1b7366..f9555b1e7f158f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -835,7 +835,10 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) no_vgic: preempt_disable(); timer->enabled = 1; - kvm_timer_vcpu_load_vgic(vcpu); + if (!irqchip_in_kernel(vcpu->kvm)) + kvm_timer_vcpu_load_user(vcpu); + else + kvm_timer_vcpu_load_vgic(vcpu); preempt_enable(); return 0; From 58d0d19a204604ca0da26058828a53558b265da3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 28 Nov 2017 15:18:19 +0000 Subject: [PATCH 16/28] kvm: arm: don't treat unavailable HYP mode as an error Since it is perfectly legal to run the kernel at EL1, it is not actually an error if HYP mode is not available when attempting to initialize KVM, given that KVM support cannot be built as a module. So demote the kvm_err() to kvm_info(), which prevents the error from appearing on an otherwise 'quiet' console. Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Ard Biesheuvel Signed-off-by: Christoffer Dall --- virt/kvm/arm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 322c570d211e90..ca65d06b38a8e0 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1507,7 +1507,7 @@ int kvm_arch_init(void *opaque) bool in_hyp_mode; if (!is_hyp_mode_available()) { - kvm_err("HYP mode not available\n"); + kvm_info("HYP mode not available\n"); return -ENODEV; } From 7465894e90e5a47e0e52aa5f1f708653fc40020f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Nov 2017 17:00:30 +0000 Subject: [PATCH 17/28] KVM: arm/arm64: Fix spinlock acquisition in vgic_set_owner vgic_set_owner acquires the irq lock without disabling interrupts, resulting in a lockdep splat (an interrupt could fire and result in the same lock being taken if the same virtual irq is to be injected). In practice, it is almost impossible to trigger this bug, but better safe than sorry. Convert the lock acquisition to a spin_lock_irqsave() and keep lockdep happy. Reported-by: James Morse Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 786cce7bd2ec62..ecb8e25f5fe56d 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -492,6 +492,7 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) { struct vgic_irq *irq; + unsigned long flags; int ret = 0; if (!vgic_initialized(vcpu->kvm)) @@ -502,12 +503,12 @@ int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) return -EINVAL; irq = vgic_get_irq(vcpu->kvm, vcpu, intid); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); if (irq->owner && irq->owner != owner) ret = -EEXIST; else irq->owner = owner; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); return ret; } From 6b2ad81bcfedaf36ceb8e6e71a58ad4ebd716313 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 27 Nov 2017 19:17:18 +0100 Subject: [PATCH 18/28] KVM: arm/arm64: kvm_arch_destroy_vm cleanups kvm_vgic_vcpu_destroy already gets called from kvm_vgic_destroy for each vcpu, so we don't have to call it from kvm_arch_vcpu_free. Additionally the other architectures set kvm->online_vcpus to zero after freeing them. We might as well do that for ARM too. Signed-off-by: Andrew Jones Signed-off-by: Christoffer Dall --- virt/kvm/arm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index ca65d06b38a8e0..675844c2174a22 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -188,6 +188,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm->vcpus[i] = NULL; } } + atomic_set(&kvm->online_vcpus, 0); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -296,7 +297,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { kvm_mmu_free_memory_caches(vcpu); kvm_timer_vcpu_terminate(vcpu); - kvm_vgic_vcpu_destroy(vcpu); kvm_pmu_vcpu_destroy(vcpu); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); From fc396e066318c0a02208c1d3f0b62950a7714999 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 3 Dec 2017 23:54:41 +0100 Subject: [PATCH 19/28] KVM: arm/arm64: Fix broken GICH_ELRSR big endian conversion We are incorrectly rearranging 32-bit words inside a 64-bit typed value for big endian systems, which would result in never marking a virtual interrupt as inactive on big endian systems (assuming 32 or fewer LRs on the hardware). Fix this by not doing any word order manipulation for the typed values. Cc: Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/hyp/vgic-v2-sr.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index a3f18d3623661a..d7fd46fe9efb35 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -34,11 +34,7 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) else elrsr1 = 0; -#ifdef CONFIG_CPU_BIG_ENDIAN - cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1; -#else cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; -#endif } static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) From f775b13eedee2f7f3c6fdd4e90fb79090ce5d339 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 14 Nov 2017 16:54:23 -0500 Subject: [PATCH 20/28] x86,kvm: move qemu/guest FPU switching out to vcpu_run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, every time a VCPU is scheduled out, the host kernel will first save the guest FPU/xstate context, then load the qemu userspace FPU context, only to then immediately save the qemu userspace FPU context back to memory. When scheduling in a VCPU, the same extraneous FPU loads and saves are done. This could be avoided by moving from a model where the guest FPU is loaded and stored with preemption disabled, to a model where the qemu userspace FPU is swapped out for the guest FPU context for the duration of the KVM_RUN ioctl. This is done under the VCPU mutex, which is also taken when other tasks inspect the VCPU FPU context, so the code should already be safe for this change. That should come as no surprise, given that s390 already has this optimization. This can fix a bug where KVM calls get_user_pages while owning the FPU, and the file system ends up requesting the FPU again: [258270.527947] __warn+0xcb/0xf0 [258270.527948] warn_slowpath_null+0x1d/0x20 [258270.527951] kernel_fpu_disable+0x3f/0x50 [258270.527953] __kernel_fpu_begin+0x49/0x100 [258270.527955] kernel_fpu_begin+0xe/0x10 [258270.527958] crc32c_pcl_intel_update+0x84/0xb0 [258270.527961] crypto_shash_update+0x3f/0x110 [258270.527968] crc32c+0x63/0x8a [libcrc32c] [258270.527975] dm_bm_checksum+0x1b/0x20 [dm_persistent_data] [258270.527978] node_prepare_for_write+0x44/0x70 [dm_persistent_data] [258270.527985] dm_block_manager_write_callback+0x41/0x50 [dm_persistent_data] [258270.527988] submit_io+0x170/0x1b0 [dm_bufio] [258270.527992] __write_dirty_buffer+0x89/0x90 [dm_bufio] [258270.527994] __make_buffer_clean+0x4f/0x80 [dm_bufio] [258270.527996] __try_evict_buffer+0x42/0x60 [dm_bufio] [258270.527998] dm_bufio_shrink_scan+0xc0/0x130 [dm_bufio] [258270.528002] shrink_slab.part.40+0x1f5/0x420 [258270.528004] shrink_node+0x22c/0x320 [258270.528006] do_try_to_free_pages+0xf5/0x330 [258270.528008] try_to_free_pages+0xe9/0x190 [258270.528009] __alloc_pages_slowpath+0x40f/0xba0 [258270.528011] __alloc_pages_nodemask+0x209/0x260 [258270.528014] alloc_pages_vma+0x1f1/0x250 [258270.528017] do_huge_pmd_anonymous_page+0x123/0x660 [258270.528021] handle_mm_fault+0xfd3/0x1330 [258270.528025] __get_user_pages+0x113/0x640 [258270.528027] get_user_pages+0x4f/0x60 [258270.528063] __gfn_to_pfn_memslot+0x120/0x3f0 [kvm] [258270.528108] try_async_pf+0x66/0x230 [kvm] [258270.528135] tdp_page_fault+0x130/0x280 [kvm] [258270.528149] kvm_mmu_page_fault+0x60/0x120 [kvm] [258270.528158] handle_ept_violation+0x91/0x170 [kvm_intel] [258270.528162] vmx_handle_exit+0x1ca/0x1400 [kvm_intel] No performance changes were detected in quick ping-pong tests on my 4 socket system, which is expected since an FPU+xstate load is on the order of 0.1us, while ping-ponging between CPUs is on the order of 20us, and somewhat noisy. Cc: stable@vger.kernel.org Signed-off-by: Rik van Riel Suggested-by: Christian Borntraeger Signed-off-by: Paolo Bonzini [Fixed a bug where reset_vcpu called put_fpu without preceding load_fpu, which happened inside from KVM_CREATE_VCPU ioctl. - Radim] Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 13 +++++++++++ arch/x86/kvm/x86.c | 39 ++++++++++++++------------------- include/linux/kvm_host.h | 2 +- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 977de5fb968be4..62527e053ee459 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -536,7 +536,20 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_page_cache; struct kvm_mmu_memory_cache mmu_page_header_cache; + /* + * QEMU userspace and the guest each have their own FPU state. + * In vcpu_run, we switch between the user and guest FPU contexts. + * While running a VCPU, the VCPU thread will have the guest FPU + * context. + * + * Note that while the PKRU state lives inside the fpu registers, + * it is switched out separately at VMENTER and VMEXIT time. The + * "guest_fpu" state here contains the guest FPU context, with the + * host PRKU bits. + */ + struct fpu user_fpu; struct fpu guest_fpu; + u64 xcr0; u64 guest_supported_xcr0; u32 guest_xstate_size; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eee8e7faf1af57..c8da1680a7d644 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2937,7 +2937,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) srcu_read_unlock(&vcpu->kvm->srcu, idx); pagefault_enable(); kvm_x86_ops->vcpu_put(vcpu); - kvm_put_guest_fpu(vcpu); vcpu->arch.last_host_tsc = rdtsc(); } @@ -5254,13 +5253,10 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt) static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) { - preempt_disable(); - kvm_load_guest_fpu(emul_to_vcpu(ctxt)); } static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) { - preempt_enable(); } static int emulator_intercept(struct x86_emulate_ctxt *ctxt, @@ -6952,7 +6948,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) preempt_disable(); kvm_x86_ops->prepare_guest_switch(vcpu); - kvm_load_guest_fpu(vcpu); /* * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt @@ -7297,12 +7292,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } + kvm_load_guest_fpu(vcpu); + if (unlikely(vcpu->arch.complete_userspace_io)) { int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; vcpu->arch.complete_userspace_io = NULL; r = cui(vcpu); if (r <= 0) - goto out; + goto out_fpu; } else WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); @@ -7311,6 +7308,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) else r = vcpu_run(vcpu); +out_fpu: + kvm_put_guest_fpu(vcpu); out: post_kvm_run_save(vcpu); kvm_sigset_deactivate(vcpu); @@ -7704,32 +7703,25 @@ static void fx_init(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= X86_CR0_ET; } +/* Swap (qemu) user FPU context for the guest FPU context. */ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { - if (vcpu->guest_fpu_loaded) - return; - - /* - * Restore all possible states in the guest, - * and assume host would use all available bits. - * Guest xcr0 would be loaded later. - */ - vcpu->guest_fpu_loaded = 1; - __kernel_fpu_begin(); + preempt_disable(); + copy_fpregs_to_fpstate(&vcpu->arch.user_fpu); /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, ~XFEATURE_MASK_PKRU); + preempt_enable(); trace_kvm_fpu(1); } +/* When vcpu_run ends, restore user space FPU context. */ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - if (!vcpu->guest_fpu_loaded) - return; - - vcpu->guest_fpu_loaded = 0; + preempt_disable(); copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); - __kernel_fpu_end(); + copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state); + preempt_enable(); ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); } @@ -7846,7 +7838,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) * To avoid have the INIT path from kvm_apic_has_events() that be * called with loaded FPU and does not let userspace fix the state. */ - kvm_put_guest_fpu(vcpu); + if (init_event) + kvm_put_guest_fpu(vcpu); mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave, XFEATURE_MASK_BNDREGS); if (mpx_state_buffer) @@ -7855,6 +7848,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) XFEATURE_MASK_BNDCSR); if (mpx_state_buffer) memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr)); + if (init_event) + kvm_load_guest_fpu(vcpu); } if (!init_event) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 893d6d606cd0a9..6bdd4b9f661154 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -232,7 +232,7 @@ struct kvm_vcpu { struct mutex mutex; struct kvm_run *run; - int guest_fpu_loaded, guest_xcr0_loaded; + int guest_xcr0_loaded; struct swait_queue_head wq; struct pid __rcu *pid; int sigset_active; From 6ab0b9feb82a7ac09956e6761fec73cd47789df5 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 14 Nov 2017 16:54:24 -0500 Subject: [PATCH 21/28] x86,kvm: remove KVM emulator get_fpu / put_fpu Now that get_fpu and put_fpu do nothing, because the scheduler will automatically load and restore the guest FPU context for us while we are in this code (deep inside the vcpu_run main loop), we can get rid of the get_fpu and put_fpu hooks. Signed-off-by: Rik van Riel Suggested-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 2 -- arch/x86/kvm/emulate.c | 24 ------------------------ arch/x86/kvm/x86.c | 10 ---------- 3 files changed, 36 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 034caa1a084e36..b24b1c8b397989 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -214,8 +214,6 @@ struct x86_emulate_ops { void (*halt)(struct x86_emulate_ctxt *ctxt); void (*wbinvd)(struct x86_emulate_ctxt *ctxt); int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); - void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ - void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ int (*intercept)(struct x86_emulate_ctxt *ctxt, struct x86_instruction_info *info, enum x86_intercept_stage stage); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e7d04d0c8008d1..abe74f779f9d79 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1046,7 +1046,6 @@ static void fetch_register_operand(struct operand *op) static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) { - ctxt->ops->get_fpu(ctxt); switch (reg) { case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break; case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break; @@ -1068,13 +1067,11 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) #endif default: BUG(); } - ctxt->ops->put_fpu(ctxt); } static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) { - ctxt->ops->get_fpu(ctxt); switch (reg) { case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break; case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break; @@ -1096,12 +1093,10 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, #endif default: BUG(); } - ctxt->ops->put_fpu(ctxt); } static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) { - ctxt->ops->get_fpu(ctxt); switch (reg) { case 0: asm("movq %%mm0, %0" : "=m"(*data)); break; case 1: asm("movq %%mm1, %0" : "=m"(*data)); break; @@ -1113,12 +1108,10 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) case 7: asm("movq %%mm7, %0" : "=m"(*data)); break; default: BUG(); } - ctxt->ops->put_fpu(ctxt); } static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) { - ctxt->ops->get_fpu(ctxt); switch (reg) { case 0: asm("movq %0, %%mm0" : : "m"(*data)); break; case 1: asm("movq %0, %%mm1" : : "m"(*data)); break; @@ -1130,7 +1123,6 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) case 7: asm("movq %0, %%mm7" : : "m"(*data)); break; default: BUG(); } - ctxt->ops->put_fpu(ctxt); } static int em_fninit(struct x86_emulate_ctxt *ctxt) @@ -1138,9 +1130,7 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); - ctxt->ops->get_fpu(ctxt); asm volatile("fninit"); - ctxt->ops->put_fpu(ctxt); return X86EMUL_CONTINUE; } @@ -1151,9 +1141,7 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); - ctxt->ops->get_fpu(ctxt); asm volatile("fnstcw %0": "+m"(fcw)); - ctxt->ops->put_fpu(ctxt); ctxt->dst.val = fcw; @@ -1167,9 +1155,7 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); - ctxt->ops->get_fpu(ctxt); asm volatile("fnstsw %0": "+m"(fsw)); - ctxt->ops->put_fpu(ctxt); ctxt->dst.val = fsw; @@ -4001,12 +3987,8 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - ctxt->ops->get_fpu(ctxt); - rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); - ctxt->ops->put_fpu(ctxt); - if (rc != X86EMUL_CONTINUE) return rc; @@ -4049,8 +4031,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - ctxt->ops->get_fpu(ctxt); - if (size < __fxstate_size(16)) { rc = fxregs_fixup(&fx_state, size); if (rc != X86EMUL_CONTINUE) @@ -4066,8 +4046,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); out: - ctxt->ops->put_fpu(ctxt); - return rc; } @@ -5317,9 +5295,7 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) { int rc; - ctxt->ops->get_fpu(ctxt); rc = asm_safe("fwait"); - ctxt->ops->put_fpu(ctxt); if (unlikely(rc != X86EMUL_CONTINUE)) return emulate_exception(ctxt, MF_VECTOR, 0, false); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c8da1680a7d644..6ca747abfa2f35 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5251,14 +5251,6 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt) emul_to_vcpu(ctxt)->arch.halt_request = 1; } -static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) -{ -} - -static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) -{ -} - static int emulator_intercept(struct x86_emulate_ctxt *ctxt, struct x86_instruction_info *info, enum x86_intercept_stage stage) @@ -5336,8 +5328,6 @@ static const struct x86_emulate_ops emulate_ops = { .halt = emulator_halt, .wbinvd = emulator_wbinvd, .fix_hypercall = emulator_fix_hypercall, - .get_fpu = emulator_get_fpu, - .put_fpu = emulator_put_fpu, .intercept = emulator_intercept, .get_cpuid = emulator_get_cpuid, .set_nmi_mask = emulator_set_nmi_mask, From d59d51f088014f25c2562de59b9abff4f42a7468 Mon Sep 17 00:00:00 2001 From: Andrew Honig Date: Fri, 1 Dec 2017 10:21:09 -0800 Subject: [PATCH 22/28] KVM: VMX: remove I/O port 0x80 bypass on Intel hosts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes CVE-2017-1000407. KVM allows guests to directly access I/O port 0x80 on Intel hosts. If the guest floods this port with writes it generates exceptions and instability in the host kernel, leading to a crash. With this change guest writes to port 0x80 on Intel will behave the same as they currently behave on AMD systems. Prevent the flooding by removing the code that sets port 0x80 as a passthrough port. This is essentially the same as upstream patch 99f85a28a78e96d28907fe036e1671a218fee597, except that patch was for AMD chipsets and this patch is for Intel. Signed-off-by: Andrew Honig Signed-off-by: Jim Mattson Fixes: fdef3ad1b386 ("KVM: VMX: Enable io bitmaps to avoid IO port 0x80 VMEXITs") Cc: Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4704aaf6d19e2e..2fd9a8cec29a29 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6755,12 +6755,7 @@ static __init int hardware_setup(void) memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); - /* - * Allow direct access to the PC debug port (it is often used for I/O - * delays, but the vmexits simply slow things down). - */ memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); - clear_bit(0x80, vmx_io_bitmap_a); memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); From 2895db67b01cb875457c9c3f30a14723b6b5dfd5 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Fri, 1 Dec 2017 09:57:56 -0800 Subject: [PATCH 23/28] KVM: VMX: fix page leak in hardware_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vmx_io_bitmap_b should not be allocated twice. Fixes: 23611332938d ("KVM: VMX: refactor setup of global page-sized bitmaps") Signed-off-by: Jim Mattson Reviewed-by: Krish Sadhukhan Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2fd9a8cec29a29..8eba631c4dbd50 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6751,7 +6751,6 @@ static __init int hardware_setup(void) goto out; } - vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); From d809aa238744ae5b7520b73ac5411862ccfdc1bc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:33 +0100 Subject: [PATCH 24/28] KVM: s390: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/kvm/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Message-Id: <20171124140043.10062-3-gregkh@linuxfoundation.org> Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/Makefile | 1 + arch/s390/kvm/diag.c | 1 + arch/s390/kvm/gaccess.h | 1 + arch/s390/kvm/guestdbg.c | 1 + arch/s390/kvm/intercept.c | 1 + arch/s390/kvm/interrupt.c | 1 + arch/s390/kvm/irq.h | 1 + arch/s390/kvm/kvm-s390.c | 1 + arch/s390/kvm/kvm-s390.h | 1 + arch/s390/kvm/priv.c | 1 + arch/s390/kvm/sigp.c | 1 + arch/s390/kvm/vsie.c | 1 + 12 files changed, 12 insertions(+) diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 6048b1c6e58062..2c5a8190d5cefe 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # Makefile for kernel virtual machines on s390 # # Copyright IBM Corp. 2008 diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index d93a2c0474bf67..481e1fe6095f31 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * handling diagnose instructions * diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index bec42b852246f0..443b7fa82d568a 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * access guest memory * diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index bcbd86621d0180..d6a1bead813cfc 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * kvm guest debug support * diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 8fe034beb62321..fc76a91a782098 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * in-kernel handling for sie intercepts * diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index fa557372d600a0..f37a34ea16011a 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * handling kvm guest interrupts * diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h index d98e4159643df4..54a4a59cb4f9c7 100644 --- a/arch/s390/kvm/irq.h +++ b/arch/s390/kvm/irq.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * s390 irqchip routines * diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 98ad8b9e036093..0d45e32dd868ff 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * hosting zSeries kernel virtual machines * diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 10d65dfbc30673..9777924c4b5cab 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * definition for kvm on s390 * diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index c954ac49eee471..c9d962ac140b23 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * handling privileged instructions * diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 9d592ef4104b04..d12ac5d6e8bb79 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * handling interprocessor communication * diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index a311938b63b3b4..d2b7886da44405 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * kvm nested virtualization support for s390x * From 940f89a5a37789b94f332755767c556a64b004e4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:39 +0100 Subject: [PATCH 25/28] KVM: s390: Remove redundant license text Now that the SPDX tag is in all arch/s390/kvm/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Message-Id: <20171124140043.10062-9-gregkh@linuxfoundation.org> Acked-by: Cornelia Huck Acked-by: "Paul E. McKenney" Signed-off-by: Christian Borntraeger --- arch/s390/kvm/Makefile | 4 ---- arch/s390/kvm/diag.c | 4 ---- arch/s390/kvm/gaccess.h | 4 ---- arch/s390/kvm/guestdbg.c | 4 ---- arch/s390/kvm/intercept.c | 4 ---- arch/s390/kvm/interrupt.c | 4 ---- arch/s390/kvm/irq.h | 4 ---- arch/s390/kvm/kvm-s390.c | 4 ---- arch/s390/kvm/kvm-s390.h | 4 ---- arch/s390/kvm/priv.c | 4 ---- arch/s390/kvm/sigp.c | 4 ---- arch/s390/kvm/vsie.c | 4 ---- 12 files changed, 48 deletions(-) diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 2c5a8190d5cefe..05ee90a5ea08bd 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -2,10 +2,6 @@ # Makefile for kernel virtual machines on s390 # # Copyright IBM Corp. 2008 -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License (version 2 only) -# as published by the Free Software Foundation. KVM := ../../../virt/kvm common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 481e1fe6095f31..89aa114a2cbada 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008, 2011 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte * Christian Borntraeger */ diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 443b7fa82d568a..f4c51756c46239 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008, 2014 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte */ diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index d6a1bead813cfc..b5f3e82006d0b2 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2014 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): David Hildenbrand */ #include diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index fc76a91a782098..9c7d707158622e 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008, 2014 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte * Christian Borntraeger */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f37a34ea16011a..024ad8bcc51655 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008, 2015 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte */ diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h index 54a4a59cb4f9c7..484608c71dd011 100644 --- a/arch/s390/kvm/irq.h +++ b/arch/s390/kvm/irq.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2014 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Cornelia Huck */ #ifndef __KVM_IRQ_H diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0d45e32dd868ff..34375eed93ee23 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008, 2009 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte * Christian Borntraeger * Heiko Carstens diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 9777924c4b5cab..5e46ba429bcb4d 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008, 2009 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte * Christian Borntraeger * Christian Ehrhardt diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index c9d962ac140b23..28b69ab56b7b3a 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008, 2013 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte * Christian Borntraeger */ diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index d12ac5d6e8bb79..c1f5cde2c878e6 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008, 2013 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte * Christian Borntraeger * Christian Ehrhardt diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index d2b7886da44405..5d6ae0326d9e8f 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2016 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): David Hildenbrand */ #include From bb64da9aba89765fee74b395967b18a7d6c364e9 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 21 Nov 2017 16:02:52 +0100 Subject: [PATCH 26/28] KVM: s390: mark irq_state.flags as non-usable Old kernels did not check for zero in the irq_state.flags field and old QEMUs did not zero the flag/reserved fields when calling KVM_S390_*_IRQ_STATE. Let's add comments to prevent future uses of these fields. Signed-off-by: Christian Borntraeger Reviewed-by: Thomas Huth Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/api.txt | 15 ++++++++++++--- arch/s390/kvm/kvm-s390.c | 6 ++++-- include/uapi/linux/kvm.h | 4 ++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index f670e4b9e7f33f..57d3ee9e4bde2a 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2901,14 +2901,19 @@ userspace buffer and its length: struct kvm_s390_irq_state { __u64 buf; - __u32 flags; + __u32 flags; /* will stay unused for compatibility reasons */ __u32 len; - __u32 reserved[4]; + __u32 reserved[4]; /* will stay unused for compatibility reasons */ }; Userspace passes in the above struct and for each pending interrupt a struct kvm_s390_irq is copied to the provided buffer. +The structure contains a flags and a reserved field for future extensions. As +the kernel never checked for flags == 0 and QEMU never pre-zeroed flags and +reserved, these fields can not be used in the future without breaking +compatibility. + If -ENOBUFS is returned the buffer provided was too small and userspace may retry with a bigger buffer. @@ -2932,10 +2937,14 @@ containing a struct kvm_s390_irq_state: struct kvm_s390_irq_state { __u64 buf; + __u32 flags; /* will stay unused for compatibility reasons */ __u32 len; - __u32 pad; + __u32 reserved[4]; /* will stay unused for compatibility reasons */ }; +The restrictions for flags and reserved apply as well. +(see KVM_S390_GET_IRQ_STATE) + The userspace memory referenced by buf contains a struct kvm_s390_irq for each interrupt to be injected into the guest. If one of the interrupts could not be injected for some reason the diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 34375eed93ee23..efa439f6ffb393 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* - * hosting zSeries kernel virtual machines + * hosting IBM Z kernel virtual machines (s390x) * - * Copyright IBM Corp. 2008, 2009 + * Copyright IBM Corp. 2008, 2017 * * Author(s): Carsten Otte * Christian Borntraeger @@ -3808,6 +3808,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EINVAL; break; } + /* do not use irq_state.flags, it will break old QEMUs */ r = kvm_s390_set_irq_state(vcpu, (void __user *) irq_state.buf, irq_state.len); @@ -3823,6 +3824,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EINVAL; break; } + /* do not use irq_state.flags, it will break old QEMUs */ r = kvm_s390_get_irq_state(vcpu, (__u8 __user *) irq_state.buf, irq_state.len); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 282d7613fce878..496e59a2738ba9 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -630,9 +630,9 @@ struct kvm_s390_irq { struct kvm_s390_irq_state { __u64 buf; - __u32 flags; + __u32 flags; /* will stay unused for compatibility reasons */ __u32 len; - __u32 reserved[4]; + __u32 reserved[4]; /* will stay unused for compatibility reasons */ }; /* for KVM_SET_GUEST_DEBUG */ From ca76ec9ca871e67d8cd0b6caba24aca3d3ac4546 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 4 Dec 2017 12:19:11 +0100 Subject: [PATCH 27/28] KVM: s390: Fix skey emulation permission check All skey functions call skey_check_enable at their start, which checks if we are in the PSTATE and injects a privileged operation exception if we are. Unfortunately they continue processing afterwards and perform the operation anyhow as skey_check_enable does not deliver an error if the exception injection was successful. Let's move the PSTATE check into the skey functions and exit them on such an occasion, also we now do not enable skey handling anymore in such a case. Signed-off-by: Janosch Frank Reviewed-by: Christian Borntraeger Fixes: a7e19ab ("KVM: s390: handle missing storage-key facility") Cc: # v4.8+ Reviewed-by: Cornelia Huck Reviewed-by: Thomas Huth Signed-off-by: Christian Borntraeger --- arch/s390/kvm/priv.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 28b69ab56b7b3a..572496c688cc0c 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -232,8 +232,6 @@ static int try_handle_skey(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return -EAGAIN; } - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); return 0; } @@ -244,6 +242,9 @@ static int handle_iske(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + rc = try_handle_skey(vcpu); if (rc) return rc != -EAGAIN ? rc : 0; @@ -273,6 +274,9 @@ static int handle_rrbe(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + rc = try_handle_skey(vcpu); if (rc) return rc != -EAGAIN ? rc : 0; @@ -308,6 +312,9 @@ static int handle_sske(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + rc = try_handle_skey(vcpu); if (rc) return rc != -EAGAIN ? rc : 0; From b1394e745b9453dcb5b0671c205b770e87dedb87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 30 Nov 2017 19:05:45 +0100 Subject: [PATCH 28/28] KVM: x86: fix APIC page invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implementation of the unpinned APIC page didn't update the VMCS address cache when invalidation was done through range mmu notifiers. This became a problem when the page notifier was removed. Re-introduce the arch-specific helper and call it from ...range_start. Reported-by: Fabian Grünbichler Fixes: 38b9917350cb ("kvm: vmx: Implement set_apic_access_page_addr") Fixes: 369ea8242c0f ("mm/rmap: update to new mmu_notifier semantic v2") Cc: Reviewed-by: Paolo Bonzini Reviewed-by: Andrea Arcangeli Tested-by: Wanpeng Li Tested-by: Fabian Grünbichler Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/x86.c | 14 ++++++++++++++ virt/kvm/kvm_main.c | 8 ++++++++ 3 files changed, 25 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 62527e053ee459..51679843132829 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1448,4 +1448,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #define put_smstate(type, buf, offset, val) \ *(type *)((buf) + (offset) - 0x7e00) = val +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end); + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6ca747abfa2f35..faf843c9b916ea 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6764,6 +6764,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) kvm_x86_ops->tlb_flush(vcpu); } +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) +{ + unsigned long apic_address; + + /* + * The physical address of apic access page is stored in the VMCS. + * Update it when it becomes invalid. + */ + apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + if (start <= apic_address && apic_address < end) + kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); +} + void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { struct page *page = NULL; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c422c10cd1dd17..210bf820385a70 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -135,6 +135,11 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm); static unsigned long long kvm_createvm_count; static unsigned long long kvm_active_vms; +__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) +{ +} + bool kvm_is_reserved_pfn(kvm_pfn_t pfn) { if (pfn_valid(pfn)) @@ -360,6 +365,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, kvm_flush_remote_tlbs(kvm); spin_unlock(&kvm->mmu_lock); + + kvm_arch_mmu_notifier_invalidate_range(kvm, start, end); + srcu_read_unlock(&kvm->srcu, idx); }