From e13dcc1ab593452b99c39de2cb588c39f6d6a7e9 Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Tue, 3 Jul 2012 05:48:49 +0000 Subject: [PATCH 01/56] PPC: epapr: create define for return code value of success Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/epapr_hcalls.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index bf2c06c338719..c0c7adcd21e3a 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -88,7 +88,8 @@ #define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num)) #define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num) -/* epapr error codes */ +/* epapr return codes */ +#define EV_SUCCESS 0 #define EV_EPERM 1 /* Operation not permitted */ #define EV_ENOENT 2 /* Entry Not Found */ #define EV_EIO 3 /* I/O error occured */ From fdcf8bd7e711d4c0fe3ef624cfb5e3808149ff7f Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Tue, 3 Jul 2012 05:48:50 +0000 Subject: [PATCH 02/56] KVM: PPC: use definitions in epapr header for hcalls Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_para.h | 21 +++++++++++---------- arch/powerpc/kernel/kvm.c | 2 +- arch/powerpc/kvm/powerpc.c | 10 +++++----- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index c18916bff689a..a168ce37d85ce 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -75,9 +75,10 @@ struct kvm_vcpu_arch_shared { }; #define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */ -#define HC_VENDOR_KVM (42 << 16) -#define HC_EV_SUCCESS 0 -#define HC_EV_UNIMPLEMENTED 12 + +#define KVM_HCALL_TOKEN(num) _EV_HCALL_TOKEN(EV_KVM_VENDOR_ID, num) + +#include #define KVM_FEATURE_MAGIC_PAGE 1 @@ -121,7 +122,7 @@ static unsigned long kvm_hypercall(unsigned long *in, unsigned long *out, unsigned long nr) { - return HC_EV_UNIMPLEMENTED; + return EV_UNIMPLEMENTED; } #endif @@ -132,7 +133,7 @@ static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2) unsigned long out[8]; unsigned long r; - r = kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + r = kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); *r2 = out[0]; return r; @@ -143,7 +144,7 @@ static inline long kvm_hypercall0(unsigned int nr) unsigned long in[8]; unsigned long out[8]; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) @@ -152,7 +153,7 @@ static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) unsigned long out[8]; in[0] = p1; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, @@ -163,7 +164,7 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, in[0] = p1; in[1] = p2; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, @@ -175,7 +176,7 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, in[0] = p1; in[1] = p2; in[2] = p3; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, @@ -189,7 +190,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, in[1] = p2; in[2] = p3; in[3] = p4; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 867db1de89495..a61b133c4f99b 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -419,7 +419,7 @@ static void kvm_map_magic_page(void *data) in[0] = KVM_MAGIC_PAGE; in[1] = KVM_MAGIC_PAGE; - kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE); + kvm_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE)); *features = out[0]; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4d213b8b0fb55..0368a9391b216 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -67,18 +67,18 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) } switch (nr) { - case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE: + case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE): { vcpu->arch.magic_page_pa = param1; vcpu->arch.magic_page_ea = param2; r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; - r = HC_EV_SUCCESS; + r = EV_SUCCESS; break; } - case HC_VENDOR_KVM | KVM_HC_FEATURES: - r = HC_EV_SUCCESS; + case KVM_HCALL_TOKEN(KVM_HC_FEATURES): + r = EV_SUCCESS; #if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2) /* XXX Missing magic page on 44x */ r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); @@ -87,7 +87,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) /* Second return value is in r4 */ break; default: - r = HC_EV_UNIMPLEMENTED; + r = EV_UNIMPLEMENTED; break; } From 784bafac79e7646e56f40998a6dde0e1ed5595f8 Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Tue, 3 Jul 2012 05:48:51 +0000 Subject: [PATCH 03/56] KVM: PPC: add pvinfo for hcall opcodes on e500mc/e5500 Signed-off-by: Liu Yu [stuart: factored this out from idle hcall support in host patch] Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/kvm/powerpc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0368a9391b216..a478e662b2bce 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -751,9 +751,16 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) { + u32 inst_nop = 0x60000000; +#ifdef CONFIG_KVM_BOOKE_HV + u32 inst_sc1 = 0x44000022; + pvinfo->hcall[0] = inst_sc1; + pvinfo->hcall[1] = inst_nop; + pvinfo->hcall[2] = inst_nop; + pvinfo->hcall[3] = inst_nop; +#else u32 inst_lis = 0x3c000000; u32 inst_ori = 0x60000000; - u32 inst_nop = 0x60000000; u32 inst_sc = 0x44000002; u32 inst_imm_mask = 0xffff; @@ -770,6 +777,7 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask); pvinfo->hcall[2] = inst_sc; pvinfo->hcall[3] = inst_nop; +#endif return 0; } From 9202e07636f0c4858ba6c30773a3f160b2b5659a Mon Sep 17 00:00:00 2001 From: Liu Yu-B13201 Date: Tue, 3 Jul 2012 05:48:52 +0000 Subject: [PATCH 04/56] KVM: PPC: Add support for ePAPR idle hcall in host kernel And add a new flag definition in kvm_ppc_pvinfo to indicate whether the host supports the EV_IDLE hcall. Signed-off-by: Liu Yu [stuart.yoder@freescale.com: cleanup,fixes for conditions allowing idle] Signed-off-by: Stuart Yoder [agraf: fix typo] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 7 +++++-- arch/powerpc/include/asm/Kbuild | 1 + arch/powerpc/kvm/powerpc.c | 10 ++++++++-- include/linux/kvm.h | 2 ++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index f6ec3a92e6214..170afb1fbc2e1 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1194,12 +1194,15 @@ struct kvm_ppc_pvinfo { This ioctl fetches PV specific information that need to be passed to the guest using the device tree or other means from vm context. -For now the only implemented piece of information distributed here is an array -of 4 instructions that make up a hypercall. +The hcall array defines 4 instructions that make up a hypercall. If any additional field gets added to this structure later on, a bit for that additional piece of information will be set in the flags bitmap. +The flags bitmap is defined as: + + /* the host supports the ePAPR idle hcall + #define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) 4.48 KVM_ASSIGN_PCI_DEVICE diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 7e313f1ed1832..13d6b7bf3b698 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -34,5 +34,6 @@ header-y += termios.h header-y += types.h header-y += ucontext.h header-y += unistd.h +header-y += epapr_hcalls.h generic-y += rwsem.h diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index a478e662b2bce..dbf56e173c25b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -38,8 +38,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return !(v->arch.shared->msr & MSR_WE) || - !!(v->arch.pending_exceptions) || + return !!(v->arch.pending_exceptions) || v->requests; } @@ -86,6 +85,11 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) /* Second return value is in r4 */ break; + case EV_HCALL_TOKEN(EV_IDLE): + r = EV_SUCCESS; + kvm_vcpu_block(vcpu); + clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + break; default: r = EV_UNIMPLEMENTED; break; @@ -779,6 +783,8 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) pvinfo->hcall[3] = inst_nop; #endif + pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE; + return 0; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 0a6d6ba44c858..4cb3761bebae9 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -477,6 +477,8 @@ struct kvm_ppc_smmu_info { struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; }; +#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) + #define KVMIO 0xAE /* machine type bits, to be used as argument to KVM_CREATE_VM */ From 2f979de8a716bdbdc9f4db532652fbca08ed710c Mon Sep 17 00:00:00 2001 From: Liu Yu-B13201 Date: Tue, 3 Jul 2012 05:48:53 +0000 Subject: [PATCH 05/56] KVM: PPC: ev_idle hcall support for e500 guests Signed-off-by: Liu Yu [varun: 64-bit changes] Signed-off-by: Varun Sethi Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/epapr_hcalls.h | 11 +++++----- arch/powerpc/kernel/epapr_hcalls.S | 28 +++++++++++++++++++++++++ arch/powerpc/kernel/epapr_paravirt.c | 11 +++++++++- 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index c0c7adcd21e3a..833ce2c2d505b 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -50,10 +50,6 @@ #ifndef _EPAPR_HCALLS_H #define _EPAPR_HCALLS_H -#include -#include -#include - #define EV_BYTE_CHANNEL_SEND 1 #define EV_BYTE_CHANNEL_RECEIVE 2 #define EV_BYTE_CHANNEL_POLL 3 @@ -109,6 +105,11 @@ #define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */ #define EV_BUFFER_OVERFLOW 13 /* Caller-supplied buffer too small */ +#ifndef __ASSEMBLY__ +#include +#include +#include + /* * Hypercall register clobber list * @@ -506,5 +507,5 @@ static inline unsigned int ev_idle(void) return r3; } - +#endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 697b390ebfd8a..62c0dc2378266 100644 --- a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -8,13 +8,41 @@ */ #include +#include #include #include #include #include #include +#include #include +/* epapr_ev_idle() was derived from e500_idle() */ +_GLOBAL(epapr_ev_idle) + CURRENT_THREAD_INFO(r3, r1) + PPC_LL r4, TI_LOCAL_FLAGS(r3) /* set napping bit */ + ori r4, r4,_TLF_NAPPING /* so when we take an exception */ + PPC_STL r4, TI_LOCAL_FLAGS(r3) /* it will return to our caller */ + + wrteei 1 + +idle_loop: + LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE)) + +.global epapr_ev_idle_start +epapr_ev_idle_start: + li r3, -1 + nop + nop + nop + + /* + * Guard against spurious wakeups from a hypervisor -- + * only interrupt will cause us to return to LR due to + * _TLF_NAPPING. + */ + b idle_loop + /* Hypercall entry point. Will be patched with device tree instructions. */ .global epapr_hypercall_start epapr_hypercall_start: diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 028aeae370b65..f3eab8594d9f8 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -21,6 +21,10 @@ #include #include #include +#include + +extern void epapr_ev_idle(void); +extern u32 epapr_ev_idle_start[]; bool epapr_paravirt_enabled; @@ -41,8 +45,13 @@ static int __init epapr_paravirt_init(void) if (len % 4 || len > (4 * 4)) return -ENODEV; - for (i = 0; i < (len / 4); i++) + for (i = 0; i < (len / 4); i++) { patch_instruction(epapr_hypercall_start + i, insts[i]); + patch_instruction(epapr_ev_idle_start + i, insts[i]); + } + + if (of_get_property(hyper_node, "has-idle", NULL)) + ppc_md.power_save = epapr_ev_idle; epapr_paravirt_enabled = true; From 40656397241860bb21f2802af17ac1de607fb7a9 Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Tue, 3 Jul 2012 05:48:54 +0000 Subject: [PATCH 06/56] PPC: select EPAPR_PARAVIRT for all users of epapr hcalls Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/platforms/Kconfig | 1 + drivers/tty/Kconfig | 1 + drivers/virt/Kconfig | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index e7a896acd9827..48a920d514892 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -90,6 +90,7 @@ config MPIC config PPC_EPAPR_HV_PIC bool default n + select EPAPR_PARAVIRT config MPIC_WEIRD bool diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index 830cd62d84925..aa99cd26ba8bc 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -358,6 +358,7 @@ config TRACE_SINK config PPC_EPAPR_HV_BYTECHAN tristate "ePAPR hypervisor byte channel driver" depends on PPC + select EPAPR_PARAVIRT help This driver creates /dev entries for each ePAPR hypervisor byte channel, thereby allowing applications to communicate with byte diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig index 2dcdbc9364d88..99ebdde590f82 100644 --- a/drivers/virt/Kconfig +++ b/drivers/virt/Kconfig @@ -15,6 +15,7 @@ if VIRT_DRIVERS config FSL_HV_MANAGER tristate "Freescale hypervisor management driver" depends on FSL_SOC + select EPAPR_PARAVIRT help The Freescale hypervisor management driver provides several services to drivers and applications related to the Freescale hypervisor: From 305bcf26128e380bb1296d2802387659ab8b038e Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 3 Jul 2012 05:48:55 +0000 Subject: [PATCH 07/56] powerpc/fsl-soc: use CONFIG_EPAPR_PARAVIRT for hcalls Signed-off-by: Scott Wood Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/sysdev/fsl_msi.c | 9 +++++++-- arch/powerpc/sysdev/fsl_soc.c | 2 ++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 6e097de00e093..7e2b2f2e3ecde 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -236,7 +236,6 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) u32 intr_index; u32 have_shift = 0; struct fsl_msi_cascade_data *cascade_data; - unsigned int ret; cascade_data = irq_get_handler_data(irq); msi_data = cascade_data->msi_data; @@ -268,7 +267,9 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) case FSL_PIC_IP_IPIC: msir_value = fsl_msi_read(msi_data->msi_regs, msir_index * 0x4); break; - case FSL_PIC_IP_VMPIC: +#ifdef CONFIG_EPAPR_PARAVIRT + case FSL_PIC_IP_VMPIC: { + unsigned int ret; ret = fh_vmpic_get_msir(virq_to_hw(irq), &msir_value); if (ret) { pr_err("fsl-msi: fh_vmpic_get_msir() failed for " @@ -277,6 +278,8 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) } break; } +#endif + } while (msir_value) { intr_index = ffs(msir_value) - 1; @@ -508,10 +511,12 @@ static const struct of_device_id fsl_of_msi_ids[] = { .compatible = "fsl,ipic-msi", .data = (void *)&ipic_msi_feature, }, +#ifdef CONFIG_EPAPR_PARAVIRT { .compatible = "fsl,vmpic-msi", .data = (void *)&vmpic_msi_feature, }, +#endif {} }; diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index c449dbd1c938f..97118dc3d2851 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -253,6 +253,7 @@ struct platform_diu_data_ops diu_ops; EXPORT_SYMBOL(diu_ops); #endif +#ifdef CONFIG_EPAPR_PARAVIRT /* * Restart the current partition * @@ -278,3 +279,4 @@ void fsl_hv_halt(void) pr_info("hv exit\n"); fh_partition_stop(-1); } +#endif From 8e525d59d024f54b88a038faac38f76b9094774e Mon Sep 17 00:00:00 2001 From: Liu Yu-B13201 Date: Tue, 3 Jul 2012 05:48:56 +0000 Subject: [PATCH 08/56] PPC: Don't use hardcoded opcode for ePAPR hcall invocation Signed-off-by: Liu Yu Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/epapr_hcalls.h | 22 +++++++-------- arch/powerpc/include/asm/fsl_hcalls.h | 36 ++++++++++++------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index 833ce2c2d505b..b8d94459a929d 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -195,7 +195,7 @@ static inline unsigned int ev_int_set_config(unsigned int interrupt, r5 = priority; r6 = destination; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6) : : EV_HCALL_CLOBBERS4 ); @@ -224,7 +224,7 @@ static inline unsigned int ev_int_get_config(unsigned int interrupt, r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG); r3 = interrupt; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6) : : EV_HCALL_CLOBBERS4 ); @@ -254,7 +254,7 @@ static inline unsigned int ev_int_set_mask(unsigned int interrupt, r3 = interrupt; r4 = mask; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -279,7 +279,7 @@ static inline unsigned int ev_int_get_mask(unsigned int interrupt, r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK); r3 = interrupt; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -307,7 +307,7 @@ static inline unsigned int ev_int_eoi(unsigned int interrupt) r11 = EV_HCALL_TOKEN(EV_INT_EOI); r3 = interrupt; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -346,7 +346,7 @@ static inline unsigned int ev_byte_channel_send(unsigned int handle, r7 = be32_to_cpu(p[2]); r8 = be32_to_cpu(p[3]); - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8) : : EV_HCALL_CLOBBERS6 @@ -385,7 +385,7 @@ static inline unsigned int ev_byte_channel_receive(unsigned int handle, r3 = handle; r4 = *count; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8) : : EV_HCALL_CLOBBERS6 @@ -423,7 +423,7 @@ static inline unsigned int ev_byte_channel_poll(unsigned int handle, r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5) : : EV_HCALL_CLOBBERS3 ); @@ -456,7 +456,7 @@ static inline unsigned int ev_int_iack(unsigned int handle, r11 = EV_HCALL_TOKEN(EV_INT_IACK); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -480,7 +480,7 @@ static inline unsigned int ev_doorbell_send(unsigned int handle) r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -500,7 +500,7 @@ static inline unsigned int ev_idle(void) r11 = EV_HCALL_TOKEN(EV_IDLE); - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "=r" (r3) : : EV_HCALL_CLOBBERS1 ); diff --git a/arch/powerpc/include/asm/fsl_hcalls.h b/arch/powerpc/include/asm/fsl_hcalls.h index 922d9b5fe3d5c..3abb58394da48 100644 --- a/arch/powerpc/include/asm/fsl_hcalls.h +++ b/arch/powerpc/include/asm/fsl_hcalls.h @@ -96,7 +96,7 @@ static inline unsigned int fh_send_nmi(unsigned int vcpu_mask) r11 = FH_HCALL_TOKEN(FH_SEND_NMI); r3 = vcpu_mask; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -151,7 +151,7 @@ static inline unsigned int fh_partition_get_dtprop(int handle, r9 = (uint32_t)propvalue_addr; r10 = *propvalue_len; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8), "+r" (r9), "+r" (r10) @@ -205,7 +205,7 @@ static inline unsigned int fh_partition_set_dtprop(int handle, r9 = (uint32_t)propvalue_addr; r10 = propvalue_len; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8), "+r" (r9), "+r" (r10) @@ -229,7 +229,7 @@ static inline unsigned int fh_partition_restart(unsigned int partition) r11 = FH_HCALL_TOKEN(FH_PARTITION_RESTART); r3 = partition; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -262,7 +262,7 @@ static inline unsigned int fh_partition_get_status(unsigned int partition, r11 = FH_HCALL_TOKEN(FH_PARTITION_GET_STATUS); r3 = partition; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -295,7 +295,7 @@ static inline unsigned int fh_partition_start(unsigned int partition, r4 = entry_point; r5 = load; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5) : : EV_HCALL_CLOBBERS3 ); @@ -317,7 +317,7 @@ static inline unsigned int fh_partition_stop(unsigned int partition) r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP); r3 = partition; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -376,7 +376,7 @@ static inline unsigned int fh_partition_memcpy(unsigned int source, #endif r7 = count; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7) : : EV_HCALL_CLOBBERS5 @@ -399,7 +399,7 @@ static inline unsigned int fh_dma_enable(unsigned int liodn) r11 = FH_HCALL_TOKEN(FH_DMA_ENABLE); r3 = liodn; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -421,7 +421,7 @@ static inline unsigned int fh_dma_disable(unsigned int liodn) r11 = FH_HCALL_TOKEN(FH_DMA_DISABLE); r3 = liodn; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -447,7 +447,7 @@ static inline unsigned int fh_vmpic_get_msir(unsigned int interrupt, r11 = FH_HCALL_TOKEN(FH_VMPIC_GET_MSIR); r3 = interrupt; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -469,7 +469,7 @@ static inline unsigned int fh_system_reset(void) r11 = FH_HCALL_TOKEN(FH_SYSTEM_RESET); - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "=r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -506,7 +506,7 @@ static inline unsigned int fh_err_get_info(int queue, uint32_t *bufsize, r6 = addr_lo; r7 = peek; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7) : : EV_HCALL_CLOBBERS5 @@ -542,7 +542,7 @@ static inline unsigned int fh_get_core_state(unsigned int handle, r3 = handle; r4 = vcpu; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -572,7 +572,7 @@ static inline unsigned int fh_enter_nap(unsigned int handle, unsigned int vcpu) r3 = handle; r4 = vcpu; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -597,7 +597,7 @@ static inline unsigned int fh_exit_nap(unsigned int handle, unsigned int vcpu) r3 = handle; r4 = vcpu; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -618,7 +618,7 @@ static inline unsigned int fh_claim_device(unsigned int handle) r11 = FH_HCALL_TOKEN(FH_CLAIM_DEVICE); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -645,7 +645,7 @@ static inline unsigned int fh_partition_stop_dma(unsigned int handle) r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP_DMA); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); From 97c95059848358f1577f471ec47cf68690f996e4 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 2 Aug 2012 15:10:00 +0200 Subject: [PATCH 09/56] KVM: PPC: PR: Use generic tracepoint for guest exit We want to have tracing information on guest exits for booke as well as book3s. Since most information is identical, use a common trace point. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 2 +- arch/powerpc/kvm/booke.c | 3 ++ arch/powerpc/kvm/trace.h | 79 +++++++++++++++++++++++------------- 3 files changed, 55 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 05c28f59f77f4..7f0fe6f9e297a 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -549,7 +549,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* We get here with MSR.EE=0, so enable it to be a nice citizen */ __hard_irq_enable(); - trace_kvm_book3s_exit(exit_nr, vcpu); + trace_kvm_exit(exit_nr, vcpu); preempt_enable(); kvm_resched(vcpu); switch (exit_nr) { diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index d25a097c852b7..7ce2ed07831fa 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -39,6 +39,7 @@ #include "timing.h" #include "booke.h" +#include "trace.h" unsigned long kvmppc_booke_handlers; @@ -677,6 +678,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_enable(); + trace_kvm_exit(exit_nr, vcpu); + run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 877186b7b1c36..9fab6eddc7e4d 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -31,6 +31,57 @@ TRACE_EVENT(kvm_ppc_instr, __entry->inst, __entry->pc, __entry->emulate) ); +TRACE_EVENT(kvm_exit, + TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), + TP_ARGS(exit_nr, vcpu), + + TP_STRUCT__entry( + __field( unsigned int, exit_nr ) + __field( unsigned long, pc ) + __field( unsigned long, msr ) + __field( unsigned long, dar ) +#ifdef CONFIG_KVM_BOOK3S_PR + __field( unsigned long, srr1 ) +#endif + __field( unsigned long, last_inst ) + ), + + TP_fast_assign( +#ifdef CONFIG_KVM_BOOK3S_PR + struct kvmppc_book3s_shadow_vcpu *svcpu; +#endif + __entry->exit_nr = exit_nr; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->dar = kvmppc_get_fault_dar(vcpu); + __entry->msr = vcpu->arch.shared->msr; +#ifdef CONFIG_KVM_BOOK3S_PR + svcpu = svcpu_get(vcpu); + __entry->srr1 = svcpu->shadow_srr1; + svcpu_put(svcpu); +#endif + __entry->last_inst = vcpu->arch.last_inst; + ), + + TP_printk("exit=0x%x" + " | pc=0x%lx" + " | msr=0x%lx" + " | dar=0x%lx" +#ifdef CONFIG_KVM_BOOK3S_PR + " | srr1=0x%lx" +#endif + " | last_inst=0x%lx" + , + __entry->exit_nr, + __entry->pc, + __entry->msr, + __entry->dar, +#ifdef CONFIG_KVM_BOOK3S_PR + __entry->srr1, +#endif + __entry->last_inst + ) +); + TRACE_EVENT(kvm_stlb_inval, TP_PROTO(unsigned int stlb_index), TP_ARGS(stlb_index), @@ -105,34 +156,6 @@ TRACE_EVENT(kvm_gtlb_write, #ifdef CONFIG_KVM_BOOK3S_PR -TRACE_EVENT(kvm_book3s_exit, - TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), - TP_ARGS(exit_nr, vcpu), - - TP_STRUCT__entry( - __field( unsigned int, exit_nr ) - __field( unsigned long, pc ) - __field( unsigned long, msr ) - __field( unsigned long, dar ) - __field( unsigned long, srr1 ) - ), - - TP_fast_assign( - struct kvmppc_book3s_shadow_vcpu *svcpu; - __entry->exit_nr = exit_nr; - __entry->pc = kvmppc_get_pc(vcpu); - __entry->dar = kvmppc_get_fault_dar(vcpu); - __entry->msr = vcpu->arch.shared->msr; - svcpu = svcpu_get(vcpu); - __entry->srr1 = svcpu->shadow_srr1; - svcpu_put(svcpu); - ), - - TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx", - __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar, - __entry->srr1) -); - TRACE_EVENT(kvm_book3s_reenter, TP_PROTO(int r, struct kvm_vcpu *vcpu), TP_ARGS(r, vcpu), From f4800b1f4d23156e9080a08d6114e5d8bb767964 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 7 Aug 2012 10:24:14 +0200 Subject: [PATCH 10/56] KVM: PPC: Expose SYNC cap based on mmu notifiers Semantically, the "SYNC" cap means that we have mmu notifiers available. Express this in our #ifdef'ery around the feature, so that we can be sure we don't miss out on ppc targets when they get their implementation. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/powerpc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index dbf56e173c25b..45fe433316ea8 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -264,10 +264,16 @@ int kvm_dev_ioctl_check_extension(long ext) if (cpu_has_feature(CPU_FTR_ARCH_201)) r = 2; break; +#endif case KVM_CAP_SYNC_MMU: +#ifdef CONFIG_KVM_BOOK3S_64_HV r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; - break; +#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER) + r = 1; +#else + r = 0; #endif + break; case KVM_CAP_NR_VCPUS: /* * Recommending a number of CPUs is somewhat arbitrary; we From cf1c5ca47319d9eb49166859921822fea354d4b3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 1 Aug 2012 12:56:51 +0200 Subject: [PATCH 11/56] KVM: PPC: BookE: Expose remote TLB flushes in debugfs We're already counting remote TLB flushes in a variable, but don't export it to user space yet. Do so, so we know what's going on. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 7ce2ed07831fa..1d4ce9a80f557 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -63,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "doorbell", VCPU_STAT(dbell_exits) }, { "guest doorbell", VCPU_STAT(gdbell_exits) }, + { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, { NULL } }; From 2bb890f5ee79c85b9d3b7df37ecb639d8d4b961e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 2 Aug 2012 13:38:49 +0200 Subject: [PATCH 12/56] KVM: PPC: E500: Fix clear_tlb_refs Our mapping code assumes that TLB0 entries are always mapped. However, after calling clear_tlb_refs() this is no longer the case. Map them dynamically if we find an entry unmapped in TLB0. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index ff38b664195d6..b56b6e14df6c0 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1039,8 +1039,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, sesel = 0; /* unused */ priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; - kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, - &priv->ref, eaddr, &stlbe); + /* Only triggers after clear_tlb_refs */ + if (unlikely(!(priv->ref.flags & E500_TLB_VALID))) + kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); + else + kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, + &priv->ref, eaddr, &stlbe); break; case 1: { From 1340f3e8871b9f35b39c33d0140383c6c6c1f005 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 6 Aug 2012 00:04:14 +0000 Subject: [PATCH 13/56] KVM: PPC: Quieten message about allocating linear regions This is printed once for every RMA or HPT region that get preallocated. If one preallocates hundreds of such regions (in order to run hundreds of KVM guests), that gets rather painful, so make it a bit quieter. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv_builtin.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index fb4eac290fefc..ec0a9e5de1005 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -157,8 +157,8 @@ static void __init kvm_linear_init_one(ulong size, int count, int type) linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); for (i = 0; i < count; ++i) { linear = alloc_bootmem_align(size, size); - pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, - size >> 20); + pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, + size >> 20); linear_info[i].base_virt = linear; linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT; linear_info[i].npages = npages; From 8043e494da644ec174f7df0b67f88ccf8777a1ce Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 10 Aug 2012 12:21:21 +0000 Subject: [PATCH 14/56] powerpc/epapr: export epapr_hypercall_start This fixes breakage introduced by the following commit: commit 6d2d82627f4f1e96a33664ace494fa363e0495cb Author: Liu Yu-B13201 Date: Tue Jul 3 05:48:56 2012 +0000 PPC: Don't use hardcoded opcode for ePAPR hcall invocation when a driver that uses ePAPR hypercalls is built as a module. Reported-by: Geert Uytterhoeven Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/ppc_ksyms.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 3e4031581c654..e597dde124e87 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef CONFIG_PPC32 extern void transfer_to_handler(void); @@ -192,3 +193,7 @@ EXPORT_SYMBOL(__arch_hweight64); #ifdef CONFIG_PPC_BOOK3S_64 EXPORT_SYMBOL_GPL(mmu_psize_defs); #endif + +#ifdef CONFIG_EPAPR_PARAVIRT +EXPORT_SYMBOL(epapr_hypercall_start); +#endif From 4ffc6356ec690f77f65b7b78e0047a3fe8316371 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 8 Aug 2012 20:31:13 +0200 Subject: [PATCH 15/56] KVM: PPC: BookE: Add check_requests helper function We need a central place to check for pending requests in. Add one that only does the timer check we already do in a different place. Later, this central function can be extended by more checks. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1d4ce9a80f557..bcf87fe891797 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -419,13 +419,6 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) unsigned long *pending = &vcpu->arch.pending_exceptions; unsigned int priority; - if (vcpu->requests) { - if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) { - smp_mb(); - update_timer_ints(vcpu); - } - } - priority = __ffs(*pending); while (priority < BOOKE_IRQPRIO_MAX) { if (kvmppc_booke_irqprio_deliver(vcpu, priority)) @@ -461,6 +454,14 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) return r; } +static void kvmppc_check_requests(struct kvm_vcpu *vcpu) +{ + if (vcpu->requests) { + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) + update_timer_ints(vcpu); + } +} + /* * Common checks before entering the guest world. Call with interrupts * disabled. @@ -485,6 +486,15 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) break; } + smp_mb(); + if (vcpu->requests) { + /* Make sure we process requests preemptable */ + local_irq_enable(); + kvmppc_check_requests(vcpu); + local_irq_disable(); + continue; + } + if (kvmppc_core_prepare_to_enter(vcpu)) { /* interrupts got enabled in between, so we are back at square 1 */ From d69c6436443c05a64452054f51a79316297755f4 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 8 Aug 2012 20:44:20 +0200 Subject: [PATCH 16/56] KVM: PPC: BookE: Add support for vcpu->mode Generic KVM code might want to know whether we are inside guest context or outside. It also wants to be able to push us out of guest context. Add support to the BookE code for the generic vcpu->mode field that describes the above states. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index bcf87fe891797..70a86c0a9d854 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -501,6 +501,15 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) continue; } + if (vcpu->mode == EXITING_GUEST_MODE) { + r = 1; + break; + } + + /* Going into guest context! Yay! */ + vcpu->mode = IN_GUEST_MODE; + smp_wmb(); + break; } @@ -572,6 +581,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvm_guest_exit(); out: + vcpu->mode = OUTSIDE_GUEST_MODE; + smp_wmb(); local_irq_enable(); return ret; } From 862d31f788f9a249f7656d02d8d4006e306108ce Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 31 Jul 2012 00:19:50 +0200 Subject: [PATCH 17/56] KVM: PPC: E500: Implement MMU notifiers The e500 target has lived without mmu notifiers ever since it got introduced, but fails for the user space check on them with hugetlbfs. So in order to get that one working, implement mmu notifiers in a reasonably dumb fashion and be happy. On embedded hardware, we almost never end up with mmu notifier calls, since most people don't overcommit. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 3 +- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/Kconfig | 2 + arch/powerpc/kvm/booke.c | 6 +++ arch/powerpc/kvm/e500_tlb.c | 60 ++++++++++++++++++++++++++--- 5 files changed, 65 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 28e8f5e5c63e7..cea9d3aab71c9 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -46,7 +46,8 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif -#ifdef CONFIG_KVM_BOOK3S_64_HV +#if defined(CONFIG_KVM_BOOK3S_64_HV) || defined(CONFIG_KVM_E500V2) || \ + defined(CONFIG_KVM_E500MC) #include #define KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e006f0bdea95f..88de3146838b4 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -104,6 +104,7 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); +extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int op, int *advance); diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index f4dacb9c57fac..40cad8c8bd0e8 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -123,6 +123,7 @@ config KVM_E500V2 depends on EXPERIMENTAL && E500 && !PPC_E500MC select KVM select KVM_MMIO + select MMU_NOTIFIER ---help--- Support running unmodified E500 guest kernels in virtual machines on E500v2 host processors. @@ -138,6 +139,7 @@ config KVM_E500MC select KVM select KVM_MMIO select KVM_BOOKE_HV + select MMU_NOTIFIER ---help--- Support running unmodified E500MC/E5500 (32-bit) guest kernels in virtual machines on E500MC/E5500 host processors. diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 70a86c0a9d854..52f6cbb4923e4 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -459,6 +459,10 @@ static void kvmppc_check_requests(struct kvm_vcpu *vcpu) if (vcpu->requests) { if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) update_timer_ints(vcpu); +#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) + kvmppc_core_flush_tlb(vcpu); +#endif } } @@ -579,6 +583,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif kvm_guest_exit(); + vcpu->mode = OUTSIDE_GUEST_MODE; + smp_wmb(); out: vcpu->mode = OUTSIDE_GUEST_MODE; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index b56b6e14df6c0..de8ea29409f2d 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -303,18 +303,15 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, ref->pfn = pfn; ref->flags = E500_TLB_VALID; - if (tlbe_is_writable(gtlbe)) + if (tlbe_is_writable(gtlbe)) { ref->flags |= E500_TLB_DIRTY; + kvm_set_pfn_dirty(pfn); + } } static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) { if (ref->flags & E500_TLB_VALID) { - if (ref->flags & E500_TLB_DIRTY) - kvm_release_pfn_dirty(ref->pfn); - else - kvm_release_pfn_clean(ref->pfn); - ref->flags = 0; } } @@ -357,6 +354,13 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) clear_tlb_privs(vcpu_e500); } +void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + clear_tlb_refs(vcpu_e500); + clear_tlb1_bitmap(vcpu_e500); +} + static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, unsigned int eaddr, int as) { @@ -541,6 +545,9 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, /* Clear i-cache for new pages */ kvmppc_mmu_flush_icache(pfn); + + /* Drop refcount on page, so that mmu notifiers can clear it */ + kvm_release_pfn_clean(pfn); } /* XXX only map the one-one case, for now use TLB0 */ @@ -1064,6 +1071,47 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); } +/************* MMU Notifiers *************/ + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + /* + * Flush all shadow tlb entries everywhere. This is slow, but + * we are 100% sure that we catch the to be unmapped page + */ + kvm_flush_remote_tlbs(kvm); + + return 0; +} + +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ + /* kvm_unmap_hva flushes everything anyways */ + kvm_unmap_hva(kvm, start); + + return 0; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + /* The page will get remapped properly on its next fault */ + kvm_unmap_hva(kvm, hva); +} + +/*****************************************/ + static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) { int i; From 6346046c3a69edc9149311473b940f3af7c93752 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 8 Aug 2012 00:44:52 +0200 Subject: [PATCH 18/56] KVM: PPC: BookE: Add some more trace points Without trace points, debugging what exactly is going on inside guest code can be very tricky. Add a few more trace points at places that hopefully tell us more when things go wrong. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 3 ++ arch/powerpc/kvm/e500_tlb.c | 3 ++ arch/powerpc/kvm/trace.h | 71 +++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 52f6cbb4923e4..00bcc57428c76 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -143,6 +143,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) { + trace_kvm_booke_queue_irqprio(vcpu, priority); set_bit(priority, &vcpu->arch.pending_exceptions); } @@ -457,6 +458,8 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) static void kvmppc_check_requests(struct kvm_vcpu *vcpu) { if (vcpu->requests) { + trace_kvm_check_requests(vcpu); + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) update_timer_ints(vcpu); #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index de8ea29409f2d..1af6fab58995b 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -312,6 +312,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) { if (ref->flags & E500_TLB_VALID) { + trace_kvm_booke206_ref_release(ref->pfn, ref->flags); ref->flags = 0; } } @@ -1075,6 +1076,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) { + trace_kvm_unmap_hva(hva); + /* * Flush all shadow tlb entries everywhere. This is slow, but * we are 100% sure that we catch the to be unmapped page diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 9fab6eddc7e4d..cb2780a42fd8a 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -82,6 +82,21 @@ TRACE_EVENT(kvm_exit, ) ); +TRACE_EVENT(kvm_unmap_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("unmap hva 0x%lx\n", __entry->hva) +); + TRACE_EVENT(kvm_stlb_inval, TP_PROTO(unsigned int stlb_index), TP_ARGS(stlb_index), @@ -149,6 +164,24 @@ TRACE_EVENT(kvm_gtlb_write, __entry->word1, __entry->word2) ); +TRACE_EVENT(kvm_check_requests, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field( __u32, cpu_nr ) + __field( __u32, requests ) + ), + + TP_fast_assign( + __entry->cpu_nr = vcpu->vcpu_id; + __entry->requests = vcpu->requests; + ), + + TP_printk("vcpu=%x requests=%x", + __entry->cpu_nr, __entry->requests) +); + /************************************************************************* * Book3S trace points * @@ -418,6 +451,44 @@ TRACE_EVENT(kvm_booke206_gtlb_write, __entry->mas2, __entry->mas7_3) ); +TRACE_EVENT(kvm_booke206_ref_release, + TP_PROTO(__u64 pfn, __u32 flags), + TP_ARGS(pfn, flags), + + TP_STRUCT__entry( + __field( __u64, pfn ) + __field( __u32, flags ) + ), + + TP_fast_assign( + __entry->pfn = pfn; + __entry->flags = flags; + ), + + TP_printk("pfn=%llx flags=%x", + __entry->pfn, __entry->flags) +); + +TRACE_EVENT(kvm_booke_queue_irqprio, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), + TP_ARGS(vcpu, priority), + + TP_STRUCT__entry( + __field( __u32, cpu_nr ) + __field( __u32, priority ) + __field( unsigned long, pending ) + ), + + TP_fast_assign( + __entry->cpu_nr = vcpu->vcpu_id; + __entry->priority = priority; + __entry->pending = vcpu->arch.pending_exceptions; + ), + + TP_printk("vcpu=%x prio=%x pending=%lx", + __entry->cpu_nr, __entry->priority, __entry->pending) +); + #endif #endif /* _TRACE_KVM_H */ From 2d8185d4ee22f425001d28d1817fc8d478e6fa02 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 10 Aug 2012 12:31:12 +0200 Subject: [PATCH 19/56] KVM: PPC: BookE: No duplicate request != 0 check We only call kvmppc_check_requests() when vcpu->requests != 0, so drop the redundant check in the function itself Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 00bcc57428c76..683cbd686d014 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -457,16 +457,14 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) static void kvmppc_check_requests(struct kvm_vcpu *vcpu) { - if (vcpu->requests) { - trace_kvm_check_requests(vcpu); + trace_kvm_check_requests(vcpu); - if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) - update_timer_ints(vcpu); + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) + update_timer_ints(vcpu); #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) - if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) - kvmppc_core_flush_tlb(vcpu); + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) + kvmppc_core_flush_tlb(vcpu); #endif - } } /* From 03d25c5bd5c3125055bd36f4813ddb817def19dd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 10 Aug 2012 12:28:50 +0200 Subject: [PATCH 20/56] KVM: PPC: Use same kvmppc_prepare_to_enter code for booke and book3s_pr We need to do the same things when preparing to enter a guest for booke and book3s_pr cores. Fold the generic code into a generic function that both call. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 3 ++ arch/powerpc/kvm/book3s_pr.c | 22 ++++-------- arch/powerpc/kvm/booke.c | 58 +----------------------------- arch/powerpc/kvm/powerpc.c | 57 +++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 73 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 88de3146838b4..59b7c87e47f70 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -112,6 +112,7 @@ extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong val); extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *val); +extern void kvmppc_core_check_requests(struct kvm_vcpu *vcpu); extern int kvmppc_booke_init(void); extern void kvmppc_booke_exit(void); @@ -150,6 +151,8 @@ extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, extern int kvmppc_bookehv_init(void); extern void kvmppc_bookehv_exit(void); +extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 7f0fe6f9e297a..cae2defd14623 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -88,6 +88,10 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_VSX); } +void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) +{ +} + static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) { ulong smsr = vcpu->arch.shared->msr; @@ -815,19 +819,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * again due to a host external interrupt. */ __hard_irq_disable(); - if (signal_pending(current)) { - __hard_irq_enable(); -#ifdef EXIT_DEBUG - printk(KERN_EMERG "KVM: Going back to host\n"); -#endif - vcpu->stat.signal_exits++; + if (kvmppc_prepare_to_enter(vcpu)) { run->exit_reason = KVM_EXIT_INTR; r = -EINTR; - } else { - /* In case an interrupt came in that was triggered - * from userspace (like DEC), we need to check what - * to inject now! */ - kvmppc_core_prepare_to_enter(vcpu); } } @@ -1029,8 +1023,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) goto out; } - kvmppc_core_prepare_to_enter(vcpu); - /* * Interrupts could be timers for the guest which we have to inject * again, so let's postpone them until we're in the guest and if we @@ -1038,9 +1030,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * a host external interrupt. */ __hard_irq_disable(); - - /* No need to go into the guest when all we do is going out */ - if (signal_pending(current)) { + if (kvmppc_prepare_to_enter(vcpu)) { __hard_irq_enable(); kvm_run->exit_reason = KVM_EXIT_INTR; ret = -EINTR; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 683cbd686d014..4652e0bfa781d 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -455,10 +455,8 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) return r; } -static void kvmppc_check_requests(struct kvm_vcpu *vcpu) +void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) { - trace_kvm_check_requests(vcpu); - if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) update_timer_ints(vcpu); #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) @@ -467,60 +465,6 @@ static void kvmppc_check_requests(struct kvm_vcpu *vcpu) #endif } -/* - * Common checks before entering the guest world. Call with interrupts - * disabled. - * - * returns !0 if a signal is pending and check_signal is true - */ -static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) -{ - int r = 0; - - WARN_ON_ONCE(!irqs_disabled()); - while (true) { - if (need_resched()) { - local_irq_enable(); - cond_resched(); - local_irq_disable(); - continue; - } - - if (signal_pending(current)) { - r = 1; - break; - } - - smp_mb(); - if (vcpu->requests) { - /* Make sure we process requests preemptable */ - local_irq_enable(); - kvmppc_check_requests(vcpu); - local_irq_disable(); - continue; - } - - if (kvmppc_core_prepare_to_enter(vcpu)) { - /* interrupts got enabled in between, so we - are back at square 1 */ - continue; - } - - if (vcpu->mode == EXITING_GUEST_MODE) { - r = 1; - break; - } - - /* Going into guest context! Yay! */ - vcpu->mode = IN_GUEST_MODE; - smp_wmb(); - - break; - } - - return r; -} - int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int ret; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 45fe433316ea8..153a26abc915e 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -47,6 +47,63 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return 1; } +#ifndef CONFIG_KVM_BOOK3S_64_HV +/* + * Common checks before entering the guest world. Call with interrupts + * disabled. + * + * returns !0 if a signal is pending and check_signal is true + */ +int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) +{ + int r = 0; + + WARN_ON_ONCE(!irqs_disabled()); + while (true) { + if (need_resched()) { + local_irq_enable(); + cond_resched(); + local_irq_disable(); + continue; + } + + if (signal_pending(current)) { + r = 1; + break; + } + + smp_mb(); + if (vcpu->requests) { + /* Make sure we process requests preemptable */ + local_irq_enable(); + trace_kvm_check_requests(vcpu); + kvmppc_core_check_requests(vcpu); + local_irq_disable(); + continue; + } + + if (kvmppc_core_prepare_to_enter(vcpu)) { + /* interrupts got enabled in between, so we + are back at square 1 */ + continue; + } + + if (vcpu->mode == EXITING_GUEST_MODE) { + r = 1; + break; + } + + /* Going into guest context! Yay! */ + vcpu->mode = IN_GUEST_MODE; + smp_wmb(); + + break; + } + + return r; +} +#endif /* CONFIG_KVM_BOOK3S_64_HV */ + int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) { int nr = kvmppc_get_gpr(vcpu, 11); From 9b0cb3c808fef0d75d6f79ab9684246e6879f9c1 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 10 Aug 2012 13:23:55 +0200 Subject: [PATCH 21/56] KVM: PPC: Book3s: PR: Add (dumb) MMU Notifier support Now that we have very simple MMU Notifier support for e500 in place, also add the same simple support to book3s. It gets us one step closer to actual fast support. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 3 +- arch/powerpc/kvm/Kconfig | 1 + arch/powerpc/kvm/book3s_32_mmu_host.c | 1 + arch/powerpc/kvm/book3s_64_mmu_host.c | 1 + arch/powerpc/kvm/book3s_mmu_hpte.c | 5 --- arch/powerpc/kvm/book3s_pr.c | 47 +++++++++++++++++++++++++++ 6 files changed, 51 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index cea9d3aab71c9..4a5ec8f573c7f 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -46,8 +46,7 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif -#if defined(CONFIG_KVM_BOOK3S_64_HV) || defined(CONFIG_KVM_E500V2) || \ - defined(CONFIG_KVM_E500MC) +#if !defined(CONFIG_KVM_440) #include #define KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 40cad8c8bd0e8..71f0cd9edf335 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -36,6 +36,7 @@ config KVM_BOOK3S_64_HANDLER config KVM_BOOK3S_PR bool select KVM_MMIO + select MMU_NOTIFIER config KVM_BOOK3S_32 tristate "KVM support for PowerPC book3s_32 processors" diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 837f13e7b6bfc..9fac0101ffb98 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -254,6 +254,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) kvmppc_mmu_hpte_cache_map(vcpu, pte); + kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); out: return r; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 0688b6b395859..6b2c80e496813 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -168,6 +168,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) kvmppc_mmu_hpte_cache_map(vcpu, pte); } + kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); out: return r; diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 41cb0017e757a..2c86b0d637149 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -114,11 +114,6 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) hlist_del_init_rcu(&pte->list_vpte); hlist_del_init_rcu(&pte->list_vpte_long); - if (pte->pte.may_write) - kvm_release_pfn_dirty(pte->pfn); - else - kvm_release_pfn_clean(pte->pfn); - spin_unlock(&vcpu3s->mmu_lock); vcpu3s->hpte_cache_count--; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cae2defd14623..10f8217b8c38c 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -90,8 +90,55 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) { + /* We misuse TLB_FLUSH to indicate that we want to clear + all shadow cache entries */ + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) + kvmppc_mmu_pte_flush(vcpu, 0, 0); } +/************* MMU Notifiers *************/ + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + trace_kvm_unmap_hva(hva); + + /* + * Flush all shadow tlb entries everywhere. This is slow, but + * we are 100% sure that we catch the to be unmapped page + */ + kvm_flush_remote_tlbs(kvm); + + return 0; +} + +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ + /* kvm_unmap_hva flushes everything anyways */ + kvm_unmap_hva(kvm, start); + + return 0; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + /* The page will get remapped properly on its next fault */ + kvm_unmap_hva(kvm, hva); +} + +/*****************************************/ + static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) { ulong smsr = vcpu->arch.shared->msr; From e85ad380c6bf6dcd4776d313c81d16a6293db136 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 11:13:25 +0200 Subject: [PATCH 22/56] KVM: PPC: BookE: Drop redundant vcpu->mode set We only need to set vcpu->mode to outside once. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 4652e0bfa781d..492c343f598ee 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -528,8 +528,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif kvm_guest_exit(); - vcpu->mode = OUTSIDE_GUEST_MODE; - smp_wmb(); out: vcpu->mode = OUTSIDE_GUEST_MODE; From c63ddcb4540db95e5a4223cfa8cdbe6efbd5e386 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 11:27:49 +0200 Subject: [PATCH 23/56] KVM: PPC: Book3S: PR: Only do resched check once per exit Now that we use our generic exit helper, we can safely drop our previous kvm_resched that we used to trigger at the beginning of the exit handler function. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 10f8217b8c38c..2c268a15b20fa 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -602,7 +602,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, trace_kvm_exit(exit_nr, vcpu); preempt_enable(); - kvm_resched(vcpu); + switch (exit_nr) { case BOOK3S_INTERRUPT_INST_STORAGE: { From 706fb730cb4f9db2e3de33391475dd0616c2c935 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 11:29:09 +0200 Subject: [PATCH 24/56] KVM: PPC: Exit guest context while handling exit The x86 implementation of KVM accounts for host time while processing guest exits. Do the same for us. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 2 ++ arch/powerpc/kvm/booke.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 2c268a15b20fa..b4ae11ec068f0 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -601,6 +601,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, __hard_irq_enable(); trace_kvm_exit(exit_nr, vcpu); + kvm_guest_exit(); preempt_enable(); switch (exit_nr) { @@ -872,6 +873,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } } + kvm_guest_enter(); trace_kvm_book3s_reenter(r, vcpu); return r; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 492c343f598ee..887c7cc021462 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -650,6 +650,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_enable(); trace_kvm_exit(exit_nr, vcpu); + kvm_guest_exit(); run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; @@ -952,6 +953,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } } + kvm_guest_enter(); + return r; } From 0652eaaebea0995b3236e51dec727d62264f4248 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 11:34:21 +0200 Subject: [PATCH 25/56] KVM: PPC: Book3S: PR: Indicate we're out of guest mode When going out of guest mode, indicate that we are in vcpu->mode. That way requests from other CPUs don't needlessly need to kick us to process them, because it'll just happen next time we enter the guest. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index b4ae11ec068f0..9430a362e5a3c 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1152,6 +1152,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif out: + vcpu->mode = OUTSIDE_GUEST_MODE; preempt_enable(); return ret; } From 24afa37b9c8f035d2fe2028e4824bc4e49bafe73 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 12:42:30 +0200 Subject: [PATCH 26/56] KVM: PPC: Consistentify vcpu exit path When getting out of __vcpu_run, let's be consistent about the state we return in. We want to always * have IRQs enabled * have called kvm_guest_exit before Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 8 ++++++-- arch/powerpc/kvm/booke.c | 13 ++++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 9430a362e5a3c..3dec346c4b933 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -868,12 +868,15 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, */ __hard_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { + /* local_irq_enable(); */ run->exit_reason = KVM_EXIT_INTR; r = -EINTR; + } else { + /* Going back to guest */ + kvm_guest_enter(); } } - kvm_guest_enter(); trace_kvm_book3s_reenter(r, vcpu); return r; @@ -1123,7 +1126,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ret = __kvmppc_vcpu_run(kvm_run, vcpu); - kvm_guest_exit(); + /* No need for kvm_guest_exit. It's done in handle_exit. + We also get here with interrupts enabled. */ current->thread.regs->msr = ext_msr; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 887c7cc021462..aae535f6d9de3 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -481,6 +481,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) local_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { + local_irq_enable(); kvm_run->exit_reason = KVM_EXIT_INTR; ret = -EINTR; goto out; @@ -512,6 +513,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ret = __kvmppc_vcpu_run(kvm_run, vcpu); + /* No need for kvm_guest_exit. It's done in handle_exit. + We also get here with interrupts enabled. */ + #ifdef CONFIG_PPC_FPU kvmppc_save_guest_fp(vcpu); @@ -527,12 +531,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) current->thread.fpexc_mode = fpexc_mode; #endif - kvm_guest_exit(); - out: vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - local_irq_enable(); return ret; } @@ -947,14 +948,16 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, if (!(r & RESUME_HOST)) { local_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { + local_irq_enable(); run->exit_reason = KVM_EXIT_INTR; r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); kvmppc_account_exit(vcpu, SIGNAL_EXITS); + } else { + /* Going back to guest */ + kvm_guest_enter(); } } - kvm_guest_enter(); - return r; } From bd2be6836ee493d41fe42367a2b129aa771185c1 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 01:04:19 +0200 Subject: [PATCH 27/56] KVM: PPC: Book3S: PR: Rework irq disabling Today, we disable preemption while inside guest context, because we need to expose to the world that we are not in a preemptible context. However, during that time we already have interrupts disabled, which would indicate that we are in a non-preemptible context. The reason the checks for irqs_disabled() fail for us though is that we manually control hard IRQs and ignore all the lazy EE framework. Let's stop doing that. Instead, let's always use lazy EE to indicate when we want to disable IRQs, but do a special final switch that gets us into EE disabled, but soft enabled state. That way when we get back out of guest state, we are immediately ready to process interrupts. This simplifies the code drastically and reduces the time that we appear as preempt disabled. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 10 ++++++++++ arch/powerpc/kvm/book3s_pr.c | 21 +++++++-------------- arch/powerpc/kvm/book3s_rmhandlers.S | 15 ++++++++------- arch/powerpc/kvm/booke.c | 2 ++ arch/powerpc/kvm/powerpc.c | 14 ++++++++++++++ 5 files changed, 41 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 59b7c87e47f70..545936428bf69 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -234,5 +234,15 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn) } } +/* Please call after prepare_to_enter. This function puts the lazy ee state + back to normal mode, without actually enabling interrupts. */ +static inline void kvmppc_lazy_ee_enable(void) +{ +#ifdef CONFIG_PPC64 + /* Only need to enable IRQs by hard enabling them after this */ + local_paca->irq_happened = 0; + local_paca->soft_enabled = 1; +#endif +} #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3dec346c4b933..e737db8a5ca78 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -52,8 +52,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, #define MSR_USER32 MSR_USER #define MSR_USER64 MSR_USER #define HW_PAGE_SIZE PAGE_SIZE -#define __hard_irq_disable local_irq_disable -#define __hard_irq_enable local_irq_enable #endif void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -597,12 +595,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; - /* We get here with MSR.EE=0, so enable it to be a nice citizen */ - __hard_irq_enable(); + /* We get here with MSR.EE=1 */ trace_kvm_exit(exit_nr, vcpu); kvm_guest_exit(); - preempt_enable(); switch (exit_nr) { case BOOK3S_INTERRUPT_INST_STORAGE: @@ -854,7 +850,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } } - preempt_disable(); if (!(r & RESUME_HOST)) { /* To avoid clobbering exit_reason, only check for signals if * we aren't already exiting to userspace for some other @@ -866,14 +861,15 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * and if we really did time things so badly, then we just exit * again due to a host external interrupt. */ - __hard_irq_disable(); + local_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { - /* local_irq_enable(); */ + local_irq_enable(); run->exit_reason = KVM_EXIT_INTR; r = -EINTR; } else { /* Going back to guest */ kvm_guest_enter(); + kvmppc_lazy_ee_enable(); } } @@ -1066,8 +1062,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif ulong ext_msr; - preempt_disable(); - /* Check if we can run the vcpu at all */ if (!vcpu->arch.sane) { kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -1081,9 +1075,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * really did time things so badly, then we just exit again due to * a host external interrupt. */ - __hard_irq_disable(); + local_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { - __hard_irq_enable(); + local_irq_enable(); kvm_run->exit_reason = KVM_EXIT_INTR; ret = -EINTR; goto out; @@ -1122,7 +1116,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); - kvm_guest_enter(); + kvmppc_lazy_ee_enable(); ret = __kvmppc_vcpu_run(kvm_run, vcpu); @@ -1157,7 +1151,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) out: vcpu->mode = OUTSIDE_GUEST_MODE; - preempt_enable(); return ret; } diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 9ecf6e35cd8de..b2f8258b545ae 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -170,20 +170,21 @@ kvmppc_handler_skip_ins: * Call kvmppc_handler_trampoline_enter in real mode * * On entry, r4 contains the guest shadow MSR + * MSR.EE has to be 0 when calling this function */ _GLOBAL(kvmppc_entry_trampoline) mfmsr r5 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) toreal(r7) - li r9, MSR_RI - ori r9, r9, MSR_EE - andc r9, r5, r9 /* Clear EE and RI in MSR value */ li r6, MSR_IR | MSR_DR - ori r6, r6, MSR_EE - andc r6, r5, r6 /* Clear EE, DR and IR in MSR value */ - MTMSR_EERI(r9) /* Clear EE and RI in MSR */ - mtsrr0 r7 /* before we set srr0/1 */ + andc r6, r5, r6 /* Clear DR and IR in MSR value */ + /* + * Set EE in HOST_MSR so that it's enabled when we get into our + * C exit handler function + */ + ori r5, r5, MSR_EE + mtsrr0 r7 mtsrr1 r6 RFI diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index aae535f6d9de3..2bd190c488eff 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -486,6 +486,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ret = -EINTR; goto out; } + kvmppc_lazy_ee_enable(); kvm_guest_enter(); @@ -955,6 +956,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } else { /* Going back to guest */ kvm_guest_enter(); + kvmppc_lazy_ee_enable(); } } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 153a26abc915e..266549979e9f6 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "timing.h" #include "../mm/mmu_decl.h" @@ -93,6 +94,19 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) break; } +#ifdef CONFIG_PPC64 + /* lazy EE magic */ + hard_irq_disable(); + if (lazy_irq_pending()) { + /* Got an interrupt in between, try again */ + local_irq_enable(); + local_irq_disable(); + continue; + } + + trace_hardirqs_on(); +#endif + /* Going into guest context! Yay! */ vcpu->mode = IN_GUEST_MODE; smp_wmb(); From 3766a4c693358cff33441310413e3776dbbf8ef0 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 01:24:01 +0200 Subject: [PATCH 28/56] KVM: PPC: Move kvm_guest_enter call into generic code We need to call kvm_guest_enter in booke and book3s, so move its call to generic code. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 2 -- arch/powerpc/kvm/booke.c | 2 -- arch/powerpc/kvm/powerpc.c | 3 +++ 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e737db8a5ca78..1ff0d6ccc5897 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -867,8 +867,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_INTR; r = -EINTR; } else { - /* Going back to guest */ - kvm_guest_enter(); kvmppc_lazy_ee_enable(); } } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 2bd190c488eff..5e8dc19091304 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -954,8 +954,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); kvmppc_account_exit(vcpu, SIGNAL_EXITS); } else { - /* Going back to guest */ - kvm_guest_enter(); kvmppc_lazy_ee_enable(); } } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 266549979e9f6..6646574bf9302 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -101,12 +101,15 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) /* Got an interrupt in between, try again */ local_irq_enable(); local_irq_disable(); + kvm_guest_exit(); continue; } trace_hardirqs_on(); #endif + kvm_guest_enter(); + /* Going into guest context! Yay! */ vcpu->mode = IN_GUEST_MODE; smp_wmb(); From 206c2ed7f1ea55222bde2954ee3d65c2e9cfb750 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 12:43:33 +0200 Subject: [PATCH 29/56] KVM: PPC: Ignore EXITING_GUEST_MODE mode We don't need to do anything when mode is EXITING_GUEST_MODE, because we essentially are outside of guest mode and did everything it asked us to do by the time we check it. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/powerpc.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6646574bf9302..dc86371b9953d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -89,11 +89,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) continue; } - if (vcpu->mode == EXITING_GUEST_MODE) { - r = 1; - break; - } - #ifdef CONFIG_PPC64 /* lazy EE magic */ hard_irq_disable(); From 7ee788556bf395a8ef413bea33494df29a3409e0 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 12:44:41 +0200 Subject: [PATCH 30/56] KVM: PPC: Add return value in prepare_to_enter Our prepare_to_enter helper wants to be able to return in more circumstances to the host than only when an interrupt is pending. Broaden the interface a bit and move even more generic code to the generic helper. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 12 ++++++------ arch/powerpc/kvm/booke.c | 16 ++++++++-------- arch/powerpc/kvm/powerpc.c | 11 ++++++++--- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 1ff0d6ccc5897..71fa0f1873b3c 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -589,6 +589,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int exit_nr) { int r = RESUME_HOST; + int s; vcpu->stat.sum_exits++; @@ -862,10 +863,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * again due to a host external interrupt. */ local_irq_disable(); - if (kvmppc_prepare_to_enter(vcpu)) { + s = kvmppc_prepare_to_enter(vcpu); + if (s <= 0) { local_irq_enable(); - run->exit_reason = KVM_EXIT_INTR; - r = -EINTR; + r = s; } else { kvmppc_lazy_ee_enable(); } @@ -1074,10 +1075,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * a host external interrupt. */ local_irq_disable(); - if (kvmppc_prepare_to_enter(vcpu)) { + ret = kvmppc_prepare_to_enter(vcpu); + if (ret <= 0) { local_irq_enable(); - kvm_run->exit_reason = KVM_EXIT_INTR; - ret = -EINTR; goto out; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 5e8dc19091304..1917802463f5a 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -467,7 +467,7 @@ void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { - int ret; + int ret, s; #ifdef CONFIG_PPC_FPU unsigned int fpscr; int fpexc_mode; @@ -480,10 +480,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } local_irq_disable(); - if (kvmppc_prepare_to_enter(vcpu)) { + s = kvmppc_prepare_to_enter(vcpu); + if (s <= 0) { local_irq_enable(); - kvm_run->exit_reason = KVM_EXIT_INTR; - ret = -EINTR; + ret = s; goto out; } kvmppc_lazy_ee_enable(); @@ -642,6 +642,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int exit_nr) { int r = RESUME_HOST; + int s; /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); @@ -948,11 +949,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, */ if (!(r & RESUME_HOST)) { local_irq_disable(); - if (kvmppc_prepare_to_enter(vcpu)) { + s = kvmppc_prepare_to_enter(vcpu); + if (s <= 0) { local_irq_enable(); - run->exit_reason = KVM_EXIT_INTR; - r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); - kvmppc_account_exit(vcpu, SIGNAL_EXITS); + r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); } else { kvmppc_lazy_ee_enable(); } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index dc86371b9953d..0e2a98ab6a771 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -53,11 +53,14 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) * Common checks before entering the guest world. Call with interrupts * disabled. * - * returns !0 if a signal is pending and check_signal is true + * returns: + * + * == 1 if we're ready to go into guest state + * <= 0 if we need to go back to the host with return value */ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) { - int r = 0; + int r = 1; WARN_ON_ONCE(!irqs_disabled()); while (true) { @@ -69,7 +72,9 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) } if (signal_pending(current)) { - r = 1; + kvmppc_account_exit(vcpu, SIGNAL_EXITS); + vcpu->run->exit_reason = KVM_EXIT_INTR; + r = -EINTR; break; } From 7c973a2ebb8fb9c8ee2ae9647f9ad7b0ad58a3e6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 12:50:35 +0200 Subject: [PATCH 31/56] KVM: PPC: Add return value to core_check_requests Requests may want to tell us that we need to go back into host state, so add a return value for the checks. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s_pr.c | 6 +++++- arch/powerpc/kvm/booke.c | 6 +++++- arch/powerpc/kvm/powerpc.c | 6 ++++-- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 545936428bf69..3dfc437fb9d96 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -112,7 +112,7 @@ extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong val); extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *val); -extern void kvmppc_core_check_requests(struct kvm_vcpu *vcpu); +extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu); extern int kvmppc_booke_init(void); extern void kvmppc_booke_exit(void); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 71fa0f1873b3c..b3c584f94cb33 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -86,12 +86,16 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_VSX); } -void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) +int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) { + int r = 1; /* Indicate we want to get back into the guest */ + /* We misuse TLB_FLUSH to indicate that we want to clear all shadow cache entries */ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) kvmppc_mmu_pte_flush(vcpu, 0, 0); + + return r; } /************* MMU Notifiers *************/ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1917802463f5a..c36493087dbf8 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -455,14 +455,18 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) return r; } -void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) +int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) { + int r = 1; /* Indicate we want to get back into the guest */ + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) update_timer_ints(vcpu); #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) kvmppc_core_flush_tlb(vcpu); #endif + + return r; } int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0e2a98ab6a771..54b12af577d05 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -83,9 +83,11 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) /* Make sure we process requests preemptable */ local_irq_enable(); trace_kvm_check_requests(vcpu); - kvmppc_core_check_requests(vcpu); + r = kvmppc_core_check_requests(vcpu); local_irq_disable(); - continue; + if (r > 0) + continue; + break; } if (kvmppc_core_prepare_to_enter(vcpu)) { From f61c94bb99ca4253ac5dd57750e1af209a4beb7a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 8 Aug 2012 20:38:19 +0000 Subject: [PATCH 32/56] KVM: PPC: booke: Add watchdog emulation This patch adds the watchdog emulation in KVM. The watchdog emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl. The kernel timer are used for watchdog emulation and emulates h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how it is configured. Signed-off-by: Liu Yu Signed-off-by: Scott Wood [bharat.bhushan@freescale.com: reworked patch] Signed-off-by: Bharat Bhushan [agraf: adjust to new request framework] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 3 + arch/powerpc/include/asm/kvm_ppc.h | 2 + arch/powerpc/include/asm/reg_booke.h | 7 ++ arch/powerpc/kvm/book3s.c | 9 ++ arch/powerpc/kvm/booke.c | 155 +++++++++++++++++++++++++++ arch/powerpc/kvm/booke_emulate.c | 8 ++ arch/powerpc/kvm/powerpc.c | 14 ++- include/linux/kvm.h | 2 + include/linux/kvm_host.h | 1 + 9 files changed, 199 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 4a5ec8f573c7f..51b0ccd56769d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -471,6 +471,8 @@ struct kvm_vcpu_arch { ulong fault_esr; ulong queued_dear; ulong queued_esr; + spinlock_t wdt_lock; + struct timer_list wdt_timer; u32 tlbcfg[4]; u32 mmucfg; u32 epr; @@ -486,6 +488,7 @@ struct kvm_vcpu_arch { u8 osi_needed; u8 osi_enabled; u8 papr_enabled; + u8 watchdog_enabled; u8 sane; u8 cpu_type; u8 hcall_needed; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 3dfc437fb9d96..c06a64b533623 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -68,6 +68,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); extern void kvmppc_decrementer_func(unsigned long data); extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); +extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu); +extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu); /* Core-specific hooks */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 2d916c4982c51..e07e6af5e1ff9 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -539,6 +539,13 @@ #define TCR_FIE 0x00800000 /* FIT Interrupt Enable */ #define TCR_ARE 0x00400000 /* Auto Reload Enable */ +#ifdef CONFIG_E500 +#define TCR_GET_WP(tcr) ((((tcr) & 0xC0000000) >> 30) | \ + (((tcr) & 0x1E0000) >> 15)) +#else +#define TCR_GET_WP(tcr) (((tcr) & 0xC0000000) >> 30) +#endif + /* Bit definitions for the TSR. */ #define TSR_ENW 0x80000000 /* Enable Next Watchdog */ #define TSR_WIS 0x40000000 /* WDT Interrupt Status */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 3f2a8360c857f..e94666566fa93 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return 0; } +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ +} + int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index c36493087dbf8..09e8bf33a8c97 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -209,6 +209,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); } +static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG); +} + +static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) +{ + clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions); +} + static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) { #ifdef CONFIG_KVM_BOOKE_HV @@ -328,6 +338,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, msr_mask = MSR_CE | MSR_ME | MSR_DE; int_class = INT_CLASS_NONCRIT; break; + case BOOKE_IRQPRIO_WATCHDOG: case BOOKE_IRQPRIO_CRITICAL: case BOOKE_IRQPRIO_DBELL_CRIT: allowed = vcpu->arch.shared->msr & MSR_CE; @@ -407,12 +418,121 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, return allowed; } +/* + * Return the number of jiffies until the next timeout. If the timeout is + * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA + * because the larger value can break the timer APIs. + */ +static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu) +{ + u64 tb, wdt_tb, wdt_ticks = 0; + u64 nr_jiffies = 0; + u32 period = TCR_GET_WP(vcpu->arch.tcr); + + wdt_tb = 1ULL << (63 - period); + tb = get_tb(); + /* + * The watchdog timeout will hapeen when TB bit corresponding + * to watchdog will toggle from 0 to 1. + */ + if (tb & wdt_tb) + wdt_ticks = wdt_tb; + + wdt_ticks += wdt_tb - (tb & (wdt_tb - 1)); + + /* Convert timebase ticks to jiffies */ + nr_jiffies = wdt_ticks; + + if (do_div(nr_jiffies, tb_ticks_per_jiffy)) + nr_jiffies++; + + return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA); +} + +static void arm_next_watchdog(struct kvm_vcpu *vcpu) +{ + unsigned long nr_jiffies; + unsigned long flags; + + /* + * If TSR_ENW and TSR_WIS are not set then no need to exit to + * userspace, so clear the KVM_REQ_WATCHDOG request. + */ + if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS)) + clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests); + + spin_lock_irqsave(&vcpu->arch.wdt_lock, flags); + nr_jiffies = watchdog_next_timeout(vcpu); + /* + * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA + * then do not run the watchdog timer as this can break timer APIs. + */ + if (nr_jiffies < NEXT_TIMER_MAX_DELTA) + mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies); + else + del_timer(&vcpu->arch.wdt_timer); + spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags); +} + +void kvmppc_watchdog_func(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + u32 tsr, new_tsr; + int final; + + do { + new_tsr = tsr = vcpu->arch.tsr; + final = 0; + + /* Time out event */ + if (tsr & TSR_ENW) { + if (tsr & TSR_WIS) + final = 1; + else + new_tsr = tsr | TSR_WIS; + } else { + new_tsr = tsr | TSR_ENW; + } + } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr); + + if (new_tsr & TSR_WIS) { + smp_wmb(); + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_vcpu_kick(vcpu); + } + + /* + * If this is final watchdog expiry and some action is required + * then exit to userspace. + */ + if (final && (vcpu->arch.tcr & TCR_WRC_MASK) && + vcpu->arch.watchdog_enabled) { + smp_wmb(); + kvm_make_request(KVM_REQ_WATCHDOG, vcpu); + kvm_vcpu_kick(vcpu); + } + + /* + * Stop running the watchdog timer after final expiration to + * prevent the host from being flooded with timers if the + * guest sets a short period. + * Timers will resume when TSR/TCR is updated next time. + */ + if (!final) + arm_next_watchdog(vcpu); +} + static void update_timer_ints(struct kvm_vcpu *vcpu) { if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) kvmppc_core_queue_dec(vcpu); else kvmppc_core_dequeue_dec(vcpu); + + if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS)) + kvmppc_core_queue_watchdog(vcpu); + else + kvmppc_core_dequeue_watchdog(vcpu); } static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) @@ -466,6 +586,11 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) kvmppc_core_flush_tlb(vcpu); #endif + if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_WATCHDOG; + r = 0; + } + return r; } @@ -995,6 +1120,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return r; } +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) +{ + /* setup watchdog timer once */ + spin_lock_init(&vcpu->arch.wdt_lock); + setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, + (unsigned long)vcpu); + + return 0; +} + +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + del_timer_sync(&vcpu->arch.wdt_timer); +} + int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; @@ -1090,7 +1230,13 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, } if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { + u32 old_tsr = vcpu->arch.tsr; + vcpu->arch.tsr = sregs->u.e.tsr; + + if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) + arm_next_watchdog(vcpu); + update_timer_ints(vcpu); } @@ -1251,6 +1397,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) { vcpu->arch.tcr = new_tcr; + arm_next_watchdog(vcpu); update_timer_ints(vcpu); } @@ -1265,6 +1412,14 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) { clear_bits(tsr_bits, &vcpu->arch.tsr); + + /* + * We may have stopped the watchdog due to + * being stuck on final expiration. + */ + if (tsr_bits & (TSR_ENW | TSR_WIS)) + arm_next_watchdog(vcpu); + update_timer_ints(vcpu); } diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 12834bb608aba..5a66ade7fd17d 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) kvmppc_clr_tsr_bits(vcpu, spr_val); break; case SPRN_TCR: + /* + * WRC is a 2-bit field that is supposed to preserve its + * value once written to non-zero. + */ + if (vcpu->arch.tcr & TCR_WRC_MASK) { + spr_val &= ~TCR_WRC_MASK; + spr_val |= vcpu->arch.tcr & TCR_WRC_MASK; + } kvmppc_set_tcr(vcpu, spr_val); break; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 54b12af577d05..0ffd7d17adc75 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -300,6 +300,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_SREGS: + case KVM_CAP_PPC_BOOKE_WATCHDOG: #else case KVM_CAP_PPC_SEGSTATE: case KVM_CAP_PPC_HIOR: @@ -476,6 +477,8 @@ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { + int ret; + hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; @@ -484,13 +487,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) #ifdef CONFIG_KVM_EXIT_TIMING mutex_init(&vcpu->arch.exit_timing_lock); #endif - - return 0; + ret = kvmppc_subarch_vcpu_init(vcpu); + return ret; } void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { kvmppc_mmu_destroy(vcpu); + kvmppc_subarch_vcpu_uninit(vcpu); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -735,6 +739,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = 0; vcpu->arch.papr_enabled = true; break; +#ifdef CONFIG_BOOKE + case KVM_CAP_PPC_BOOKE_WATCHDOG: + r = 0; + vcpu->arch.watchdog_enabled = true; + break; +#endif #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: { struct kvm_config_tlb cfg; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 4cb3761bebae9..1649d4b57e1fa 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -167,6 +167,7 @@ struct kvm_pit_config { #define KVM_EXIT_OSI 18 #define KVM_EXIT_PAPR_HCALL 19 #define KVM_EXIT_S390_UCONTROL 20 +#define KVM_EXIT_WATCHDOG 21 /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 @@ -628,6 +629,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_READONLY_MEM 81 #endif #define KVM_CAP_IRQFD_RESAMPLE 82 +#define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2850656e2e96a..0ca3663206f81 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -118,6 +118,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_IMMEDIATE_EXIT 15 #define KVM_REQ_PMU 16 #define KVM_REQ_PMI 17 +#define KVM_REQ_WATCHDOG 18 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 From 6df8d3fc58dde84fc82a9ec2581440e54dfd3d14 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 8 Aug 2012 21:17:55 +0000 Subject: [PATCH 33/56] booke: Added ONE_REG interface for IAC/DAC debug registers IAC/DAC are defined as 32 bit while they are 64 bit wide. So ONE_REG interface is added to set/get them. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm.h | 12 ++++++++ arch/powerpc/include/asm/kvm_host.h | 24 +++++++++++++-- arch/powerpc/kvm/booke.c | 48 +++++++++++++++++++++++++++-- arch/powerpc/kvm/booke_emulate.c | 8 ++--- 4 files changed, 84 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 1bea4d8ea6f43..3c14202a3c84a 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -221,6 +221,12 @@ struct kvm_sregs { __u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */ __u32 dbcr[3]; + /* + * iac/dac registers are 64bit wide, while this API + * interface provides only lower 32 bits on 64 bit + * processors. ONE_REG interface is added for 64bit + * iac/dac registers. + */ __u32 iac[4]; __u32 dac[2]; __u32 dvc[2]; @@ -326,5 +332,11 @@ struct kvm_book3e_206_tlb_params { }; #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) +#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) +#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) +#define KVM_REG_PPC_IAC3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x4) +#define KVM_REG_PPC_IAC4 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5) +#define KVM_REG_PPC_DAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6) +#define KVM_REG_PPC_DAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 51b0ccd56769d..f20a5ef1c7e8c 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -346,6 +346,27 @@ struct kvmppc_slb { bool class : 1; }; +# ifdef CONFIG_PPC_FSL_BOOK3E +#define KVMPPC_BOOKE_IAC_NUM 2 +#define KVMPPC_BOOKE_DAC_NUM 2 +# else +#define KVMPPC_BOOKE_IAC_NUM 4 +#define KVMPPC_BOOKE_DAC_NUM 2 +# endif +#define KVMPPC_BOOKE_MAX_IAC 4 +#define KVMPPC_BOOKE_MAX_DAC 2 + +struct kvmppc_booke_debug_reg { + u32 dbcr0; + u32 dbcr1; + u32 dbcr2; +#ifdef CONFIG_KVM_E500MC + u32 dbcr4; +#endif + u64 iac[KVMPPC_BOOKE_MAX_IAC]; + u64 dac[KVMPPC_BOOKE_MAX_DAC]; +}; + struct kvm_vcpu_arch { ulong host_stack; u32 host_pid; @@ -440,8 +461,6 @@ struct kvm_vcpu_arch { u32 ccr0; u32 ccr1; - u32 dbcr0; - u32 dbcr1; u32 dbsr; u64 mmcr[3]; @@ -476,6 +495,7 @@ struct kvm_vcpu_arch { u32 tlbcfg[4]; u32 mmucfg; u32 epr; + struct kvmppc_booke_debug_reg dbg_reg; #endif gpa_t paddr_accessed; gva_t vaddr_accessed; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 09e8bf33a8c97..959aae96469cb 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1351,12 +1351,56 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { - return -EINVAL; + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_IAC1: + case KVM_REG_PPC_IAC2: + case KVM_REG_PPC_IAC3: + case KVM_REG_PPC_IAC4: { + int iac = reg->id - KVM_REG_PPC_IAC1; + r = copy_to_user((u64 __user *)(long)reg->addr, + &vcpu->arch.dbg_reg.iac[iac], sizeof(u64)); + break; + } + case KVM_REG_PPC_DAC1: + case KVM_REG_PPC_DAC2: { + int dac = reg->id - KVM_REG_PPC_DAC1; + r = copy_to_user((u64 __user *)(long)reg->addr, + &vcpu->arch.dbg_reg.dac[dac], sizeof(u64)); + break; + } + default: + break; + } + return r; } int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { - return -EINVAL; + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_IAC1: + case KVM_REG_PPC_IAC2: + case KVM_REG_PPC_IAC3: + case KVM_REG_PPC_IAC4: { + int iac = reg->id - KVM_REG_PPC_IAC1; + r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac], + (u64 __user *)(long)reg->addr, sizeof(u64)); + break; + } + case KVM_REG_PPC_DAC1: + case KVM_REG_PPC_DAC2: { + int dac = reg->id - KVM_REG_PPC_DAC1; + r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac], + (u64 __user *)(long)reg->addr, sizeof(u64)); + break; + } + default: + break; + } + return r; } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 5a66ade7fd17d..cc99a0b3d202b 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -133,10 +133,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) vcpu->arch.csrr1 = spr_val; break; case SPRN_DBCR0: - vcpu->arch.dbcr0 = spr_val; + vcpu->arch.dbg_reg.dbcr0 = spr_val; break; case SPRN_DBCR1: - vcpu->arch.dbcr1 = spr_val; + vcpu->arch.dbg_reg.dbcr1 = spr_val; break; case SPRN_DBSR: vcpu->arch.dbsr &= ~spr_val; @@ -266,10 +266,10 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) *spr_val = vcpu->arch.csrr1; break; case SPRN_DBCR0: - *spr_val = vcpu->arch.dbcr0; + *spr_val = vcpu->arch.dbg_reg.dbcr0; break; case SPRN_DBCR1: - *spr_val = vcpu->arch.dbcr1; + *spr_val = vcpu->arch.dbg_reg.dbcr1; break; case SPRN_DBSR: *spr_val = vcpu->arch.dbsr; From 491dd5b8a4926393308172da80c73faf242a4057 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 14:40:29 +0200 Subject: [PATCH 34/56] KVM: PPC: 44x: Initialize PVR We need to make sure that vcpu->arch.pvr is initialized to a sane value, so let's just take the host PVR. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/44x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 50e7dbc7356cd..3d7fd21c65f92 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -83,6 +83,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) vcpu_44x->shadow_refs[i].gtlb_index = -1; vcpu->arch.cpu_type = KVM_CPU_440; + vcpu->arch.pvr = mfspr(SPRN_PVR); return 0; } From 50c871edf59b4585fd2c17acfe4e7cd3752418b7 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 14:50:54 +0200 Subject: [PATCH 35/56] KVM: PPC: BookE: Add MCSR SPR support Add support for the MCSR SPR. This only implements the SPR storage bits, not actual machine checks. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke_emulate.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index cc99a0b3d202b..514790f41abaf 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -237,6 +237,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_IVOR15: vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val; break; + case SPRN_MCSR: + vcpu->arch.mcsr &= ~spr_val; + break; default: emulated = EMULATE_FAIL; @@ -329,6 +332,9 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_IVOR15: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; break; + case SPRN_MCSR: + *spr_val = vcpu->arch.mcsr; + break; default: emulated = EMULATE_FAIL; From 166a2b7000c388aee81168987ce2eddb6783f550 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 15 Aug 2012 01:38:43 +0200 Subject: [PATCH 36/56] KVM: PPC: Use symbols for exit trace Exit traces are a lot easier to read when you don't have to remember cryptic numbers for guest exit reasons. Symbolify them in our trace output. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/trace.h | 58 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index cb2780a42fd8a..519aba8bb3d38 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -31,6 +31,60 @@ TRACE_EVENT(kvm_ppc_instr, __entry->inst, __entry->pc, __entry->emulate) ); +#ifdef CONFIG_PPC_BOOK3S +#define kvm_trace_symbol_exit \ + {0x100, "SYSTEM_RESET"}, \ + {0x200, "MACHINE_CHECK"}, \ + {0x300, "DATA_STORAGE"}, \ + {0x380, "DATA_SEGMENT"}, \ + {0x400, "INST_STORAGE"}, \ + {0x480, "INST_SEGMENT"}, \ + {0x500, "EXTERNAL"}, \ + {0x501, "EXTERNAL_LEVEL"}, \ + {0x502, "EXTERNAL_HV"}, \ + {0x600, "ALIGNMENT"}, \ + {0x700, "PROGRAM"}, \ + {0x800, "FP_UNAVAIL"}, \ + {0x900, "DECREMENTER"}, \ + {0x980, "HV_DECREMENTER"}, \ + {0xc00, "SYSCALL"}, \ + {0xd00, "TRACE"}, \ + {0xe00, "H_DATA_STORAGE"}, \ + {0xe20, "H_INST_STORAGE"}, \ + {0xe40, "H_EMUL_ASSIST"}, \ + {0xf00, "PERFMON"}, \ + {0xf20, "ALTIVEC"}, \ + {0xf40, "VSX"} +#else +#define kvm_trace_symbol_exit \ + {0, "CRITICAL"}, \ + {1, "MACHINE_CHECK"}, \ + {2, "DATA_STORAGE"}, \ + {3, "INST_STORAGE"}, \ + {4, "EXTERNAL"}, \ + {5, "ALIGNMENT"}, \ + {6, "PROGRAM"}, \ + {7, "FP_UNAVAIL"}, \ + {8, "SYSCALL"}, \ + {9, "AP_UNAVAIL"}, \ + {10, "DECREMENTER"}, \ + {11, "FIT"}, \ + {12, "WATCHDOG"}, \ + {13, "DTLB_MISS"}, \ + {14, "ITLB_MISS"}, \ + {15, "DEBUG"}, \ + {32, "SPE_UNAVAIL"}, \ + {33, "SPE_FP_DATA"}, \ + {34, "SPE_FP_ROUND"}, \ + {35, "PERFORMANCE_MONITOR"}, \ + {36, "DOORBELL"}, \ + {37, "DOORBELL_CRITICAL"}, \ + {38, "GUEST_DBELL"}, \ + {39, "GUEST_DBELL_CRIT"}, \ + {40, "HV_SYSCALL"}, \ + {41, "HV_PRIV"} +#endif + TRACE_EVENT(kvm_exit, TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), TP_ARGS(exit_nr, vcpu), @@ -62,7 +116,7 @@ TRACE_EVENT(kvm_exit, __entry->last_inst = vcpu->arch.last_inst; ), - TP_printk("exit=0x%x" + TP_printk("exit=%s" " | pc=0x%lx" " | msr=0x%lx" " | dar=0x%lx" @@ -71,7 +125,7 @@ TRACE_EVENT(kvm_exit, #endif " | last_inst=0x%lx" , - __entry->exit_nr, + __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), __entry->pc, __entry->msr, __entry->dar, From 430c7ff52ffb902e1e08b255b93c28fcad8cb9ef Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 15 Aug 2012 11:42:07 +0200 Subject: [PATCH 37/56] KVM: PPC: E500: Remove E500_TLB_DIRTY flag Since we always mark pages as dirty immediately when mapping them read/write now, there's no need for the dirty flag in our cache. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500.h | 3 +-- arch/powerpc/kvm/e500_tlb.c | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index aa8b81428bf4a..d1622864549ec 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -27,8 +27,7 @@ #define E500_TLB_NUM 2 #define E500_TLB_VALID 1 -#define E500_TLB_DIRTY 2 -#define E500_TLB_BITMAP 4 +#define E500_TLB_BITMAP 2 struct tlbe_ref { pfn_t pfn; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 1af6fab58995b..43489a8fa9856 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -303,10 +303,8 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, ref->pfn = pfn; ref->flags = E500_TLB_VALID; - if (tlbe_is_writable(gtlbe)) { - ref->flags |= E500_TLB_DIRTY; + if (tlbe_is_writable(gtlbe)) kvm_set_pfn_dirty(pfn); - } } static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) From 2e2327023fe090d68aacad395178298645eaffea Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 15 Aug 2012 17:37:13 +0000 Subject: [PATCH 38/56] Document IACx/DACx registers access using ONE_REG API Patch to access the debug registers (IACx/DACx) using ONE_REG api was sent earlier. But that missed the respective documentation. Also corrected the index number referencing in section 4.69 Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 170afb1fbc2e1..c84739e2c0f37 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1734,7 +1734,12 @@ registers, find a list below: Arch | Register | Width (bits) | | PPC | KVM_REG_PPC_HIOR | 64 - + PPC | KVM_REG_PPC_IAC1 | 64 + PPC | KVM_REG_PPC_IAC2 | 64 + PPC | KVM_REG_PPC_IAC3 | 64 + PPC | KVM_REG_PPC_IAC4 | 64 + PPC | KVM_REG_PPC_DAC1 | 64 + PPC | KVM_REG_PPC_DAC2 | 64 4.69 KVM_GET_ONE_REG @@ -1750,7 +1755,7 @@ kvm_one_reg struct passed in. On success, the register value can be found at the memory location pointed to by "addr". The list of registers accessible using this interface is identical to the -list in 4.64. +list in 4.68. 4.70 KVM_KVMCLOCK_CTRL From e4dcfe88fb30bcedda80c151018086fffb8280e6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Aug 2012 00:28:09 +0200 Subject: [PATCH 39/56] KVM: PPC: 440: Implement mtdcrx We need mtdcrx to execute properly on 460 cores. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/44x_emulate.c | 36 ++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index c8c61578fdfc6..3843a75e3e98d 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -28,11 +28,29 @@ #include "44x_tlb.h" #define XOP_MFDCR 323 +#define XOP_MTDCRX 387 #define XOP_MTDCR 451 #define XOP_TLBSX 914 #define XOP_ICCCI 966 #define XOP_TLBWE 978 +static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn) +{ + /* emulate some access in kernel */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs); + return EMULATE_DONE; + default: + vcpu->run->dcr.dcrn = dcrn; + vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs); + vcpu->run->dcr.is_write = 1; + vcpu->arch.dcr_needed = 1; + kvmppc_account_exit(vcpu, DCR_EXITS); + return EMULATE_DO_DCR; + } +} + int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -85,20 +103,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case XOP_MTDCR: - /* emulate some access in kernel */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs); - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = kvmppc_get_gpr(vcpu, rs); - run->dcr.is_write = 1; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - emulated = EMULATE_DO_DCR; - } + emulated = emulate_mtdcr(vcpu, rs, dcrn); + break; + case XOP_MTDCRX: + emulated = emulate_mtdcr(vcpu, rs, + kvmppc_get_gpr(vcpu, ra)); break; case XOP_TLBWE: From ceb985f9d18cba2efdef08b8d31751c2c2b20d77 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Aug 2012 00:34:58 +0200 Subject: [PATCH 40/56] KVM: PPC: 440: Implement mfdcrx We need mfdcrx to execute properly on 460 cores. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/44x_emulate.c | 74 ++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 3843a75e3e98d..1a793c4c4a679 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -27,6 +27,7 @@ #include "booke.h" #include "44x_tlb.h" +#define XOP_MFDCRX 259 #define XOP_MFDCR 323 #define XOP_MTDCRX 387 #define XOP_MTDCR 451 @@ -51,6 +52,43 @@ static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn) } } +static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn) +{ + /* The guest may access CPR0 registers to determine the timebase + * frequency, and it must know the real host frequency because it + * can directly access the timebase registers. + * + * It would be possible to emulate those accesses in userspace, + * but userspace can really only figure out the end frequency. + * We could decompose that into the factors that compute it, but + * that's tricky math, and it's easier to just report the real + * CPR0 values. + */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr); + break; + case DCRN_CPR0_CONFIG_DATA: + local_irq_disable(); + mtdcr(DCRN_CPR0_CONFIG_ADDR, + vcpu->arch.cpr0_cfgaddr); + kvmppc_set_gpr(vcpu, rt, + mfdcr(DCRN_CPR0_CONFIG_DATA)); + local_irq_enable(); + break; + default: + vcpu->run->dcr.dcrn = dcrn; + vcpu->run->dcr.data = 0; + vcpu->run->dcr.is_write = 0; + vcpu->arch.io_gpr = rt; + vcpu->arch.dcr_needed = 1; + kvmppc_account_exit(vcpu, DCR_EXITS); + return EMULATE_DO_DCR; + } + + return EMULATE_DONE; +} + int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -68,38 +106,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (get_xop(inst)) { case XOP_MFDCR: - /* The guest may access CPR0 registers to determine the timebase - * frequency, and it must know the real host frequency because it - * can directly access the timebase registers. - * - * It would be possible to emulate those accesses in userspace, - * but userspace can really only figure out the end frequency. - * We could decompose that into the factors that compute it, but - * that's tricky math, and it's easier to just report the real - * CPR0 values. - */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr); - break; - case DCRN_CPR0_CONFIG_DATA: - local_irq_disable(); - mtdcr(DCRN_CPR0_CONFIG_ADDR, - vcpu->arch.cpr0_cfgaddr); - kvmppc_set_gpr(vcpu, rt, - mfdcr(DCRN_CPR0_CONFIG_DATA)); - local_irq_enable(); - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = 0; - run->dcr.is_write = 0; - vcpu->arch.io_gpr = rt; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - emulated = EMULATE_DO_DCR; - } + emulated = emulate_mfdcr(vcpu, rt, dcrn); + break; + case XOP_MFDCRX: + emulated = emulate_mfdcr(vcpu, rt, + kvmppc_get_gpr(vcpu, ra)); break; case XOP_MTDCR: From 7a08c2740f07fb8c3769d1f137721835ead7652f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Aug 2012 13:10:16 +0200 Subject: [PATCH 41/56] KVM: PPC: BookE: Support FPU on non-hv systems When running on HV aware hosts, we can not trap when the guest sets the FP bit, so we just let it do so when it wants to, because it has full access to MSR. For non-HV aware hosts with an FPU (like 440), we need to also adjust the shadow MSR though. Otherwise the guest gets an FP unavailable trap even when it really enabled the FP bit in MSR. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 959aae96469cb..5f0476a602d8b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -122,6 +122,16 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu) } #endif +static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) +{ +#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV) + /* We always treat the FP bit as enabled from the host + perspective, so only need to adjust the shadow MSR */ + vcpu->arch.shadow_msr &= ~MSR_FP; + vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_FP; +#endif +} + /* * Helper function for "full" MSR writes. No need to call this if only * EE/CE/ME/DE/RI are changing. @@ -138,6 +148,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) kvmppc_mmu_msr_notify(vcpu, old_msr); kvmppc_vcpu_sync_spe(vcpu); + kvmppc_vcpu_sync_fpu(vcpu); } static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, From d61966fc08b84857b697ebae4489c652dd87e48a Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 12 Sep 2012 03:18:14 +0000 Subject: [PATCH 42/56] KVM: PPC: bookehv: Allow duplicate calls of DO_KVM macro The current form of DO_KVM macro restricts its use to one call per input parameter set. This is caused by kvmppc_resume_\intno\()_\srr1 symbol definition. Duplicate calls of DO_KVM are required by distinct implementations of exeption handlers which are delegated at runtime. Use a rare label number to avoid conflicts with the calling contexts. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_booke_hv_asm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h index 30a600fa1b6a9..a37a12a9a7d72 100644 --- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h +++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h @@ -38,9 +38,9 @@ #ifdef CONFIG_KVM_BOOKE_HV BEGIN_FTR_SECTION mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */ - bf 3, kvmppc_resume_\intno\()_\srr1 + bf 3, 1975f b kvmppc_handler_\intno\()_\srr1 -kvmppc_resume_\intno\()_\srr1: +1975: END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) #endif .endm From 2c9097e4c1340208ef93371abd4b3bd7e989381b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 11 Sep 2012 13:27:01 +0000 Subject: [PATCH 43/56] KVM: PPC: Book3S HV: Take the SRCU read lock before looking up memslots The generic KVM code uses SRCU (sleeping RCU) to protect accesses to the memslots data structures against updates due to userspace adding, modifying or removing memory slots. We need to do that too, both to avoid accessing stale copies of the memslots and to avoid lockdep warnings. This therefore adds srcu_read_lock/unlock pairs around code that accesses and uses memslots. Since the real-mode handlers for H_ENTER, H_REMOVE and H_BULK_REMOVE need to access the memslots, and we don't want to call the SRCU code in real mode (since we have no assurance that it would only access the linear mapping), we hold the SRCU read lock for the VM while in the guest. This does mean that adding or removing memory slots while some vcpus are executing in the guest will block for up to two jiffies. This tradeoff is acceptable since adding/removing memory slots only happens rarely, while H_ENTER/H_REMOVE/H_BULK_REMOVE are performance-critical hot paths. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 17 +++++++++++++---- arch/powerpc/kvm/book3s_hv.c | 27 +++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d95d11322a159..0f031c07f7e54 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -1057,20 +1058,22 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, unsigned long hva, psize, offset; unsigned long pa; unsigned long *physp; + int srcu_idx; + srcu_idx = srcu_read_lock(&kvm->srcu); memslot = gfn_to_memslot(kvm, gfn); if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) - return NULL; + goto err; if (!kvm->arch.using_mmu_notifiers) { physp = kvm->arch.slot_phys[memslot->id]; if (!physp) - return NULL; + goto err; physp += gfn - memslot->base_gfn; pa = *physp; if (!pa) { if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0) - return NULL; + goto err; pa = *physp; } page = pfn_to_page(pa >> PAGE_SHIFT); @@ -1079,9 +1082,11 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, hva = gfn_to_hva_memslot(memslot, gfn); npages = get_user_pages_fast(hva, 1, 1, pages); if (npages < 1) - return NULL; + goto err; page = pages[0]; } + srcu_read_unlock(&kvm->srcu, srcu_idx); + psize = PAGE_SIZE; if (PageHuge(page)) { page = compound_head(page); @@ -1091,6 +1096,10 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, if (nb_ret) *nb_ret = psize - offset; return page_address(page) + offset; + + err: + srcu_read_unlock(&kvm->srcu, srcu_idx); + return NULL; } void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 83e929e66f9d8..48b0d4a73b9d5 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -366,13 +367,16 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) unsigned long req = kvmppc_get_gpr(vcpu, 3); unsigned long target, ret = H_SUCCESS; struct kvm_vcpu *tvcpu; + int idx; switch (req) { case H_ENTER: + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4), kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6), kvmppc_get_gpr(vcpu, 7)); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; case H_CEDE: break; @@ -411,6 +415,7 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { int r = RESUME_HOST; + int srcu_idx; vcpu->stat.sum_exits++; @@ -470,12 +475,16 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * have been handled already. */ case BOOK3S_INTERRUPT_H_DATA_STORAGE: + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvmppc_book3s_hv_page_fault(run, vcpu, vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; case BOOK3S_INTERRUPT_H_INST_STORAGE: + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvmppc_book3s_hv_page_fault(run, vcpu, kvmppc_get_pc(vcpu), 0); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; /* * This occurs if the guest executes an illegal instruction. @@ -820,6 +829,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) long ret; u64 now; int ptid, i, need_vpa_update; + int srcu_idx; /* don't start if any threads have a signal pending */ need_vpa_update = 0; @@ -898,6 +908,9 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) spin_unlock(&vc->lock); kvm_guest_enter(); + + srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu); + __kvmppc_vcore_entry(NULL, vcpu0); for (i = 0; i < threads_per_core; ++i) kvmppc_release_hwthread(vc->pcpu + i); @@ -913,6 +926,8 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) vc->vcore_state = VCORE_EXITING; spin_unlock(&vc->lock); + srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx); + /* make sure updates to secondary vcpu structs are visible now */ smp_mb(); kvm_guest_exit(); @@ -1362,6 +1377,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) unsigned long rmls; unsigned long *physp; unsigned long i, npages; + int srcu_idx; mutex_lock(&kvm->lock); if (kvm->arch.rma_setup_done) @@ -1377,12 +1393,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) } /* Look up the memslot for guest physical address 0 */ + srcu_idx = srcu_read_lock(&kvm->srcu); memslot = gfn_to_memslot(kvm, 0); /* We must have some memory at 0 by now */ err = -EINVAL; if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) - goto out; + goto out_srcu; /* Look up the VMA for the start of this memory slot */ hva = memslot->userspace_addr; @@ -1406,14 +1423,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) err = -EPERM; if (cpu_has_feature(CPU_FTR_ARCH_201)) { pr_err("KVM: CPU requires an RMO\n"); - goto out; + goto out_srcu; } /* We can handle 4k, 64k or 16M pages in the VRMA */ err = -EINVAL; if (!(psize == 0x1000 || psize == 0x10000 || psize == 0x1000000)) - goto out; + goto out_srcu; /* Update VRMASD field in the LPCR */ senc = slb_pgsize_encoding(psize); @@ -1436,7 +1453,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) err = -EINVAL; if (rmls < 0) { pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); - goto out; + goto out_srcu; } atomic_inc(&ri->use_count); kvm->arch.rma = ri; @@ -1476,6 +1493,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) smp_wmb(); kvm->arch.rma_setup_done = 1; err = 0; + out_srcu: + srcu_read_unlock(&kvm->srcu, srcu_idx); out: mutex_unlock(&kvm->lock); return err; From a66b48c3a39fa1c4223d4f847fdc7a04ed1618de Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 11 Sep 2012 13:27:46 +0000 Subject: [PATCH 44/56] KVM: PPC: Move kvm->arch.slot_phys into memslot.arch Now that we have an architecture-specific field in the kvm_memory_slot structure, we can use it to store the array of page physical addresses that we need for Book3S HV KVM on PPC970 processors. This reduces the size of struct kvm_arch for Book3S HV, and also reduces the size of struct kvm_arch_memory_slot for other PPC KVM variants since the fields in it are now only compiled in for Book3S HV. This necessitates making the kvm_arch_create_memslot and kvm_arch_free_memslot operations specific to each PPC KVM variant. That in turn means that we now don't allocate the rmap arrays on Book3S PR and Book E. Since we now unpin pages and free the slot_phys array in kvmppc_core_free_memslot, we no longer need to do it in kvmppc_core_destroy_vm, since the generic code takes care to free all the memslots when destroying a VM. We now need the new memslot to be passed in to kvmppc_core_prepare_memory_region, since we need to initialize its arch.slot_phys member on Book3S HV. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 9 +-- arch/powerpc/include/asm/kvm_ppc.h | 5 ++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 6 +- arch/powerpc/kvm/book3s_hv.c | 104 ++++++++++++++++------------ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- arch/powerpc/kvm/book3s_pr.c | 12 ++++ arch/powerpc/kvm/booke.c | 12 ++++ arch/powerpc/kvm/powerpc.c | 13 +--- 8 files changed, 102 insertions(+), 61 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index f20a5ef1c7e8c..68f5a308737a1 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -204,7 +204,7 @@ struct revmap_entry { }; /* - * We use the top bit of each memslot->rmap entry as a lock bit, + * We use the top bit of each memslot->arch.rmap entry as a lock bit, * and bit 32 as a present flag. The bottom 32 bits are the * index in the guest HPT of a HPTE that points to the page. */ @@ -215,14 +215,17 @@ struct revmap_entry { #define KVMPPC_RMAP_PRESENT 0x100000000ul #define KVMPPC_RMAP_INDEX 0xfffffffful -/* Low-order bits in kvm->arch.slot_phys[][] */ +/* Low-order bits in memslot->arch.slot_phys[] */ #define KVMPPC_PAGE_ORDER_MASK 0x1f #define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */ #define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */ #define KVMPPC_GOT_PAGE 0x80 struct kvm_arch_memory_slot { +#ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long *rmap; + unsigned long *slot_phys; +#endif /* CONFIG_KVM_BOOK3S_64_HV */ }; struct kvm_arch { @@ -246,8 +249,6 @@ struct kvm_arch { unsigned long hpt_npte; unsigned long hpt_mask; spinlock_t slot_phys_lock; - unsigned long *slot_phys[KVM_MEM_SLOTS_NUM]; - int slot_npages[KVM_MEM_SLOTS_NUM]; unsigned short last_vcpu[NR_CPUS]; struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; struct kvmppc_linear_info *hpt_li; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c06a64b533623..41a00eae68c72 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -143,7 +143,12 @@ extern struct kvmppc_linear_info *kvm_alloc_hpt(void); extern void kvm_release_hpt(struct kvmppc_linear_info *li); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); +extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont); +extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 0f031c07f7e54..a389cc62b16c7 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -261,7 +261,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) /* * This is called to get a reference to a guest page if there isn't - * one already in the kvm->arch.slot_phys[][] arrays. + * one already in the memslot->arch.slot_phys[] array. */ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, struct kvm_memory_slot *memslot, @@ -276,7 +276,7 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, struct vm_area_struct *vma; unsigned long pfn, i, npages; - physp = kvm->arch.slot_phys[memslot->id]; + physp = memslot->arch.slot_phys; if (!physp) return -EINVAL; if (physp[gfn - memslot->base_gfn]) @@ -1065,7 +1065,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) goto err; if (!kvm->arch.using_mmu_notifiers) { - physp = kvm->arch.slot_phys[memslot->id]; + physp = memslot->arch.slot_phys; if (!physp) goto err; physp += gfn - memslot->base_gfn; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 48b0d4a73b9d5..817837de7362d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1314,48 +1314,67 @@ static unsigned long slb_pgsize_encoding(unsigned long psize) return senc; } -int kvmppc_core_prepare_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) +static void unpin_slot(struct kvm_memory_slot *memslot) { - unsigned long npages; - unsigned long *phys; + unsigned long *physp; + unsigned long j, npages, pfn; + struct page *page; - /* Allocate a slot_phys array */ - phys = kvm->arch.slot_phys[mem->slot]; - if (!kvm->arch.using_mmu_notifiers && !phys) { - npages = mem->memory_size >> PAGE_SHIFT; - phys = vzalloc(npages * sizeof(unsigned long)); - if (!phys) - return -ENOMEM; - kvm->arch.slot_phys[mem->slot] = phys; - kvm->arch.slot_npages[mem->slot] = npages; + physp = memslot->arch.slot_phys; + npages = memslot->npages; + if (!physp) + return; + for (j = 0; j < npages; j++) { + if (!(physp[j] & KVMPPC_GOT_PAGE)) + continue; + pfn = physp[j] >> PAGE_SHIFT; + page = pfn_to_page(pfn); + SetPageDirty(page); + put_page(page); + } +} + +void kvmppc_core_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ + if (!dont || free->arch.rmap != dont->arch.rmap) { + vfree(free->arch.rmap); + free->arch.rmap = NULL; } + if (!dont || free->arch.slot_phys != dont->arch.slot_phys) { + unpin_slot(free); + vfree(free->arch.slot_phys); + free->arch.slot_phys = NULL; + } +} + +int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages) +{ + slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); + if (!slot->arch.rmap) + return -ENOMEM; + slot->arch.slot_phys = NULL; return 0; } -static void unpin_slot(struct kvm *kvm, int slot_id) +int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem) { - unsigned long *physp; - unsigned long j, npages, pfn; - struct page *page; + unsigned long *phys; - physp = kvm->arch.slot_phys[slot_id]; - npages = kvm->arch.slot_npages[slot_id]; - if (physp) { - spin_lock(&kvm->arch.slot_phys_lock); - for (j = 0; j < npages; j++) { - if (!(physp[j] & KVMPPC_GOT_PAGE)) - continue; - pfn = physp[j] >> PAGE_SHIFT; - page = pfn_to_page(pfn); - SetPageDirty(page); - put_page(page); - } - kvm->arch.slot_phys[slot_id] = NULL; - spin_unlock(&kvm->arch.slot_phys_lock); - vfree(physp); + /* Allocate a slot_phys array if needed */ + phys = memslot->arch.slot_phys; + if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) { + phys = vzalloc(memslot->npages * sizeof(unsigned long)); + if (!phys) + return -ENOMEM; + memslot->arch.slot_phys = phys; } + + return 0; } void kvmppc_core_commit_memory_region(struct kvm *kvm, @@ -1482,11 +1501,16 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) /* Initialize phys addrs of pages in RMO */ npages = ri->npages; porder = __ilog2(npages); - physp = kvm->arch.slot_phys[memslot->id]; - spin_lock(&kvm->arch.slot_phys_lock); - for (i = 0; i < npages; ++i) - physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder; - spin_unlock(&kvm->arch.slot_phys_lock); + physp = memslot->arch.slot_phys; + if (physp) { + if (npages > memslot->npages) + npages = memslot->npages; + spin_lock(&kvm->arch.slot_phys_lock); + for (i = 0; i < npages; ++i) + physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + + porder; + spin_unlock(&kvm->arch.slot_phys_lock); + } } /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ @@ -1547,12 +1571,6 @@ int kvmppc_core_init_vm(struct kvm *kvm) void kvmppc_core_destroy_vm(struct kvm *kvm) { - unsigned long i; - - if (!kvm->arch.using_mmu_notifiers) - for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) - unpin_slot(kvm, i); - if (kvm->arch.rma) { kvm_release_rma(kvm->arch.rma); kvm->arch.rma = NULL; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index fb0e821622d4f..63eb94e63cc3a 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -183,7 +183,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, rmap = &memslot->arch.rmap[slot_fn]; if (!kvm->arch.using_mmu_notifiers) { - physp = kvm->arch.slot_phys[memslot->id]; + physp = memslot->arch.slot_phys; if (!physp) return H_PARAMETER; physp += slot_fn; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index b3c584f94cb33..fdadc9e57da2d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1220,7 +1220,19 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) } #endif /* CONFIG_PPC64 */ +void kvmppc_core_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem) { return 0; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 5f0476a602d8b..514405752988e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1438,7 +1438,19 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -ENOTSUPP; } +void kvmppc_core_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem) { return 0; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0ffd7d17adc75..33122dd89da94 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -389,19 +389,12 @@ long kvm_arch_dev_ioctl(struct file *filp, void kvm_arch_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - if (!dont || free->arch.rmap != dont->arch.rmap) { - vfree(free->arch.rmap); - free->arch.rmap = NULL; - } + kvmppc_core_free_memslot(free, dont); } int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) { - slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); - if (!slot->arch.rmap) - return -ENOMEM; - - return 0; + return kvmppc_core_create_memslot(slot, npages); } int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -410,7 +403,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, int user_alloc) { - return kvmppc_core_prepare_memory_region(kvm, mem); + return kvmppc_core_prepare_memory_region(kvm, memslot, mem); } void kvm_arch_commit_memory_region(struct kvm *kvm, From dfe49dbd1fc7310a4e0e2f83ae737cd7d34fa0cd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 11 Sep 2012 13:28:18 +0000 Subject: [PATCH 45/56] KVM: PPC: Book3S HV: Handle memory slot deletion and modification correctly This adds an implementation of kvm_arch_flush_shadow_memslot for Book3S HV, and arranges for kvmppc_core_commit_memory_region to flush the dirty log when modifying an existing slot. With this, we can handle deletion and modification of memory slots. kvm_arch_flush_shadow_memslot calls kvmppc_core_flush_memslot, which on Book3S HV now traverses the reverse map chains to remove any HPT (hashed page table) entries referring to pages in the memslot. This gets called by generic code whenever deleting a memslot or changing the guest physical address for a memslot. We flush the dirty log in kvmppc_core_commit_memory_region for consistency with what x86 does. We only need to flush when an existing memslot is being modified, because for a new memslot the rmap array (which stores the dirty bits) is all zero, meaning that every page is considered clean already, and when deleting a memslot we obviously don't care about the dirty bits any more. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 2 +- arch/powerpc/include/asm/kvm_ppc.h | 5 +++- arch/powerpc/kvm/book3s_64_mmu_hv.c | 33 +++++++++++++++++++++++---- arch/powerpc/kvm/book3s_hv.c | 18 +++++++++++++-- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- arch/powerpc/kvm/book3s_pr.c | 7 +++++- arch/powerpc/kvm/booke.c | 7 +++++- arch/powerpc/kvm/powerpc.c | 3 ++- 8 files changed, 64 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f0e0c6a66d973..ab738005d2eaf 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -160,7 +160,7 @@ extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel); extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, - struct kvm_memory_slot *memslot); + struct kvm_memory_slot *memslot, unsigned long *map); extern void kvmppc_entry_trampoline(void); extern void kvmppc_hv_entry_trampoline(void); diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 41a00eae68c72..3fb980d293e58 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -151,9 +151,12 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem); + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old); extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info); +extern void kvmppc_core_flush_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot); extern int kvmppc_bookehv_init(void); extern void kvmppc_bookehv_exit(void); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index a389cc62b16c7..f598366e51c64 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -851,7 +851,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, psize = hpte_page_size(hptep[0], ptel); if ((hptep[0] & HPTE_V_VALID) && hpte_rpn(ptel, psize) == gfn) { - hptep[0] |= HPTE_V_ABSENT; + if (kvm->arch.using_mmu_notifiers) + hptep[0] |= HPTE_V_ABSENT; kvmppc_invalidate_hpte(kvm, hptep, i); /* Harvest R and C */ rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); @@ -878,6 +879,28 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return 0; } +void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) +{ + unsigned long *rmapp; + unsigned long gfn; + unsigned long n; + + rmapp = memslot->arch.rmap; + gfn = memslot->base_gfn; + for (n = memslot->npages; n; --n) { + /* + * Testing the present bit without locking is OK because + * the memslot has been marked invalid already, and hence + * no new HPTEs referencing this page can be created, + * thus the present bit can't go from 0 to 1. + */ + if (*rmapp & KVMPPC_RMAP_PRESENT) + kvm_unmap_rmapp(kvm, rmapp, gfn); + ++rmapp; + ++gfn; + } +} + static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, unsigned long gfn) { @@ -1031,16 +1054,16 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) return ret; } -long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) +long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long *map) { unsigned long i; - unsigned long *rmapp, *map; + unsigned long *rmapp; preempt_disable(); rmapp = memslot->arch.rmap; - map = memslot->dirty_bitmap; for (i = 0; i < memslot->npages; ++i) { - if (kvm_test_clear_dirty(kvm, rmapp)) + if (kvm_test_clear_dirty(kvm, rmapp) && map) __set_bit_le(i, map); ++rmapp; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 817837de7362d..38c7f1bc3495c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1288,7 +1288,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) n = kvm_dirty_bitmap_bytes(memslot); memset(memslot->dirty_bitmap, 0, n); - r = kvmppc_hv_get_dirty_log(kvm, memslot); + r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap); if (r) goto out; @@ -1378,8 +1378,22 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, } void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old) { + unsigned long npages = mem->memory_size >> PAGE_SHIFT; + struct kvm_memory_slot *memslot; + + if (npages && old.npages) { + /* + * If modifying a memslot, reset all the rmap dirty bits. + * If this is a new memslot, we don't need to do anything + * since the rmap array starts out as all zeroes, + * i.e. no pages are dirty. + */ + memslot = id_to_memslot(kvm->memslots, mem->slot); + kvmppc_hv_get_dirty_log(kvm, memslot, NULL); + } } static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 63eb94e63cc3a..9955216477a4c 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -81,7 +81,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, ptel = rev->guest_rpte |= rcbits; gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); - if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) + if (!memslot) return; rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index fdadc9e57da2d..4d0667a810a46 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1239,7 +1239,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, } void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old) +{ +} + +void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 514405752988e..3a6490fc6fcd3 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1457,7 +1457,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, } void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old) +{ +} + +void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 33122dd89da94..8443e23f36052 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -411,7 +411,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_memory_slot old, int user_alloc) { - kvmppc_core_commit_memory_region(kvm, mem); + kvmppc_core_commit_memory_region(kvm, mem, old); } void kvm_arch_flush_shadow_all(struct kvm *kvm) @@ -421,6 +421,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { + kvmppc_core_flush_memslot(kvm, slot); } struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) From ed7a8d7a3c5dd4adedb271112b6faba45c7037f9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 13 Sep 2012 21:44:30 +0000 Subject: [PATCH 46/56] KVM: Move some PPC ioctl definitions to the correct place This moves the definitions of KVM_CREATE_SPAPR_TCE and KVM_ALLOCATE_RMA in include/linux/kvm.h from the section listing the vcpu ioctls to the section listing VM ioctls, as these are both implemented and documented as VM ioctls. Fortunately there is no actual collision of ioctl numbers at this point. Moving these to the correct section will reduce the probability of a future collision. This does not change the user/kernel ABI at all. Signed-off-by: Paul Mackerras Acked-by: Alexander Graf Signed-off-by: Alexander Graf --- include/linux/kvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 1649d4b57e1fa..65ad5c624c70d 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -852,6 +852,9 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info) /* Available with KVM_CAP_PPC_ALLOC_HTAB */ #define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32) +#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) +/* Available with KVM_CAP_RMA */ +#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) /* * ioctls for vcpu fds @@ -915,9 +918,6 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_XCRS */ #define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) #define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) -#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) -/* Available with KVM_CAP_RMA */ -#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) /* Available with KVM_CAP_SW_TLB */ #define KVM_DIRTY_TLB _IOW(KVMIO, 0xaa, struct kvm_dirty_tlb) /* Available with KVM_CAP_ONE_REG */ From a47d72f3613d5edfd8e752c9b804d7df35810649 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 20 Sep 2012 19:35:51 +0000 Subject: [PATCH 47/56] KVM: PPC: Book3S HV: Fix updates of vcpu->cpu This removes the powerpc "generic" updates of vcpu->cpu in load and put, and moves them to the various backends. The reason is that "HV" KVM does its own sauce with that field and the generic updates might corrupt it. The field contains the CPU# of the -first- HW CPU of the core always for all the VCPU threads of a core (the one that's online from a host Linux perspective). However, the preempt notifiers are going to be called on the threads VCPUs when they are running (due to them sleeping on our private waitqueue) causing unload to be called, potentially clobbering the value. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 3 ++- arch/powerpc/kvm/booke.c | 2 ++ arch/powerpc/kvm/powerpc.c | 2 -- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 4d0667a810a46..bf3ec5d66d8c7 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -64,7 +64,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; svcpu_put(svcpu); #endif - + vcpu->cpu = smp_processor_id(); #ifdef CONFIG_PPC_BOOK3S_32 current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; #endif @@ -84,6 +84,7 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_FP); kvmppc_giveup_ext(vcpu, MSR_VEC); kvmppc_giveup_ext(vcpu, MSR_VSX); + vcpu->cpu = -1; } int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3a6490fc6fcd3..69d047c22d20b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1509,12 +1509,14 @@ void kvmppc_decrementer_func(unsigned long data) void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + vcpu->cpu = smp_processor_id(); current->thread.kvm_vcpu = vcpu; } void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) { current->thread.kvm_vcpu = NULL; + vcpu->cpu = -1; } int __init kvmppc_booke_init(void) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8443e23f36052..6002ea938a484 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -504,7 +504,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); #endif kvmppc_core_vcpu_load(vcpu, cpu); - vcpu->cpu = smp_processor_id(); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -513,7 +512,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #ifdef CONFIG_BOOKE vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); #endif - vcpu->cpu = -1; } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, From 964ee98ccde0534548565a201827cf06d813180f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 20 Sep 2012 19:36:32 +0000 Subject: [PATCH 48/56] KVM: PPC: Book3S HV: Remove bogus update of physical thread IDs When making a vcpu non-runnable we incorrectly changed the thread IDs of all other threads on the core, just remove that code. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 38c7f1bc3495c..c9ae3148c981d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -706,17 +706,11 @@ extern void xics_wake_cpu(int cpu); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, struct kvm_vcpu *vcpu) { - struct kvm_vcpu *v; - if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) return; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; --vc->n_runnable; ++vc->n_busy; - /* decrement the physical thread id of each following vcpu */ - v = vcpu; - list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list) - --v->arch.ptid; list_del(&vcpu->arch.run_list); } From 70bddfefbdcdbfdebd81d8b59ff8a7fa5d450ccc Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 20 Sep 2012 19:39:21 +0000 Subject: [PATCH 49/56] KVM: PPC: Book3S HV: Fix calculation of guest phys address for MMIO emulation In the case where the host kernel is using a 64kB base page size and the guest uses a 4k HPTE (hashed page table entry) to map an emulated MMIO device, we were calculating the guest physical address wrongly. We were calculating a gfn as the guest physical address shifted right 16 bits (PAGE_SHIFT) but then only adding back in 12 bits from the effective address, since the HPTE had a 4k page size. Thus the gpa reported to userspace was missing 4 bits. Instead, we now compute the guest physical address from the HPTE without reference to the host page size, and then compute the gfn by shifting the gpa right PAGE_SHIFT bits. Reported-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f598366e51c64..7a4aae99ac5b1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -571,7 +571,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, struct kvm *kvm = vcpu->kvm; unsigned long *hptep, hpte[3], r; unsigned long mmu_seq, psize, pte_size; - unsigned long gfn, hva, pfn; + unsigned long gpa, gfn, hva, pfn; struct kvm_memory_slot *memslot; unsigned long *rmap; struct revmap_entry *rev; @@ -609,15 +609,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Translate the logical address and get the page */ psize = hpte_page_size(hpte[0], r); - gfn = hpte_rpn(r, psize); + gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1)); + gfn = gpa >> PAGE_SHIFT; memslot = gfn_to_memslot(kvm, gfn); /* No memslot means it's an emulated MMIO region */ - if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { - unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1)); + if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, dsisr & DSISR_ISSTORE); - } if (!kvm->arch.using_mmu_notifiers) return -EFAULT; /* should never get here */ From e400e72f250d2567e89c9bafb47ab91e8d9a15a2 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 22 Aug 2012 15:04:23 +0000 Subject: [PATCH 50/56] KVM: PPC: e500: fix allocation size error on g2h_tlb1_map We were only allocating half the bytes we need, which was made more obvious by a recent fix to the memset in clear_tlb1_bitmap(). Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Cc: stable@vger.kernel.org --- arch/powerpc/kvm/e500_tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 43489a8fa9856..a27d134eef367 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1385,7 +1385,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) if (!vcpu_e500->gtlb_priv[1]) goto err; - vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(unsigned int) * + vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) * vcpu_e500->gtlb_params[1].entries, GFP_KERNEL); if (!vcpu_e500->g2h_tlb1_map) From adbb48a854bf8dee556dc42b96dd61503351a82d Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 22 Aug 2012 15:04:24 +0000 Subject: [PATCH 51/56] KVM: PPC: e500: MMU API: fix leak of shared_tlb_pages This was found by kmemleak. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index a27d134eef367..641f97847b95a 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1134,6 +1134,8 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) } vcpu_e500->num_shared_tlb_pages = 0; + + kfree(vcpu_e500->shared_tlb_pages); vcpu_e500->shared_tlb_pages = NULL; } else { kfree(vcpu_e500->gtlb_arch); From 5bd1cf118533aba41b3fbd4834e6362a9237db71 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 22 Aug 2012 15:03:50 +0000 Subject: [PATCH 52/56] KVM: PPC: set IN_GUEST_MODE before checking requests Avoid a race as described in the code comment. Also remove a related smp_wmb() from booke's kvmppc_prepare_to_enter(). I can't see any reason for it, and the book3s_pr version doesn't have it. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 1 - arch/powerpc/kvm/powerpc.c | 14 +++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 69d047c22d20b..3d1f35dc78628 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -674,7 +674,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) out: vcpu->mode = OUTSIDE_GUEST_MODE; - smp_wmb(); return ret; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6002ea938a484..deb0d596d815f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -78,7 +78,16 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) break; } + vcpu->mode = IN_GUEST_MODE; + + /* + * Reading vcpu->requests must happen after setting vcpu->mode, + * so we don't miss a request because the requester sees + * OUTSIDE_GUEST_MODE and assumes we'll be checking requests + * before next entering the guest (and thus doesn't IPI). + */ smp_mb(); + if (vcpu->requests) { /* Make sure we process requests preemptable */ local_irq_enable(); @@ -111,11 +120,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) #endif kvm_guest_enter(); - - /* Going into guest context! Yay! */ - vcpu->mode = IN_GUEST_MODE; - smp_wmb(); - break; } From a136a8bdc02fc14625ac45ee846cc646fc46597e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Sep 2012 20:31:56 +0000 Subject: [PATCH 53/56] KVM: PPC: Book3S: Get/set guest SPRs using the GET/SET_ONE_REG interface This enables userspace to get and set various SPRs (special-purpose registers) using the KVM_[GS]ET_ONE_REG ioctls. With this, userspace can get and set all the SPRs that are part of the guest state, either through the KVM_[GS]ET_REGS ioctls, the KVM_[GS]ET_SREGS ioctls, or the KVM_[GS]ET_ONE_REG ioctls. The SPRs that are added here are: - DABR: Data address breakpoint register - DSCR: Data stream control register - PURR: Processor utilization of resources register - SPURR: Scaled PURR - DAR: Data address register - DSISR: Data storage interrupt status register - AMR: Authority mask register - UAMOR: User authority mask override register - MMCR0, MMCR1, MMCRA: Performance monitor unit control registers - PMC1..PMC8: Performance monitor unit counter registers In order to reduce code duplication between PR and HV KVM code, this moves the kvm_vcpu_ioctl_[gs]et_one_reg functions into book3s.c and centralizes the copying between user and kernel space there. The registers that are handled differently between PR and HV, and those that exist only in one flavor, are handled in kvmppc_[gs]et_one_reg() functions that are specific to each flavor. Signed-off-by: Paul Mackerras [agraf: minimal style fixes] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 19 ++++++++ arch/powerpc/include/asm/kvm.h | 21 +++++++++ arch/powerpc/include/asm/kvm_ppc.h | 32 +++++++++++++ arch/powerpc/kvm/book3s.c | 68 ++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 76 +++++++++++++++++++++++++----- arch/powerpc/kvm/book3s_pr.c | 23 +++++---- 6 files changed, 215 insertions(+), 24 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index c84739e2c0f37..af9b7a06bcae5 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1740,6 +1740,25 @@ registers, find a list below: PPC | KVM_REG_PPC_IAC4 | 64 PPC | KVM_REG_PPC_DAC1 | 64 PPC | KVM_REG_PPC_DAC2 | 64 + PPC | KVM_REG_PPC_DABR | 64 + PPC | KVM_REG_PPC_DSCR | 64 + PPC | KVM_REG_PPC_PURR | 64 + PPC | KVM_REG_PPC_SPURR | 64 + PPC | KVM_REG_PPC_DAR | 64 + PPC | KVM_REG_PPC_DSISR | 32 + PPC | KVM_REG_PPC_AMR | 64 + PPC | KVM_REG_PPC_UAMOR | 64 + PPC | KVM_REG_PPC_MMCR0 | 64 + PPC | KVM_REG_PPC_MMCR1 | 64 + PPC | KVM_REG_PPC_MMCRA | 64 + PPC | KVM_REG_PPC_PMC1 | 32 + PPC | KVM_REG_PPC_PMC2 | 32 + PPC | KVM_REG_PPC_PMC3 | 32 + PPC | KVM_REG_PPC_PMC4 | 32 + PPC | KVM_REG_PPC_PMC5 | 32 + PPC | KVM_REG_PPC_PMC6 | 32 + PPC | KVM_REG_PPC_PMC7 | 32 + PPC | KVM_REG_PPC_PMC8 | 32 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 3c14202a3c84a..9557576a53255 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -338,5 +338,26 @@ struct kvm_book3e_206_tlb_params { #define KVM_REG_PPC_IAC4 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5) #define KVM_REG_PPC_DAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6) #define KVM_REG_PPC_DAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7) +#define KVM_REG_PPC_DABR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8) +#define KVM_REG_PPC_DSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9) +#define KVM_REG_PPC_PURR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa) +#define KVM_REG_PPC_SPURR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb) +#define KVM_REG_PPC_DAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc) +#define KVM_REG_PPC_DSISR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd) +#define KVM_REG_PPC_AMR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xe) +#define KVM_REG_PPC_UAMOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xf) + +#define KVM_REG_PPC_MMCR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10) +#define KVM_REG_PPC_MMCR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11) +#define KVM_REG_PPC_MMCRA (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12) + +#define KVM_REG_PPC_PMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18) +#define KVM_REG_PPC_PMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19) +#define KVM_REG_PPC_PMC3 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1a) +#define KVM_REG_PPC_PMC4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1b) +#define KVM_REG_PPC_PMC5 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1c) +#define KVM_REG_PPC_PMC6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1d) +#define KVM_REG_PPC_PMC7 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1e) +#define KVM_REG_PPC_PMC8 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1f) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 3fb980d293e58..709f0ddae1f14 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -28,6 +28,7 @@ #include #include #include +#include #ifdef CONFIG_PPC_BOOK3S #include #else @@ -196,6 +197,35 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) return r; } +union kvmppc_one_reg { + u32 wval; + u64 dval; +}; + +#define one_reg_size(id) \ + (1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + +#define get_reg_val(id, reg) ({ \ + union kvmppc_one_reg __u; \ + switch (one_reg_size(id)) { \ + case 4: __u.wval = (reg); break; \ + case 8: __u.dval = (reg); break; \ + default: BUG(); \ + } \ + __u; \ +}) + + +#define set_reg_val(id, val) ({ \ + u64 __v; \ + switch (one_reg_size(id)) { \ + case 4: __v = (val).wval; break; \ + case 8: __v = (val).dval; break; \ + default: BUG(); \ + } \ + __v; \ +}) + void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); @@ -204,6 +234,8 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index e94666566fa93..a5af28fc3a8fa 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -485,6 +485,74 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -ENOTSUPP; } +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r; + union kvmppc_one_reg val; + int size; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + r = kvmppc_get_one_reg(vcpu, reg->id, &val); + + if (r == -EINVAL) { + r = 0; + switch (reg->id) { + case KVM_REG_PPC_DAR: + val = get_reg_val(reg->id, vcpu->arch.shared->dar); + break; + case KVM_REG_PPC_DSISR: + val = get_reg_val(reg->id, vcpu->arch.shared->dsisr); + break; + default: + r = -EINVAL; + break; + } + } + if (r) + return r; + + if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) + r = -EFAULT; + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r; + union kvmppc_one_reg val; + int size; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) + return -EFAULT; + + r = kvmppc_set_one_reg(vcpu, reg->id, &val); + + if (r == -EINVAL) { + r = 0; + switch (reg->id) { + case KVM_REG_PPC_DAR: + vcpu->arch.shared->dar = set_reg_val(reg->id, val); + break; + case KVM_REG_PPC_DSISR: + vcpu->arch.shared->dsisr = set_reg_val(reg->id, val); + break; + default: + r = -EINVAL; + break; + } + } + + return r; +} + int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c9ae3148c981d..1cc6b77fa63d9 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -544,36 +544,88 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } -int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = -EINVAL; + int r = 0; + long int i; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_HIOR: - r = put_user(0, (u64 __user *)reg->addr); + *val = get_reg_val(id, 0); + break; + case KVM_REG_PPC_DABR: + *val = get_reg_val(id, vcpu->arch.dabr); + break; + case KVM_REG_PPC_DSCR: + *val = get_reg_val(id, vcpu->arch.dscr); + break; + case KVM_REG_PPC_PURR: + *val = get_reg_val(id, vcpu->arch.purr); + break; + case KVM_REG_PPC_SPURR: + *val = get_reg_val(id, vcpu->arch.spurr); + break; + case KVM_REG_PPC_AMR: + *val = get_reg_val(id, vcpu->arch.amr); + break; + case KVM_REG_PPC_UAMOR: + *val = get_reg_val(id, vcpu->arch.uamor); + break; + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA: + i = id - KVM_REG_PPC_MMCR0; + *val = get_reg_val(id, vcpu->arch.mmcr[i]); + break; + case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: + i = id - KVM_REG_PPC_PMC1; + *val = get_reg_val(id, vcpu->arch.pmc[i]); break; default: + r = -EINVAL; break; } return r; } -int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = -EINVAL; + int r = 0; + long int i; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_HIOR: - { - u64 hior; /* Only allow this to be set to zero */ - r = get_user(hior, (u64 __user *)reg->addr); - if (!r && (hior != 0)) + if (set_reg_val(id, *val)) r = -EINVAL; break; - } + case KVM_REG_PPC_DABR: + vcpu->arch.dabr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_DSCR: + vcpu->arch.dscr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_PURR: + vcpu->arch.purr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_SPURR: + vcpu->arch.spurr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_AMR: + vcpu->arch.amr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_UAMOR: + vcpu->arch.uamor = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA: + i = id - KVM_REG_PPC_MMCR0; + vcpu->arch.mmcr[i] = set_reg_val(id, *val); + break; + case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: + i = id - KVM_REG_PPC_PMC1; + vcpu->arch.pmc[i] = set_reg_val(id, *val); + break; default: + r = -EINVAL; break; } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index bf3ec5d66d8c7..c81109f3a3766 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -945,34 +945,33 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } -int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = -EINVAL; + int r = 0; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_HIOR: - r = copy_to_user((u64 __user *)(long)reg->addr, - &to_book3s(vcpu)->hior, sizeof(u64)); + *val = get_reg_val(id, to_book3s(vcpu)->hior); break; default: + r = -EINVAL; break; } return r; } -int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = -EINVAL; + int r = 0; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_HIOR: - r = copy_from_user(&to_book3s(vcpu)->hior, - (u64 __user *)(long)reg->addr, sizeof(u64)); - if (!r) - to_book3s(vcpu)->hior_explicit = true; + to_book3s(vcpu)->hior = set_reg_val(id, *val); + to_book3s(vcpu)->hior_explicit = true; break; default: + r = -EINVAL; break; } From a8bd19ef4dd49f0eef86a4a8eb43d60f967236b8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Sep 2012 20:32:30 +0000 Subject: [PATCH 54/56] KVM: PPC: Book3S: Get/set guest FP regs using the GET/SET_ONE_REG interface This enables userspace to get and set all the guest floating-point state using the KVM_[GS]ET_ONE_REG ioctls. The floating-point state includes all of the traditional floating-point registers and the FPSCR (floating point status/control register), all the VMX/Altivec vector registers and the VSCR (vector status/control register), and on POWER7, the vector-scalar registers (note that each FP register is the high-order half of the corresponding VSR). Most of these are implemented in common Book 3S code, except for VSX on POWER7. Because HV and PR differ in how they store the FP and VSX registers on POWER7, the code for these cases is not common. On POWER7, the FP registers are the upper halves of the VSX registers vsr0 - vsr31. PR KVM stores vsr0 - vsr31 in two halves, with the upper halves in the arch.fpr[] array and the lower halves in the arch.vsr[] array, whereas HV KVM on POWER7 stores the whole VSX register in arch.vsr[]. Signed-off-by: Paul Mackerras [agraf: fix whitespace, vsx compilation] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 11 +++++++ arch/powerpc/include/asm/kvm.h | 20 +++++++++++++ arch/powerpc/include/asm/kvm_ppc.h | 2 ++ arch/powerpc/kvm/book3s.c | 48 ++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 42 ++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_pr.c | 26 ++++++++++++++++ 6 files changed, 149 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index af9b7a06bcae5..71dc7e2ec8eb6 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1759,6 +1759,17 @@ registers, find a list below: PPC | KVM_REG_PPC_PMC6 | 32 PPC | KVM_REG_PPC_PMC7 | 32 PPC | KVM_REG_PPC_PMC8 | 32 + PPC | KVM_REG_PPC_FPR0 | 64 + ... + PPC | KVM_REG_PPC_FPR31 | 64 + PPC | KVM_REG_PPC_VR0 | 128 + ... + PPC | KVM_REG_PPC_VR31 | 128 + PPC | KVM_REG_PPC_VSR0 | 128 + ... + PPC | KVM_REG_PPC_VSR31 | 128 + PPC | KVM_REG_PPC_FPSCR | 64 + PPC | KVM_REG_PPC_VSCR | 32 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 9557576a53255..1466975129c7b 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -360,4 +360,24 @@ struct kvm_book3e_206_tlb_params { #define KVM_REG_PPC_PMC7 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1e) #define KVM_REG_PPC_PMC8 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1f) +/* 32 floating-point registers */ +#define KVM_REG_PPC_FPR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x20) +#define KVM_REG_PPC_FPR(n) (KVM_REG_PPC_FPR0 + (n)) +#define KVM_REG_PPC_FPR31 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3f) + +/* 32 VMX/Altivec vector registers */ +#define KVM_REG_PPC_VR0 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x40) +#define KVM_REG_PPC_VR(n) (KVM_REG_PPC_VR0 + (n)) +#define KVM_REG_PPC_VR31 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x5f) + +/* 32 double-width FP registers for VSX */ +/* High-order halves overlap with FP regs */ +#define KVM_REG_PPC_VSR0 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x60) +#define KVM_REG_PPC_VSR(n) (KVM_REG_PPC_VSR0 + (n)) +#define KVM_REG_PPC_VSR31 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x7f) + +/* FP and vector status/control registers */ +#define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80) +#define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 709f0ddae1f14..51604a16c8a5a 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -200,6 +200,8 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) union kvmppc_one_reg { u32 wval; u64 dval; + vector128 vval; + u64 vsxval[2]; }; #define one_reg_size(id) \ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a5af28fc3a8fa..a4b6452852409 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -490,6 +490,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) int r; union kvmppc_one_reg val; int size; + long int i; size = one_reg_size(reg->id); if (size > sizeof(val)) @@ -506,6 +507,29 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_DSISR: val = get_reg_val(reg->id, vcpu->arch.shared->dsisr); break; + case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: + i = reg->id - KVM_REG_PPC_FPR0; + val = get_reg_val(reg->id, vcpu->arch.fpr[i]); + break; + case KVM_REG_PPC_FPSCR: + val = get_reg_val(reg->id, vcpu->arch.fpscr); + break; +#ifdef CONFIG_ALTIVEC + case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + val.vval = vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0]; + break; + case KVM_REG_PPC_VSCR: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); + break; +#endif /* CONFIG_ALTIVEC */ default: r = -EINVAL; break; @@ -525,6 +549,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) int r; union kvmppc_one_reg val; int size; + long int i; size = one_reg_size(reg->id); if (size > sizeof(val)) @@ -544,6 +569,29 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_DSISR: vcpu->arch.shared->dsisr = set_reg_val(reg->id, val); break; + case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: + i = reg->id - KVM_REG_PPC_FPR0; + vcpu->arch.fpr[i] = set_reg_val(reg->id, val); + break; + case KVM_REG_PPC_FPSCR: + vcpu->arch.fpscr = set_reg_val(reg->id, val); + break; +#ifdef CONFIG_ALTIVEC + case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; + break; + case KVM_REG_PPC_VSCR: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); + break; +#endif /* CONFIG_ALTIVEC */ default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1cc6b77fa63d9..94ec0e30969d1 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -579,6 +579,27 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) i = id - KVM_REG_PPC_PMC1; *val = get_reg_val(id, vcpu->arch.pmc[i]); break; +#ifdef CONFIG_VSX + case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + /* VSX => FP reg i is stored in arch.vsr[2*i] */ + long int i = id - KVM_REG_PPC_FPR0; + *val = get_reg_val(id, vcpu->arch.vsr[2 * i]); + } else { + /* let generic code handle it */ + r = -EINVAL; + } + break; + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + long int i = id - KVM_REG_PPC_VSR0; + val->vsxval[0] = vcpu->arch.vsr[2 * i]; + val->vsxval[1] = vcpu->arch.vsr[2 * i + 1]; + } else { + r = -ENXIO; + } + break; +#endif /* CONFIG_VSX */ default: r = -EINVAL; break; @@ -624,6 +645,27 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) i = id - KVM_REG_PPC_PMC1; vcpu->arch.pmc[i] = set_reg_val(id, *val); break; +#ifdef CONFIG_VSX + case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + /* VSX => FP reg i is stored in arch.vsr[2*i] */ + long int i = id - KVM_REG_PPC_FPR0; + vcpu->arch.vsr[2 * i] = set_reg_val(id, *val); + } else { + /* let generic code handle it */ + r = -EINVAL; + } + break; + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + long int i = id - KVM_REG_PPC_VSR0; + vcpu->arch.vsr[2 * i] = val->vsxval[0]; + vcpu->arch.vsr[2 * i + 1] = val->vsxval[1]; + } else { + r = -ENXIO; + } + break; +#endif /* CONFIG_VSX */ default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index c81109f3a3766..b853696b6d8e3 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -953,6 +953,19 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) case KVM_REG_PPC_HIOR: *val = get_reg_val(id, to_book3s(vcpu)->hior); break; +#ifdef CONFIG_VSX + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: { + long int i = id - KVM_REG_PPC_VSR0; + + if (!cpu_has_feature(CPU_FTR_VSX)) { + r = -ENXIO; + break; + } + val->vsxval[0] = vcpu->arch.fpr[i]; + val->vsxval[1] = vcpu->arch.vsr[i]; + break; + } +#endif /* CONFIG_VSX */ default: r = -EINVAL; break; @@ -970,6 +983,19 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) to_book3s(vcpu)->hior = set_reg_val(id, *val); to_book3s(vcpu)->hior_explicit = true; break; +#ifdef CONFIG_VSX + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: { + long int i = id - KVM_REG_PPC_VSR0; + + if (!cpu_has_feature(CPU_FTR_VSX)) { + r = -ENXIO; + break; + } + vcpu->arch.fpr[i] = val->vsxval[0]; + vcpu->arch.vsr[i] = val->vsxval[1]; + break; + } +#endif /* CONFIG_VSX */ default: r = -EINVAL; break; From 55b665b0263ae88a776071306ef1eee4b769016b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Sep 2012 20:33:06 +0000 Subject: [PATCH 55/56] KVM: PPC: Book3S HV: Provide a way for userspace to get/set per-vCPU areas The PAPR paravirtualization interface lets guests register three different types of per-vCPU buffer areas in its memory for communication with the hypervisor. These are called virtual processor areas (VPAs). Currently the hypercalls to register and unregister VPAs are handled by KVM in the kernel, and userspace has no way to know about or save and restore these registrations across a migration. This adds "register" codes for these three areas that userspace can use with the KVM_GET/SET_ONE_REG ioctls to see what addresses have been registered, and to register or unregister them. This will be needed for guest hibernation and migration, and is also needed so that userspace can unregister them on reset (otherwise we corrupt guest memory after reboot by writing to the VPAs registered by the previous kernel). The "register" for the VPA is a 64-bit value containing the address, since the length of the VPA is fixed. The "registers" for the SLB shadow buffer and dispatch trace log (DTL) are 128 bits long, consisting of the guest physical address in the high (first) 64 bits and the length in the low 64 bits. This also fixes a bug where we were calling init_vpa unconditionally, leading to an oops when unregistering the VPA. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 3 ++ arch/powerpc/include/asm/kvm.h | 6 +++ arch/powerpc/include/asm/kvm_ppc.h | 4 ++ arch/powerpc/kvm/book3s_hv.c | 64 +++++++++++++++++++++++++++++- 4 files changed, 76 insertions(+), 1 deletion(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 71dc7e2ec8eb6..e726d76eeebbe 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1770,6 +1770,9 @@ registers, find a list below: PPC | KVM_REG_PPC_VSR31 | 128 PPC | KVM_REG_PPC_FPSCR | 64 PPC | KVM_REG_PPC_VSCR | 32 + PPC | KVM_REG_PPC_VPA_ADDR | 64 + PPC | KVM_REG_PPC_VPA_SLB | 128 + PPC | KVM_REG_PPC_VPA_DTL | 128 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 1466975129c7b..b89ae4db45ced 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -380,4 +380,10 @@ struct kvm_book3e_206_tlb_params { #define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80) #define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81) +/* Virtual processor areas */ +/* For SLB & DTL, address in high (first) half, length in low half */ +#define KVM_REG_PPC_VPA_ADDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x82) +#define KVM_REG_PPC_VPA_SLB (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83) +#define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 51604a16c8a5a..609cca3e94262 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -202,6 +202,10 @@ union kvmppc_one_reg { u64 dval; vector128 vval; u64 vsxval[2]; + struct { + u64 addr; + u64 length; + } vpaval; }; #define one_reg_size(id) \ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 94ec0e30969d1..9a15da76e56be 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -143,6 +143,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) vpa->yield_count = 1; } +static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v, + unsigned long addr, unsigned long len) +{ + /* check address is cacheline aligned */ + if (addr & (L1_CACHE_BYTES - 1)) + return -EINVAL; + spin_lock(&vcpu->arch.vpa_update_lock); + if (v->next_gpa != addr || v->len != len) { + v->next_gpa = addr; + v->len = addr ? len : 0; + v->update_pending = 1; + } + spin_unlock(&vcpu->arch.vpa_update_lock); + return 0; +} + /* Length for a per-processor buffer is passed in at offset 4 in the buffer */ struct reg_vpa { u32 dummy; @@ -321,7 +337,8 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) spin_lock(&vcpu->arch.vpa_update_lock); if (vcpu->arch.vpa.update_pending) { kvmppc_update_vpa(vcpu, &vcpu->arch.vpa); - init_vpa(vcpu, vcpu->arch.vpa.pinned_addr); + if (vcpu->arch.vpa.pinned_addr) + init_vpa(vcpu, vcpu->arch.vpa.pinned_addr); } if (vcpu->arch.dtl.update_pending) { kvmppc_update_vpa(vcpu, &vcpu->arch.dtl); @@ -600,6 +617,23 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) } break; #endif /* CONFIG_VSX */ + case KVM_REG_PPC_VPA_ADDR: + spin_lock(&vcpu->arch.vpa_update_lock); + *val = get_reg_val(id, vcpu->arch.vpa.next_gpa); + spin_unlock(&vcpu->arch.vpa_update_lock); + break; + case KVM_REG_PPC_VPA_SLB: + spin_lock(&vcpu->arch.vpa_update_lock); + val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa; + val->vpaval.length = vcpu->arch.slb_shadow.len; + spin_unlock(&vcpu->arch.vpa_update_lock); + break; + case KVM_REG_PPC_VPA_DTL: + spin_lock(&vcpu->arch.vpa_update_lock); + val->vpaval.addr = vcpu->arch.dtl.next_gpa; + val->vpaval.length = vcpu->arch.dtl.len; + spin_unlock(&vcpu->arch.vpa_update_lock); + break; default: r = -EINVAL; break; @@ -612,6 +646,7 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { int r = 0; long int i; + unsigned long addr, len; switch (id) { case KVM_REG_PPC_HIOR: @@ -666,6 +701,33 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) } break; #endif /* CONFIG_VSX */ + case KVM_REG_PPC_VPA_ADDR: + addr = set_reg_val(id, *val); + r = -EINVAL; + if (!addr && (vcpu->arch.slb_shadow.next_gpa || + vcpu->arch.dtl.next_gpa)) + break; + r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca)); + break; + case KVM_REG_PPC_VPA_SLB: + addr = val->vpaval.addr; + len = val->vpaval.length; + r = -EINVAL; + if (addr && !vcpu->arch.vpa.next_gpa) + break; + r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len); + break; + case KVM_REG_PPC_VPA_DTL: + addr = val->vpaval.addr; + len = val->vpaval.length; + r = -EINVAL; + if (len < sizeof(struct dtl_entry)) + break; + if (addr && !vcpu->arch.vpa.next_gpa) + break; + len -= len % sizeof(struct dtl_entry); + r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); + break; default: r = -EINVAL; break; From 12ecd9570d8941c15602a11725ec9b0ede48d6c2 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 4 Aug 2012 23:52:33 +0000 Subject: [PATCH 56/56] arch/powerpc/kvm/e500_tlb.c: fix error return code Convert a 0 error return code to a negative one, as returned elsewhere in the function. A new label is also added to avoid freeing things that are known to not yet be allocated. A simplified version of the semantic match that finds the first problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier ret; expression e,e1,e2,e3,e4,x; @@ ( if (\(ret != 0\|ret < 0\) || ...) { ... return ...; } | ret = 0 ) ... when != ret = e1 *x = \(kmalloc\|kzalloc\|kcalloc\|devm_kzalloc\|ioremap\|ioremap_nocache\|devm_ioremap\|devm_ioremap_nocache\)(...); ... when != x = e2 when != ret = e3 *if (x == NULL || ...) { ... when != ret = e4 * return ret; } // Signed-off-by: Julia Lawall Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 641f97847b95a..c73389477d177 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1233,21 +1233,27 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, } virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL); - if (!virt) + if (!virt) { + ret = -ENOMEM; goto err_put_page; + } privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0], GFP_KERNEL); privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1], GFP_KERNEL); - if (!privs[0] || !privs[1]) - goto err_put_page; + if (!privs[0] || !privs[1]) { + ret = -ENOMEM; + goto err_privs; + } g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1], GFP_KERNEL); - if (!g2h_bitmap) - goto err_put_page; + if (!g2h_bitmap) { + ret = -ENOMEM; + goto err_privs; + } free_gtlb(vcpu_e500); @@ -1287,10 +1293,11 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, kvmppc_recalc_tlb1map_range(vcpu_e500); return 0; -err_put_page: +err_privs: kfree(privs[0]); kfree(privs[1]); +err_put_page: for (i = 0; i < num_pages; i++) put_page(pages[i]);