Skip to content

Commit

Permalink
Merge tag 'kvm-s390-20140130' of git://git.kernel.org/pub/scm/linux/k…
Browse files Browse the repository at this point in the history
…ernel/git/kvms390/linux into HEAD

Two new features are added by this patch set:
- The floating interrupt controller (flic) that allows us to inject,
  clear and inspect non-vcpu local interrupts. This also gives us an
  opportunity to fix deficiencies in our existing interrupt definitions.
- Support for asynchronous page faults via the pfault mechanism. Testing
  show significant guest performance improvements under host swap.
  • Loading branch information
bonzini committed Feb 4, 2014
2 parents 4f34d68 + 536336c commit f244d91
Show file tree
Hide file tree
Showing 20 changed files with 822 additions and 104 deletions.
46 changes: 46 additions & 0 deletions Documentation/virtual/kvm/devices/s390_flic.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
FLIC (floating interrupt controller)
====================================

FLIC handles floating (non per-cpu) interrupts, i.e. I/O, service and some
machine check interruptions. All interrupts are stored in a per-vm list of
pending interrupts. FLIC performs operations on this list.

Only one FLIC instance may be instantiated.

FLIC provides support to
- add interrupts (KVM_DEV_FLIC_ENQUEUE)
- inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS)
- purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS)
- enable/disable for the guest transparent async page faults

Groups:
KVM_DEV_FLIC_ENQUEUE
Passes a buffer and length into the kernel which are then injected into
the list of pending interrupts.
attr->addr contains the pointer to the buffer and attr->attr contains
the length of the buffer.
The format of the data structure kvm_s390_irq as it is copied from userspace
is defined in usr/include/linux/kvm.h.

KVM_DEV_FLIC_GET_ALL_IRQS
Copies all floating interrupts into a buffer provided by userspace.
When the buffer is too small it returns -ENOMEM, which is the indication
for userspace to try again with a bigger buffer.
All interrupts remain pending, i.e. are not deleted from the list of
currently pending interrupts.
attr->addr contains the userspace address of the buffer into which all
interrupt data will be copied.
attr->attr contains the size of the buffer in bytes.

KVM_DEV_FLIC_CLEAR_IRQS
Simply deletes all elements from the list of currently pending floating
interrupts. No interrupts are injected into the guest.

KVM_DEV_FLIC_APF_ENABLE
Enables async page faults for the guest. So in case of a major page fault
the host is allowed to handle this async and continues the guest.

KVM_DEV_FLIC_APF_DISABLE_WAIT
Disables async page faults for the guest and waits until already pending
async page faults are done. This is necessary to trigger a completion interrupt
for every init interrupt before migrating the interrupt list.
58 changes: 25 additions & 33 deletions arch/s390/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <asm/debug.h>
#include <asm/cpu.h>

Expand Down Expand Up @@ -168,18 +169,6 @@ struct kvm_vcpu_stat {
u32 diagnose_9c;
};

struct kvm_s390_io_info {
__u16 subchannel_id; /* 0x0b8 */
__u16 subchannel_nr; /* 0x0ba */
__u32 io_int_parm; /* 0x0bc */
__u32 io_int_word; /* 0x0c0 */
};

struct kvm_s390_ext_info {
__u32 ext_params;
__u64 ext_params2;
};

#define PGM_OPERATION 0x01
#define PGM_PRIVILEGED_OP 0x02
#define PGM_EXECUTE 0x03
Expand All @@ -188,27 +177,6 @@ struct kvm_s390_ext_info {
#define PGM_SPECIFICATION 0x06
#define PGM_DATA 0x07

struct kvm_s390_pgm_info {
__u16 code;
};

struct kvm_s390_prefix_info {
__u32 address;
};

struct kvm_s390_extcall_info {
__u16 code;
};

struct kvm_s390_emerg_info {
__u16 code;
};

struct kvm_s390_mchk_info {
__u64 cr14;
__u64 mcic;
};

struct kvm_s390_interrupt_info {
struct list_head list;
u64 type;
Expand Down Expand Up @@ -246,6 +214,7 @@ struct kvm_s390_float_interrupt {
unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1)
/ sizeof(long)];
struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS];
unsigned int irq_count;
};


Expand All @@ -262,6 +231,10 @@ struct kvm_vcpu_arch {
u64 stidp_data;
};
struct gmap *gmap;
#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
unsigned long pfault_token;
unsigned long pfault_select;
unsigned long pfault_compare;
};

struct kvm_vm_stat {
Expand All @@ -275,6 +248,7 @@ struct kvm_arch{
struct sca_block *sca;
debug_info_t *dbf;
struct kvm_s390_float_interrupt float_int;
struct kvm_device *flic;
struct gmap *gmap;
int css_support;
};
Expand All @@ -287,6 +261,24 @@ static inline bool kvm_is_error_hva(unsigned long addr)
return IS_ERR_VALUE(addr);
}

#define ASYNC_PF_PER_VCPU 64
struct kvm_vcpu;
struct kvm_async_pf;
struct kvm_arch_async_pf {
unsigned long pfault_token;
};

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);

extern int sie64a(struct kvm_s390_sie_block *, u64 *);
extern char sie_exit;
#endif
2 changes: 2 additions & 0 deletions arch/s390/include/asm/pgtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,7 @@ static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
* @table: pointer to the page directory
* @asce: address space control element for gmap page table
* @crst_list: list of all crst tables used in the guest address space
* @pfault_enabled: defines if pfaults are applicable for the guest
*/
struct gmap {
struct list_head list;
Expand All @@ -775,6 +776,7 @@ struct gmap {
unsigned long asce;
void *private;
struct list_head crst_list;
bool pfault_enabled;
};

/**
Expand Down
1 change: 1 addition & 0 deletions arch/s390/include/asm/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ struct thread_struct {
unsigned long ksp; /* kernel stack pointer */
mm_segment_t mm_segment;
unsigned long gmap_addr; /* address of last gmap fault. */
unsigned int gmap_pfault; /* signal of a pending guest pfault */
struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
unsigned long per_flags; /* Flags to control debug behavior */
Expand Down
19 changes: 19 additions & 0 deletions arch/s390/include/uapi/asm/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,22 @@

#define __KVM_S390

/* Device control API: s390-specific devices */
#define KVM_DEV_FLIC_GET_ALL_IRQS 1
#define KVM_DEV_FLIC_ENQUEUE 2
#define KVM_DEV_FLIC_CLEAR_IRQS 3
#define KVM_DEV_FLIC_APF_ENABLE 4
#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5
/*
* We can have up to 4*64k pending subchannels + 8 adapter interrupts,
* as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
* There are also sclp and machine checks. This gives us
* sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000
* Lets round up to 8192 pages.
*/
#define KVM_S390_MAX_FLOAT_IRQS 266250
#define KVM_S390_FLIC_MAX_BUFFER 0x2000000

/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* general purpose regs for s390 */
Expand Down Expand Up @@ -57,4 +73,7 @@ struct kvm_sync_regs {
#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
#define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5)
#define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6)
#define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7)
#endif
2 changes: 2 additions & 0 deletions arch/s390/kvm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ config KVM
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_EVENTFD
select KVM_ASYNC_PF
select KVM_ASYNC_PF_SYNC
---help---
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
Expand Down
2 changes: 1 addition & 1 deletion arch/s390/kvm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# as published by the Free Software Foundation.

KVM := ../../../virt/kvm
common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o
common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o

ccflags-y := -Ivirt/kvm -Iarch/s390/kvm

Expand Down
84 changes: 84 additions & 0 deletions arch/s390/kvm/diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "kvm-s390.h"
#include "trace.h"
#include "trace-s390.h"
#include "gaccess.h"

static int diag_release_pages(struct kvm_vcpu *vcpu)
{
Expand Down Expand Up @@ -46,6 +47,87 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
return 0;
}

static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
{
struct prs_parm {
u16 code;
u16 subcode;
u16 parm_len;
u16 parm_version;
u64 token_addr;
u64 select_mask;
u64 compare_mask;
u64 zarch;
};
struct prs_parm parm;
int rc;
u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
unsigned long hva_token = KVM_HVA_ERR_BAD;

if (vcpu->run->s.regs.gprs[rx] & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

switch (parm.subcode) {
case 0: /* TOKEN */
if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
/*
* If the pagefault handshake is already activated,
* the token must not be changed. We have to return
* decimal 8 instead, as mandated in SC24-6084.
*/
vcpu->run->s.regs.gprs[ry] = 8;
return 0;
}

if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
if (kvm_is_error_hva(hva_token))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);

vcpu->arch.pfault_token = parm.token_addr;
vcpu->arch.pfault_select = parm.select_mask;
vcpu->arch.pfault_compare = parm.compare_mask;
vcpu->run->s.regs.gprs[ry] = 0;
rc = 0;
break;
case 1: /*
* CANCEL
* Specification allows to let already pending tokens survive
* the cancel, therefore to reduce code complexity, we assume
* all outstanding tokens are already pending.
*/
if (parm.token_addr || parm.select_mask ||
parm.compare_mask || parm.zarch)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

vcpu->run->s.regs.gprs[ry] = 0;
/*
* If the pfault handling was not established or is already
* canceled SC24-6084 requests to return decimal 4.
*/
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
vcpu->run->s.regs.gprs[ry] = 4;
else
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;

rc = 0;
break;
default:
rc = -EOPNOTSUPP;
break;
}

return rc;
}

static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
Expand Down Expand Up @@ -150,6 +232,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
return __diag_time_slice_end(vcpu);
case 0x9c:
return __diag_time_slice_end_directed(vcpu);
case 0x258:
return __diag_page_ref_service(vcpu);
case 0x308:
return __diag_ipl_functions(vcpu);
case 0x500:
Expand Down
Loading

0 comments on commit f244d91

Please sign in to comment.