Skip to content

Commit

Permalink
KVM: fast-path msi injection with irqfd
Browse files Browse the repository at this point in the history
Store irq routing table pointer in the irqfd object,
and use that to inject MSI directly without bouncing out to
a kernel thread.

While we touch this structure, rearrange the irqfd fields so that the
fast-path members are packed together, improving cache utilization.

This also adds some comments about locking rules and rcu usage in code.

Some notes on the design:
- Use pointer into the rt instead of copying an entry,
  to make it possible to use rcu, thus side-stepping
  locking complexities.  We also save some memory this way.
- Old workqueue code is still used for level-triggered irqs.
  I don't think we do the right thing with level anyway; however,
  it seems easier to keep the existing code around — it has been
  thought through and debugged — and fix level handling later, than
  to rip it out now and re-instate it later.

Signed-off-by: Michael S. Tsirkin <[email protected]>
Acked-by: Marcelo Tosatti <[email protected]>
Acked-by: Gregory Haskins <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>
  • Loading branch information
mstsirkin authored and avikivity committed Jan 12, 2011
1 parent 104f226 commit bd2b53b
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 15 deletions.
16 changes: 16 additions & 0 deletions include/linux/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <linux/preempt.h>
#include <linux/msi.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <asm/signal.h>

#include <linux/kvm.h>
Expand Down Expand Up @@ -240,6 +241,10 @@ struct kvm {

struct mutex irq_lock;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
/*
* Update side is protected by irq_lock and,
* if configured, irqfds.lock.
*/
struct kvm_irq_routing_table __rcu *irq_routing;
struct hlist_head mask_notifier_list;
struct hlist_head irq_ack_notifier_list;
Expand Down Expand Up @@ -511,6 +516,8 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
unsigned long *deliver_bitmask);
#endif
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
int irq_source_id, int level);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
Expand Down Expand Up @@ -652,17 +659,26 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
void kvm_eventfd_init(struct kvm *kvm);
int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
void kvm_irqfd_release(struct kvm *kvm);
void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);

#else

/* No-op stub: eventfd support (irqfd/ioeventfd) is compiled out. */
static inline void kvm_eventfd_init(struct kvm *kvm) {}

/*
 * Stub for builds without eventfd support: reject the KVM_IRQFD ioctl
 * so userspace learns the feature is unavailable.
 */
static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
{
	return -EINVAL;
}

/* No-op stub: nothing to tear down when irqfd support is compiled out. */
static inline void kvm_irqfd_release(struct kvm *kvm) {}

/*
 * Stub for builds without eventfd support: there are no irqfds whose
 * cached routing entries need updating, so publishing the new table
 * via RCU is all that is required.  The eventfd-enabled version (in
 * virt/kvm/eventfd.c) additionally walks kvm->irqfds.items.
 * Caller must invoke synchronize_rcu afterwards, as with the real one.
 */
static inline void kvm_irq_routing_update(struct kvm *kvm,
					  struct kvm_irq_routing_table *irq_rt)
{
	rcu_assign_pointer(kvm->irq_routing, irq_rt);
}

static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
return -ENOSYS;
Expand Down
91 changes: 79 additions & 12 deletions virt/kvm/eventfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,19 @@
*/

struct _irqfd {
struct kvm *kvm;
struct eventfd_ctx *eventfd;
int gsi;
struct list_head list;
poll_table pt;
wait_queue_t wait;
struct work_struct inject;
struct work_struct shutdown;
/* Used for MSI fast-path */
struct kvm *kvm;
wait_queue_t wait;
/* Update side is protected by irqfds.lock */
struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
/* Used for level IRQ fast-path */
int gsi;
struct work_struct inject;
/* Used for setup/shutdown */
struct eventfd_ctx *eventfd;
struct list_head list;
poll_table pt;
struct work_struct shutdown;
};

static struct workqueue_struct *irqfd_cleanup_wq;
Expand Down Expand Up @@ -125,14 +130,22 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
unsigned long flags = (unsigned long)key;
struct kvm_kernel_irq_routing_entry *irq;
struct kvm *kvm = irqfd->kvm;

if (flags & POLLIN)
if (flags & POLLIN) {
rcu_read_lock();
irq = rcu_dereference(irqfd->irq_entry);
/* An event has been signaled, inject an interrupt */
schedule_work(&irqfd->inject);
if (irq)
kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
else
schedule_work(&irqfd->inject);
rcu_read_unlock();
}

if (flags & POLLHUP) {
/* The eventfd is closing, detach from KVM */
struct kvm *kvm = irqfd->kvm;
unsigned long flags;

spin_lock_irqsave(&kvm->irqfds.lock, flags);
Expand Down Expand Up @@ -163,9 +176,31 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
add_wait_queue(wqh, &irqfd->wait);
}

/*
 * Re-point irqfd->irq_entry at the MSI routing entry (if any) for
 * irqfd->gsi in the routing table @irq_rt.  A NULL irq_entry disables
 * the MSI fast path; irqfd_wakeup then falls back to the workqueue.
 *
 * Must be called under irqfds.lock.  Readers access irq_entry under
 * rcu_read_lock(); the caller of the routing change is responsible for
 * synchronize_rcu() before freeing the old table.
 */
static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
			 struct kvm_irq_routing_table *irq_rt)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct hlist_node *n;

	/*
	 * Start from "no fast path".  This also covers the case where the
	 * gsi is in range but its chain in the new table is empty: without
	 * this, irq_entry would keep pointing into the old routing table,
	 * which is freed once the caller's synchronize_rcu() completes
	 * (use-after-free on the fast path).
	 */
	rcu_assign_pointer(irqfd->irq_entry, NULL);

	if (irqfd->gsi >= irq_rt->nr_rt_entries)
		return;

	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
		/* Only fast-path MSI; the last entry on the chain wins. */
		if (e->type == KVM_IRQ_ROUTING_MSI)
			rcu_assign_pointer(irqfd->irq_entry, e);
		else
			rcu_assign_pointer(irqfd->irq_entry, NULL);
	}
}

static int
kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
{
struct kvm_irq_routing_table *irq_rt;
struct _irqfd *irqfd, *tmp;
struct file *file = NULL;
struct eventfd_ctx *eventfd = NULL;
Expand Down Expand Up @@ -215,6 +250,10 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
goto fail;
}

irq_rt = rcu_dereference_protected(kvm->irq_routing,
lockdep_is_held(&kvm->irqfds.lock));
irqfd_update(kvm, irqfd, irq_rt);

events = file->f_op->poll(file, &irqfd->pt);

list_add_tail(&irqfd->list, &kvm->irqfds.items);
Expand Down Expand Up @@ -271,8 +310,17 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
spin_lock_irq(&kvm->irqfds.lock);

list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
/*
* This rcu_assign_pointer is needed for when
* another thread calls kvm_irqfd_update before
* we flush workqueue below.
* It is paired with synchronize_rcu done by caller
* of that function.
*/
rcu_assign_pointer(irqfd->irq_entry, NULL);
irqfd_deactivate(irqfd);
}
}

spin_unlock_irq(&kvm->irqfds.lock);
Expand Down Expand Up @@ -321,6 +369,25 @@ kvm_irqfd_release(struct kvm *kvm)

}

/*
 * Change irq_routing and irqfd.
 * Caller must invoke synchronize_rcu afterwards.
 *
 * Taking irqfds.lock here means the table pointer and every irqfd's
 * cached irq_entry are switched to the new table atomically with
 * respect to irqfd assign/deassign, which also runs under this lock.
 */
void kvm_irq_routing_update(struct kvm *kvm,
			    struct kvm_irq_routing_table *irq_rt)
{
	struct _irqfd *irqfd;

	spin_lock_irq(&kvm->irqfds.lock);

	/* Publish the new table for RCU readers. */
	rcu_assign_pointer(kvm->irq_routing, irq_rt);

	/* Re-point each irqfd's cached MSI entry into the new table. */
	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
		irqfd_update(kvm, irqfd, irq_rt);

	spin_unlock_irq(&kvm->irqfds.lock);
}

/*
* create a host-wide workqueue for issuing deferred shutdown requests
* aggregated from all vm* instances. We need our own isolated single-thread
Expand Down
7 changes: 4 additions & 3 deletions virt/kvm/irq_comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
return r;
}

static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level)
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level)
{
struct kvm_lapic_irq irq;

Expand Down Expand Up @@ -409,8 +409,9 @@ int kvm_set_irq_routing(struct kvm *kvm,

mutex_lock(&kvm->irq_lock);
old = kvm->irq_routing;
rcu_assign_pointer(kvm->irq_routing, new);
kvm_irq_routing_update(kvm, new);
mutex_unlock(&kvm->irq_lock);

synchronize_rcu();

new = old;
Expand Down

0 comments on commit bd2b53b

Please sign in to comment.