Skip to content

Commit

Permalink
Merge tag 'virtio-next-for-linus' of git://git.kernel.org/pub/scm/lin…
Browse files Browse the repository at this point in the history
…ux/kernel/git/rusty/linux

Pull virtio updates from Rusty Russell:
 "Some virtio internal cleanups, a new virtio device "virtio input", and
  a change to allow the legacy virtio balloon.

  Most excitingly, some lguest work! No seriously, I got some cleanup
  patches"

* tag 'virtio-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux:
  virtio: drop virtio_device_is_legacy_only
  virtio_pci: support non-legacy balloon devices
  virtio_mmio: support non-legacy balloon devices
  virtio_ccw: support non-legacy balloon devices
  virtio: balloon might not be a legacy device
  virtio_balloon: transitional interface
  virtio_ring: Update weak barriers to use dma_wmb/rmb
  virtio_pci_modern: switch to type-safe io accessors
  virtio_pci_modern: type-safe io accessors
  lguest: handle traps on the "interrupt suppressed" iret instruction.
  virtio: drop a useless config read
  virtio_config: reorder functions
  Add virtio-input driver.
  lguest: suppress interrupts for single insn, not range.
  lguest: simplify lguest_iret
  lguest: rename i386_head.S in the comments
  lguest: explicitly set miscdevice's private_data NULL
  lguest: fix pending interrupt test.
  • Loading branch information
torvalds committed Apr 22, 2015
2 parents 15ce265 + 9abbfb4 commit b9bb6fb
Show file tree
Hide file tree
Showing 24 changed files with 721 additions and 167 deletions.
6 changes: 6 additions & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -10517,6 +10517,12 @@ S: Maintained
F: drivers/vhost/
F: include/uapi/linux/vhost.h

VIRTIO INPUT DRIVER
M: Gerd Hoffmann <kraxel@redhat.com>
S: Maintained
F: drivers/virtio/virtio_input.c
F: include/uapi/linux/virtio_input.h

VIA RHINE NETWORK DRIVER
M: Roger Luethi <rl@hellgate.ch>
S: Maintained
Expand Down
7 changes: 2 additions & 5 deletions arch/x86/include/asm/lguest.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,10 @@ extern unsigned long switcher_addr;
/* Found in switcher.S */
extern unsigned long default_idt_entries[];

/* Declarations for definitions in lguest_guest.S */
extern char lguest_noirq_start[], lguest_noirq_end[];
/* Declarations for definitions in arch/x86/lguest/head_32.S */
extern char lguest_noirq_iret[];
extern const char lgstart_cli[], lgend_cli[];
extern const char lgstart_sti[], lgend_sti[];
extern const char lgstart_popf[], lgend_popf[];
extern const char lgstart_pushf[], lgend_pushf[];
extern const char lgstart_iret[], lgend_iret[];

extern void lguest_iret(void);
extern void lguest_init(void);
Expand Down
7 changes: 3 additions & 4 deletions arch/x86/lguest/boot.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,7 @@

struct lguest_data lguest_data = {
.hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
.noirq_start = (u32)lguest_noirq_start,
.noirq_end = (u32)lguest_noirq_end,
.noirq_iret = (u32)lguest_noirq_iret,
.kernel_address = PAGE_OFFSET,
.blocked_interrupts = { 1 }, /* Block timer interrupts */
.syscall_vec = SYSCALL_VECTOR,
Expand Down Expand Up @@ -262,7 +261,7 @@ PV_CALLEE_SAVE_REGS_THUNK(lguest_save_fl);
PV_CALLEE_SAVE_REGS_THUNK(lguest_irq_disable);
/*:*/

/* These are in i386_head.S */
/* These are in head_32.S */
extern void lg_irq_enable(void);
extern void lg_restore_fl(unsigned long flags);

Expand Down Expand Up @@ -1368,7 +1367,7 @@ static void lguest_restart(char *reason)
* fit comfortably.
*
* First we need assembly templates of each of the patchable Guest operations,
* and these are in i386_head.S.
* and these are in head_32.S.
*/

/*G:060 We construct a table from the assembler templates: */
Expand Down
30 changes: 13 additions & 17 deletions arch/x86/lguest/head_32.S
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ ENTRY(lg_irq_enable)
* set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we
* jump to send_interrupts, otherwise we're done.
*/
testl $0, lguest_data+LGUEST_DATA_irq_pending
cmpl $0, lguest_data+LGUEST_DATA_irq_pending
jnz send_interrupts
/*
* One cool thing about x86 is that you can do many things without using
Expand Down Expand Up @@ -133,9 +133,8 @@ ENTRY(lg_restore_fl)
ret
/*:*/

/* These demark the EIP range where host should never deliver interrupts. */
.global lguest_noirq_start
.global lguest_noirq_end
/* This marks the EIP at which the Host should never deliver interrupts. */
.global lguest_noirq_iret

/*M:004
* When the Host reflects a trap or injects an interrupt into the Guest, it
Expand Down Expand Up @@ -168,29 +167,26 @@ ENTRY(lg_restore_fl)
* So we have to copy eflags from the stack to lguest_data.irq_enabled before
* we do the "iret".
*
* There are two problems with this: firstly, we need to use a register to do
* the copy and secondly, the whole thing needs to be atomic. The first
* problem is easy to solve: push %eax on the stack so we can use it, and then
* restore it at the end just before the real "iret".
* There are two problems with this: firstly, we can't clobber any registers
* and secondly, the whole thing needs to be atomic. The first problem
* is solved by using "push memory"/"pop memory" instruction pair for copying.
*
* The second is harder: copying eflags to lguest_data.irq_enabled will turn
* interrupts on before we're finished, so we could be interrupted before we
* return to userspace or wherever. Our solution to this is to surround the
* code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the
* return to userspace or wherever. Our solution to this is to tell the
* Host that it is *never* to interrupt us there, even if interrupts seem to be
* enabled.
* enabled. (It's not necessary to protect pop instruction, since
* data gets updated only after it completes, so we only need to protect
* one instruction, iret).
*/
ENTRY(lguest_iret)
pushl %eax
movl 12(%esp), %eax
lguest_noirq_start:
pushl 2*4(%esp)
/*
* Note the %ss: segment prefix here. Normal data accesses use the
* "ds" segment, but that will have already been restored for whatever
* we're returning to (such as userspace): we can't trust it. The %ss:
* prefix makes sure we use the stack segment, which is still valid.
*/
movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled
popl %eax
popl %ss:lguest_data+LGUEST_DATA_irq_enabled
lguest_noirq_iret:
iret
lguest_noirq_end:
5 changes: 2 additions & 3 deletions drivers/lguest/hypercalls.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,10 +211,9 @@ static void initialize(struct lg_cpu *cpu)

/*
* The Guest tells us where we're not to deliver interrupts by putting
* the range of addresses into "struct lguest_data".
* the instruction address into "struct lguest_data".
*/
if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start)
|| get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end))
if (get_user(cpu->lg->noirq_iret, &cpu->lg->lguest_data->noirq_iret))
kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);

/*
Expand Down
105 changes: 75 additions & 30 deletions drivers/lguest/interrupts_and_traps.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,21 +56,16 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val)
}

/*H:210
* The set_guest_interrupt() routine actually delivers the interrupt or
* trap. The mechanics of delivering traps and interrupts to the Guest are the
* same, except some traps have an "error code" which gets pushed onto the
* stack as well: the caller tells us if this is one.
*
* "lo" and "hi" are the two parts of the Interrupt Descriptor Table for this
* interrupt or trap. It's split into two parts for traditional reasons: gcc
* on i386 used to be frightened by 64 bit numbers.
* The push_guest_interrupt_stack() routine saves Guest state on the stack for
* an interrupt or trap. The mechanics of delivering traps and interrupts to
* the Guest are the same, except some traps have an "error code" which gets
* pushed onto the stack as well: the caller tells us if this is one.
*
* We set up the stack just like the CPU does for a real interrupt, so it's
* identical for the Guest (and the standard "iret" instruction will undo
* it).
*/
static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
bool has_err)
static void push_guest_interrupt_stack(struct lg_cpu *cpu, bool has_err)
{
unsigned long gstack, origstack;
u32 eflags, ss, irq_enable;
Expand Down Expand Up @@ -130,12 +125,28 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
if (has_err)
push_guest_stack(cpu, &gstack, cpu->regs->errcode);

/*
* Now we've pushed all the old state, we change the stack, the code
* segment and the address to execute.
*/
/* Adjust the stack pointer and stack segment. */
cpu->regs->ss = ss;
cpu->regs->esp = virtstack + (gstack - origstack);
}

/*
* This actually makes the Guest start executing the given interrupt/trap
* handler.
*
* "lo" and "hi" are the two parts of the Interrupt Descriptor Table for this
* interrupt or trap. It's split into two parts for traditional reasons: gcc
* on i386 used to be frightened by 64 bit numbers.
*/
static void guest_run_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi)
{
/* If we're already in the kernel, we don't change stacks. */
if ((cpu->regs->ss&0x3) != GUEST_PL)
cpu->regs->ss = cpu->esp1;

/*
* Set the code segment and the address to execute.
*/
cpu->regs->cs = (__KERNEL_CS|GUEST_PL);
cpu->regs->eip = idt_address(lo, hi);

Expand All @@ -158,6 +169,24 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
kill_guest(cpu, "Disabling interrupts");
}

/* This restores the eflags word which was pushed on the stack by a trap */
static void restore_eflags(struct lg_cpu *cpu)
{
/* This is the physical address of the stack. */
unsigned long stack_pa = guest_pa(cpu, cpu->regs->esp);

/*
* Stack looks like this:
* Address Contents
* esp EIP
* esp + 4 CS
* esp + 8 EFLAGS
*/
cpu->regs->eflags = lgread(cpu, stack_pa + 8, u32);
cpu->regs->eflags &=
~(X86_EFLAGS_TF|X86_EFLAGS_VM|X86_EFLAGS_RF|X86_EFLAGS_NT);
}

/*H:205
* Virtual Interrupts.
*
Expand Down Expand Up @@ -200,14 +229,6 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)

BUG_ON(irq >= LGUEST_IRQS);

/*
* They may be in the middle of an iret, where they asked us never to
* deliver interrupts.
*/
if (cpu->regs->eip >= cpu->lg->noirq_start &&
(cpu->regs->eip < cpu->lg->noirq_end))
return;

/* If they're halted, interrupts restart them. */
if (cpu->halted) {
/* Re-enable interrupts. */
Expand Down Expand Up @@ -237,12 +258,34 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
if (idt_present(idt->a, idt->b)) {
/* OK, mark it no longer pending and deliver it. */
clear_bit(irq, cpu->irqs_pending);

/*
* set_guest_interrupt() takes the interrupt descriptor and a
* flag to say whether this interrupt pushes an error code onto
* the stack as well: virtual interrupts never do.
* They may be about to iret, where they asked us never to
* deliver interrupts. In this case, we can emulate that iret
* then immediately deliver the interrupt. This is basically
* a noop: the iret would pop the interrupt frame and restore
* eflags, and then we'd set it up again. So just restore the
* eflags word and jump straight to the handler in this case.
*
* Denys Vlasenko points out that this isn't quite right: if
* the iret was returning to userspace, then that interrupt
* would reset the stack pointer (which the Guest told us
* about via LHCALL_SET_STACK). But unless the Guest is being
* *really* weird, that will be the same as the current stack
* anyway.
*/
set_guest_interrupt(cpu, idt->a, idt->b, false);
if (cpu->regs->eip == cpu->lg->noirq_iret) {
restore_eflags(cpu);
} else {
/*
* push_guest_interrupt_stack() takes a flag to say
* whether this interrupt pushes an error code onto the
* stack as well: virtual interrupts never do.
*/
push_guest_interrupt_stack(cpu, false);
}
/* Actually make Guest cpu jump to handler. */
guest_run_interrupt(cpu, idt->a, idt->b);
}

/*
Expand Down Expand Up @@ -353,8 +396,9 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num)
*/
if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b))
return false;
set_guest_interrupt(cpu, cpu->arch.idt[num].a,
cpu->arch.idt[num].b, has_err(num));
push_guest_interrupt_stack(cpu, has_err(num));
guest_run_interrupt(cpu, cpu->arch.idt[num].a,
cpu->arch.idt[num].b);
return true;
}

Expand Down Expand Up @@ -395,8 +439,9 @@ static bool direct_trap(unsigned int num)
* The Guest has the ability to turn its interrupt gates into trap gates,
* if it is careful. The Host will let trap gates go directly to the
* Guest, but the Guest needs the interrupts atomically disabled for an
* interrupt gate. It can do this by pointing the trap gate at instructions
* within noirq_start and noirq_end, where it can safely disable interrupts.
* interrupt gate. The Host could provide a mechanism to register more
* "no-interrupt" regions, and the Guest could point the trap gate at
* instructions within that region, where it can safely disable interrupts.
*/

/*M:006
Expand Down
2 changes: 1 addition & 1 deletion drivers/lguest/lg.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ struct lguest {

struct pgdir pgdirs[4];

unsigned long noirq_start, noirq_end;
unsigned long noirq_iret;

unsigned int stack_pages;
u32 tsc_khz;
Expand Down
8 changes: 8 additions & 0 deletions drivers/lguest/lguest_user.c
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,13 @@ static ssize_t write(struct file *file, const char __user *in,
}
}

/*
 * open() handler for the lguest device file.
 *
 * Explicitly resets file->private_data to NULL and reports success (0).
 * NOTE(review): presumably the rest of this file treats a NULL private_data
 * as "no Guest set up yet", and the misc device core may otherwise leave a
 * non-NULL value there -- confirm against the misc device open path.
 */
static int open(struct inode *inode, struct file *file)
{
/* Start from a known-empty state for this open file. */
file->private_data = NULL;

return 0;
}

/*L:060
* The final piece of interface code is the close() routine. It reverses
* everything done in initialize(). This is usually called because the
Expand Down Expand Up @@ -409,6 +416,7 @@ static int close(struct inode *inode, struct file *file)
*/
static const struct file_operations lguest_fops = {
.owner = THIS_MODULE,
.open = open,
.release = close,
.write = write,
.read = read,
Expand Down
10 changes: 3 additions & 7 deletions drivers/s390/kvm/virtio_ccw.c
Original file line number Diff line number Diff line change
Expand Up @@ -1201,13 +1201,9 @@ static int virtio_ccw_online(struct ccw_device *cdev)
vcdev->vdev.id.vendor = cdev->id.cu_type;
vcdev->vdev.id.device = cdev->id.cu_model;

if (virtio_device_is_legacy_only(vcdev->vdev.id)) {
vcdev->revision = 0;
} else {
ret = virtio_ccw_set_transport_rev(vcdev);
if (ret)
goto out_free;
}
ret = virtio_ccw_set_transport_rev(vcdev);
if (ret)
goto out_free;

ret = register_virtio_device(&vcdev->vdev);
if (ret) {
Expand Down
10 changes: 10 additions & 0 deletions drivers/virtio/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@ config VIRTIO_BALLOON

If unsure, say M.

config VIRTIO_INPUT
tristate "Virtio input driver"
depends on VIRTIO
depends on INPUT
---help---
This driver supports virtio input devices such as
keyboards, mice and tablets.

If unsure, say M.

config VIRTIO_MMIO
tristate "Platform bus driver for memory mapped virtio devices"
depends on HAS_IOMEM
Expand Down
1 change: 1 addition & 0 deletions drivers/virtio/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o
obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o
6 changes: 0 additions & 6 deletions drivers/virtio/virtio.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,12 +278,6 @@ static struct bus_type virtio_bus = {
.remove = virtio_dev_remove,
};

/*
 * Report whether a virtio device ID is treated as legacy-only.
 *
 * Returns true only when id.device is VIRTIO_ID_BALLOON; every other
 * device ID yields false.
 */
bool virtio_device_is_legacy_only(struct virtio_device_id id)
{
return id.device == VIRTIO_ID_BALLOON;
}
EXPORT_SYMBOL_GPL(virtio_device_is_legacy_only);

int register_virtio_driver(struct virtio_driver *driver)
{
/* Catch this early. */
Expand Down
Loading

0 comments on commit b9bb6fb

Please sign in to comment.