
Commit

Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus

* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus:
  lguest: don't force VIRTIO_F_NOTIFY_ON_EMPTY
  lguest: cleanup for map_switcher()
  lguest: use PGDIR_SHIFT for PAE code to allow different PAGE_OFFSET
  lguest: use set_pte/set_pmd uniformly for real page table entries
  lguest: move panic notifier registration to its expected place.
  virtio_blk: add support for cache flush
  virtio: add virtio IDs file
  virtio: get rid of redundant VIRTIO_ID_9P definition
  virtio: make add_buf return capacity remaining
  virtio_pci: minor MSI-X cleanups
torvalds committed Sep 23, 2009
2 parents 4266c97 + ca60a42 commit 1f0918d
Showing 20 changed files with 199 additions and 133 deletions.
26 changes: 22 additions & 4 deletions Documentation/lguest/lguest.c
@@ -42,6 +42,7 @@
#include <signal.h>
#include "linux/lguest_launcher.h"
#include "linux/virtio_config.h"
#include <linux/virtio_ids.h>
#include "linux/virtio_net.h"
#include "linux/virtio_blk.h"
#include "linux/virtio_console.h"
@@ -133,6 +134,9 @@ struct device {
/* Is it operational? */
bool running;

/* Does Guest want an interrupt on empty? */
bool irq_on_empty;

/* Device-specific data. */
void *priv;
};
@@ -623,10 +627,13 @@ static void trigger_irq(struct virtqueue *vq)
return;
vq->pending_used = 0;

/* If they don't want an interrupt, don't send one, unless empty. */
if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
&& lg_last_avail(vq) != vq->vring.avail->idx)
return;
/* If they don't want an interrupt, don't send one... */
if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
/* ... unless they've asked us to force one on empty. */
if (!vq->dev->irq_on_empty
|| lg_last_avail(vq) != vq->vring.avail->idx)
return;
}

/* Send the Guest an interrupt to tell them we used something up. */
if (write(lguest_fd, buf, sizeof(buf)) != 0)
@@ -1042,6 +1049,15 @@ static void create_thread(struct virtqueue *vq)
close(vq->eventfd);
}

static bool accepted_feature(struct device *dev, unsigned int bit)
{
const u8 *features = get_feature_bits(dev) + dev->feature_len;

if (dev->feature_len < bit / CHAR_BIT)
return false;
return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT));
}

static void start_device(struct device *dev)
{
unsigned int i;
@@ -1055,6 +1071,8 @@ static void start_device(struct device *dev)
verbose(" %02x", get_feature_bits(dev)
[dev->feature_len+i]);

dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);

for (vq = dev->vq; vq; vq = vq->next) {
if (vq->service)
create_thread(vq);
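
The irq_on_empty logic above hinges on accepted_feature(): the Guest's acked feature bits live as a little-endian byte array right after the device's own feature bytes, so testing bit N means indexing byte N / CHAR_BIT and masking bit N % CHAR_BIT. Below is a minimal userspace sketch of the same test, assuming VIRTIO_F_NOTIFY_ON_EMPTY is bit 24 as in linux/virtio_config.h; the harness is illustrative, not part of the patch, and it bounds-checks with >=, slightly stricter than the patch's < test.

#include <limits.h>	/* CHAR_BIT */
#include <stdbool.h>
#include <stdio.h>

#define VIRTIO_F_NOTIFY_ON_EMPTY 24	/* value from linux/virtio_config.h */

/* Test one feature bit in a little-endian byte array of acked features. */
static bool feature_acked(const unsigned char *acked, unsigned int len,
			  unsigned int bit)
{
	if (bit / CHAR_BIT >= len)	/* bit lies beyond the acked bytes */
		return false;
	return acked[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT));
}

int main(void)
{
	unsigned char acked[4] = { 0 };

	/* Guest acks NOTIFY_ON_EMPTY: bit 24 lands in byte 3, bit 0. */
	acked[VIRTIO_F_NOTIFY_ON_EMPTY / CHAR_BIT] |=
		1 << (VIRTIO_F_NOTIFY_ON_EMPTY % CHAR_BIT);

	printf("notify-on-empty acked: %d\n",
	       feature_acked(acked, sizeof(acked), VIRTIO_F_NOTIFY_ON_EMPTY));
	return 0;
}
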
10 changes: 4 additions & 6 deletions arch/x86/lguest/boot.c
@@ -1135,11 +1135,6 @@ static struct notifier_block paniced = {
/* Setting up memory is fairly easy. */
static __init char *lguest_memory_setup(void)
{
/* We do this here and not earlier because lockcheck used to barf if we
* did it before start_kernel(). I think we fixed that, so it'd be
* nice to move it back to lguest_init. Patch welcome... */
atomic_notifier_chain_register(&panic_notifier_list, &paniced);

/*
* The Linux bootloader header contains an "e820" memory map: the
* Launcher populated the first entry with our memory limit.
@@ -1364,10 +1359,13 @@ __init void lguest_init(void)

/*
* If we don't initialize the lock dependency checker now, it crashes
* paravirt_disable_iospace.
* atomic_notifier_chain_register, then paravirt_disable_iospace.
*/
lockdep_init();

/* Hook in our special panic hypercall code. */
atomic_notifier_chain_register(&panic_notifier_list, &paniced);

/*
* The IDE code spends about 3 seconds probing for disks: if we reserve
* all the I/O ports up front it can't get them and so doesn't probe.
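
For context on the move above: atomic_notifier_chain_register() just links a notifier_block into the chain that panic() later walks, but as the updated comment says, calling it before lockdep_init() crashes, so the registration now sits immediately after lockdep_init() instead of in lguest_memory_setup(). A hedged kernel-style sketch of the registration pattern follows; the callback name and message are illustrative, not lguest's.

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/notifier.h>

/* Runs on the atomic panic chain, so it must not sleep. */
static int example_panic_event(struct notifier_block *nb,
			       unsigned long event, void *ptr)
{
	pr_emerg("example: panic: %s\n", (char *)ptr); /* ptr is the panic message */
	return NOTIFY_DONE;
}

static struct notifier_block example_panic_nb = {
	.notifier_call = example_panic_event,
};

static void __init example_register(void)
{
	/* Safe only once lockdep (if enabled) has been initialized. */
	atomic_notifier_chain_register(&panic_notifier_list,
				       &example_panic_nb);
}
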
33 changes: 27 additions & 6 deletions drivers/block/virtio_blk.c
@@ -3,6 +3,7 @@
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/virtio.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>

@@ -91,15 +92,26 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
return false;

vbr->req = req;
if (blk_fs_request(vbr->req)) {
switch (req->cmd_type) {
case REQ_TYPE_FS:
vbr->out_hdr.type = 0;
vbr->out_hdr.sector = blk_rq_pos(vbr->req);
vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
} else if (blk_pc_request(vbr->req)) {
break;
case REQ_TYPE_BLOCK_PC:
vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
vbr->out_hdr.sector = 0;
vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
} else {
break;
case REQ_TYPE_LINUX_BLOCK:
if (req->cmd[0] == REQ_LB_OP_FLUSH) {
vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
vbr->out_hdr.sector = 0;
vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
break;
}
/*FALLTHRU*/
default:
/* We don't put anything else in the queue. */
BUG();
}
@@ -139,7 +151,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
}
}

if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr)) {
if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) {
mempool_free(vbr, vblk->pool);
return false;
}
@@ -199,6 +211,12 @@ static int virtblk_identify(struct gendisk *disk, void *argp)
return err;
}

static void virtblk_prepare_flush(struct request_queue *q, struct request *req)
{
req->cmd_type = REQ_TYPE_LINUX_BLOCK;
req->cmd[0] = REQ_LB_OP_FLUSH;
}

static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
unsigned cmd, unsigned long data)
{
@@ -337,7 +355,10 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
index++;

/* If barriers are supported, tell block layer that queue is ordered */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_DRAIN_FLUSH,
virtblk_prepare_flush);
else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);

/* If disk is read-only in the host, the guest should obey */
@@ -424,7 +445,7 @@ static struct virtio_device_id id_table[] = {
static unsigned int features[] = {
VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_IDENTIFY
VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_IDENTIFY, VIRTIO_BLK_F_FLUSH
};

/*
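
The flush support above has two halves: at probe time the driver prefers VIRTIO_BLK_F_FLUSH and registers a QUEUE_ORDERED_DRAIN_FLUSH queue with virtblk_prepare_flush() as the prepare callback, falling back to the old barrier feature; at request time that callback tags the request REQ_TYPE_LINUX_BLOCK/REQ_LB_OP_FLUSH so do_req() emits a VIRTIO_BLK_T_FLUSH header. A hedged sketch of what that header ends up holding (the helper name is illustrative; the constants are from linux/virtio_blk.h):

#include <linux/virtio_blk.h>

/*
 * Build the virtio-blk request header for a cache flush: no data and
 * no sector, just the command.  The host writes out its volatile
 * cache and then completes the request with a status byte.
 */
static void fill_flush_hdr(struct virtio_blk_outhdr *hdr, u32 ioprio)
{
	hdr->type = VIRTIO_BLK_T_FLUSH;	/* flush command */
	hdr->sector = 0;		/* unused for flush */
	hdr->ioprio = ioprio;		/* priority is still passed through */
}
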
3 changes: 2 additions & 1 deletion drivers/char/hw_random/virtio-rng.c
@@ -21,6 +21,7 @@
#include <linux/scatterlist.h>
#include <linux/spinlock.h>
#include <linux/virtio.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_rng.h>

/* The host will fill any buffer we give it with sweet, sweet randomness. We
@@ -51,7 +52,7 @@ static void register_buffer(void)

sg_init_one(&sg, random_data+data_left, RANDOM_DATA_SIZE-data_left);
/* There should always be room for one buffer. */
if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) != 0)
if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) < 0)
BUG();
vq->vq_ops->kick(vq);
}
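
The call-site change here follows "virtio: make add_buf return capacity remaining" from the shortlog: add_buf() used to return 0 on success, and now returns how many descriptors are still free (>= 0) on success, or a negative errno on failure, so success checks become < 0 / >= 0 as in the hunk above. A hedged sketch of the new calling convention; queue_one() is illustrative, not a function from any of these drivers.

#include <linux/kernel.h>
#include <linux/scatterlist.h>
#include <linux/virtio.h>

/* Queue one inbuf and note how much room remains in the ring. */
static int queue_one(struct virtqueue *vq, void *data, unsigned int len)
{
	struct scatterlist sg;
	int ret;

	sg_init_one(&sg, data, len);
	ret = vq->vq_ops->add_buf(vq, &sg, 0, 1, data);	/* 0 out, 1 in */
	if (ret < 0)
		return ret;		/* ring full or bad sg list */
	if (ret == 0)			/* still a success... */
		pr_debug("virtqueue is now full\n");
	vq->vq_ops->kick(vq);		/* tell the host */
	return 0;
}
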
5 changes: 3 additions & 2 deletions drivers/char/virtio_console.c
@@ -31,6 +31,7 @@
#include <linux/err.h>
#include <linux/init.h>
#include <linux/virtio.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_console.h>
#include "hvc_console.h"

@@ -65,7 +66,7 @@ static int put_chars(u32 vtermno, const char *buf, int count)

/* add_buf wants a token to identify this buffer: we hand it any
* non-NULL pointer, since there's only ever one buffer. */
if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) == 0) {
if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) >= 0) {
/* Tell Host to go! */
out_vq->vq_ops->kick(out_vq);
/* Chill out until it's done with the buffer. */
@@ -85,7 +86,7 @@ static void add_inbuf(void)
sg_init_one(sg, inbuf, PAGE_SIZE);

/* We should always be able to add one buffer to an empty queue. */
if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) != 0)
if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) < 0)
BUG();
in_vq->vq_ops->kick(in_vq);
}
5 changes: 2 additions & 3 deletions drivers/lguest/core.c
@@ -67,12 +67,11 @@ static __init int map_switcher(void)
* so we make sure they're zeroed.
*/
for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {
unsigned long addr = get_zeroed_page(GFP_KERNEL);
if (!addr) {
switcher_page[i] = alloc_page(GFP_KERNEL|__GFP_ZERO);
if (!switcher_page[i]) {
err = -ENOMEM;
goto free_some_pages;
}
switcher_page[i] = virt_to_page(addr);
}

/*
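
The map_switcher() cleanup above is a straight simplification: get_zeroed_page() hands back a zeroed page by virtual address, which the old code then converted with virt_to_page(), while alloc_page(GFP_KERNEL|__GFP_ZERO) yields the zeroed struct page directly. A hedged before/after sketch (the function names are illustrative):

#include <linux/gfp.h>
#include <linux/mm.h>

/* Before: allocate by virtual address, then convert to a struct page. */
static struct page *alloc_zeroed_old(void)
{
	unsigned long addr = get_zeroed_page(GFP_KERNEL);

	if (!addr)
		return NULL;
	return virt_to_page(addr);
}

/* After: ask for a zeroed struct page in one step. */
static struct page *alloc_zeroed_new(void)
{
	return alloc_page(GFP_KERNEL | __GFP_ZERO);
}
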
45 changes: 19 additions & 26 deletions drivers/lguest/page_tables.c
@@ -380,7 +380,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
* And we copy the flags to the shadow PMD entry. The page
* number in the shadow PMD is the page we just allocated.
*/
native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
}

/*
@@ -447,7 +447,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
* we will come back here when a write does actually occur, so
* we can update the Guest's _PAGE_DIRTY flag.
*/
native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));
set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));

/*
* Finally, we write the Guest PTE entry back: we've set the
@@ -528,7 +528,7 @@ static void release_pmd(pmd_t *spmd)
/* Now we can free the page of PTEs */
free_page((long)ptepage);
/* And zero out the PMD entry so we never release it twice. */
native_set_pmd(spmd, __pmd(0));
set_pmd(spmd, __pmd(0));
}
}

@@ -833,15 +833,15 @@ static void do_set_pte(struct lg_cpu *cpu, int idx,
*/
if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
check_gpte(cpu, gpte);
native_set_pte(spte,
gpte_to_spte(cpu, gpte,
set_pte(spte,
gpte_to_spte(cpu, gpte,
pte_flags(gpte) & _PAGE_DIRTY));
} else {
/*
* Otherwise kill it and we can demand_page()
* it in later.
*/
native_set_pte(spte, __pte(0));
set_pte(spte, __pte(0));
}
#ifdef CONFIG_X86_PAE
}
@@ -983,25 +983,22 @@ static unsigned long setup_pagetables(struct lguest *lg,
*/
for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD;
i += PTRS_PER_PTE, j++) {
/* FIXME: native_set_pmd is overkill here. */
native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i)
- mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
pmd = pfn_pmd(((unsigned long)&linear[i] - mem_base)/PAGE_SIZE,
__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));

if (copy_to_user(&pmds[j], &pmd, sizeof(pmd)) != 0)
return -EFAULT;
}

/* One PGD entry, pointing to that PMD page. */
set_pgd(&pgd, __pgd(((u32)pmds - mem_base) | _PAGE_PRESENT));
pgd = __pgd(((unsigned long)pmds - mem_base) | _PAGE_PRESENT);
/* Copy it in as the first PGD entry (ie. addresses 0-1G). */
if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
return -EFAULT;
/*
* And the third PGD entry (ie. addresses 3G-4G).
*
* FIXME: This assumes that PAGE_OFFSET for the Guest is 0xC0000000.
* And the other PGD entry to make the linear mapping at PAGE_OFFSET
*/
if (copy_to_user(&pgdir[3], &pgd, sizeof(pgd)) != 0)
if (copy_to_user(&pgdir[KERNEL_PGD_BOUNDARY], &pgd, sizeof(pgd)))
return -EFAULT;
#else
/*
@@ -1141,23 +1138,21 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
{
pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
pte_t regs_pte;
unsigned long pfn;

#ifdef CONFIG_X86_PAE
pmd_t switcher_pmd;
pmd_t *pmd_table;

/* FIXME: native_set_pmd is overkill here. */
native_set_pmd(&switcher_pmd, pfn_pmd(__pa(switcher_pte_page) >>
PAGE_SHIFT, PAGE_KERNEL_EXEC));
switcher_pmd = pfn_pmd(__pa(switcher_pte_page) >> PAGE_SHIFT,
PAGE_KERNEL_EXEC);

/* Figure out where the pmd page is, by reading the PGD, and converting
* it to a virtual address. */
pmd_table = __va(pgd_pfn(cpu->lg->
pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX])
<< PAGE_SHIFT);
/* Now write it into the shadow page table. */
native_set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
#else
pgd_t switcher_pgd;

@@ -1179,10 +1174,8 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
* page is already mapped there, we don't have to copy them out
* again.
*/
pfn = __pa(cpu->regs_page) >> PAGE_SHIFT;
native_set_pte(&regs_pte, pfn_pte(pfn, PAGE_KERNEL));
native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)],
regs_pte);
regs_pte = pfn_pte(__pa(cpu->regs_page) >> PAGE_SHIFT, PAGE_KERNEL);
set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], regs_pte);
}
/*:*/

@@ -1209,22 +1202,22 @@ static __init void populate_switcher_pte_page(unsigned int cpu,

/* The first entries are easy: they map the Switcher code. */
for (i = 0; i < pages; i++) {
native_set_pte(&pte[i], mk_pte(switcher_page[i],
set_pte(&pte[i], mk_pte(switcher_page[i],
__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
}

/* The only other thing we map is this CPU's pair of pages. */
i = pages + cpu*2;

/* First page (Guest registers) is writable from the Guest */
native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]),
set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]),
__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW)));

/*
* The second page contains the "struct lguest_ro_state", and is
* read-only.
*/
native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]),
set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]),
__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
}

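
One pattern runs through this file: values being assembled on the stack are now built by plain assignment from pfn_pmd()/pfn_pte()/__pgd(), and only writes into real (shadow) page tables go through the generic set_pte()/set_pmd() helpers rather than the native_* variants. A hedged sketch of that split for one PMD entry; install_pte_page() and its arguments are illustrative, not lguest code.

#include <asm/page.h>
#include <asm/pgtable.h>

/* Point one shadow PMD slot at a freshly allocated page of PTEs. */
static void install_pte_page(pmd_t *spmd, void *ptepage)
{
	/* A temporary on the stack: plain assignment is enough. */
	pmd_t entry = pfn_pmd(__pa(ptepage) >> PAGE_SHIFT,
			      __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));

	/* A real page-table slot: go through the set_pmd() helper. */
	set_pmd(spmd, entry);
}
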
(Diff for the remaining 13 changed files not shown.)
