From 3d840e0636be64502f1d3da2e077dab390c04091 Mon Sep 17 00:00:00 2001 From: Igor Stoppa Date: Fri, 7 Sep 2018 20:48:02 +0300 Subject: [PATCH 1/6] virtio: add unlikely() to WARN_ON_ONCE() The condition to test is unlikely() to be true. Add the hint. Signed-off-by: Igor Stoppa Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Virtualization@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 7ef45a4a3cba72..6683b4a70b059c 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -127,7 +127,7 @@ static inline void free_page(unsigned long addr) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define WARN_ON_ONCE(cond) ((cond) ? fprintf (stderr, "WARNING\n") : 0) +#define WARN_ON_ONCE(cond) (unlikely(cond) ? fprintf (stderr, "WARNING\n") : 0) #define min(x, y) ({ \ typeof(x) _min1 = (x); \ From 6166e5330c3828699859a6ec1af21eef7c58778c Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Wed, 3 Apr 2019 16:04:09 +0100 Subject: [PATCH 2/6] virtio: Fix indentation of VIRTIO_MMIO VIRTIO_MMIO config option block starts with a space, fix that. Signed-off-by: Fabrizio Castro Signed-off-by: Michael S. Tsirkin --- drivers/virtio/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 9aea44ed54c717..023fc3bc01c6f2 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -63,12 +63,12 @@ config VIRTIO_INPUT If unsure, say M. - config VIRTIO_MMIO +config VIRTIO_MMIO tristate "Platform bus driver for memory mapped virtio devices" depends on HAS_IOMEM && HAS_DMA - select VIRTIO - ---help--- - This drivers provides support for memory mapped virtio + select VIRTIO + ---help--- + This drivers provides support for memory mapped virtio platform device driver. If unsure, say N. From e82b9b0727ff6d665fff2d326162b460dded554d Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 17 May 2019 00:29:49 -0400 Subject: [PATCH 3/6] vhost: introduce vhost_exceeds_weight() We used to have vhost_exceeds_weight() for vhost-net to: - prevent vhost kthread from hogging the cpu - balance the time spent between TX and RX This function could be useful for vsock and scsi as well. So move it to vhost.c. Device must specify a weight which counts the number of requests, or it can also specific a byte_weight which counts the number of bytes that has been processed. Signed-off-by: Jason Wang Reviewed-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 22 ++++++---------------- drivers/vhost/scsi.c | 9 ++++++++- drivers/vhost/vhost.c | 20 +++++++++++++++++++- drivers/vhost/vhost.h | 5 ++++- drivers/vhost/vsock.c | 12 +++++++++++- 5 files changed, 48 insertions(+), 20 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index df51a35cf537c9..061a06dc12a34b 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -604,12 +604,6 @@ static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter, return iov_iter_count(iter); } -static bool vhost_exceeds_weight(int pkts, int total_len) -{ - return total_len >= VHOST_NET_WEIGHT || - pkts >= VHOST_NET_PKT_WEIGHT; -} - static int get_tx_bufs(struct vhost_net *net, struct vhost_net_virtqueue *nvq, struct msghdr *msg, @@ -845,10 +839,8 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head); vq->heads[nvq->done_idx].len = 0; ++nvq->done_idx; - if (vhost_exceeds_weight(++sent_pkts, total_len)) { - vhost_poll_queue(&vq->poll); + if (vhost_exceeds_weight(vq, ++sent_pkts, total_len)) break; - } } vhost_tx_batch(net, nvq, sock, &msg); @@ -951,10 +943,9 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) else vhost_zerocopy_signal_used(net, vq); vhost_net_tx_packet(net); - if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) { - vhost_poll_queue(&vq->poll); + if (unlikely(vhost_exceeds_weight(vq, ++sent_pkts, + total_len))) break; - } } } @@ -1239,10 +1230,8 @@ static void handle_rx(struct vhost_net *net) vhost_log_write(vq, vq_log, log, vhost_len, vq->iov, in); total_len += vhost_len; - if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) { - vhost_poll_queue(&vq->poll); + if (unlikely(vhost_exceeds_weight(vq, ++recv_pkts, total_len))) goto out; - } } if (unlikely(busyloop_intr)) vhost_poll_queue(&vq->poll); @@ -1338,7 +1327,8 @@ static int vhost_net_open(struct inode *inode, struct file *f) vhost_net_buf_init(&n->vqs[i].rxq); } vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, - UIO_MAXIOV + VHOST_NET_BATCH); + UIO_MAXIOV + VHOST_NET_BATCH, + VHOST_NET_WEIGHT, VHOST_NET_PKT_WEIGHT); vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index c090d177bd75cf..27c9dac9f518c6 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -57,6 +57,12 @@ #define VHOST_SCSI_PREALLOC_UPAGES 2048 #define VHOST_SCSI_PREALLOC_PROT_SGLS 2048 +/* Max number of requests before requeueing the job. + * Using this limit prevents one virtqueue from starving others with + * request. + */ +#define VHOST_SCSI_WEIGHT 256 + struct vhost_scsi_inflight { /* Wait for the flush operation to finish */ struct completion comp; @@ -1621,7 +1627,8 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) vqs[i] = &vs->vqs[i].vq; vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; } - vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV); + vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV, + VHOST_SCSI_WEIGHT, 0); vhost_scsi_init_inflight(vs, NULL); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1e3ed41ae1f3f3..3f3eac4bcc585b 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -413,8 +413,24 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev) vhost_vq_free_iovecs(dev->vqs[i]); } +bool vhost_exceeds_weight(struct vhost_virtqueue *vq, + int pkts, int total_len) +{ + struct vhost_dev *dev = vq->dev; + + if ((dev->byte_weight && total_len >= dev->byte_weight) || + pkts >= dev->weight) { + vhost_poll_queue(&vq->poll); + return true; + } + + return false; +} +EXPORT_SYMBOL_GPL(vhost_exceeds_weight); + void vhost_dev_init(struct vhost_dev *dev, - struct vhost_virtqueue **vqs, int nvqs, int iov_limit) + struct vhost_virtqueue **vqs, int nvqs, + int iov_limit, int weight, int byte_weight) { struct vhost_virtqueue *vq; int i; @@ -428,6 +444,8 @@ void vhost_dev_init(struct vhost_dev *dev, dev->mm = NULL; dev->worker = NULL; dev->iov_limit = iov_limit; + dev->weight = weight; + dev->byte_weight = byte_weight; init_llist_head(&dev->work_list); init_waitqueue_head(&dev->wait); INIT_LIST_HEAD(&dev->read_list); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 9490e7ddb34048..27a78a9b8cc7dc 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -171,10 +171,13 @@ struct vhost_dev { struct list_head pending_list; wait_queue_head_t wait; int iov_limit; + int weight; + int byte_weight; }; +bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len); void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, - int nvqs, int iov_limit); + int nvqs, int iov_limit, int weight, int byte_weight); long vhost_dev_set_owner(struct vhost_dev *dev); bool vhost_dev_has_owner(struct vhost_dev *dev); long vhost_dev_check_owner(struct vhost_dev *); diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index bb5fc0e9fbc2d7..47c6d4d43c7078 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -21,6 +21,14 @@ #include "vhost.h" #define VHOST_VSOCK_DEFAULT_HOST_CID 2 +/* Max number of bytes transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving others. */ +#define VHOST_VSOCK_WEIGHT 0x80000 +/* Max number of packets transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving others with + * small pkts. + */ +#define VHOST_VSOCK_PKT_WEIGHT 256 enum { VHOST_VSOCK_FEATURES = VHOST_FEATURES, @@ -531,7 +539,9 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; - vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV); + vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), + UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT, + VHOST_VSOCK_WEIGHT); file->private_data = vsock; spin_lock_init(&vsock->send_pkt_list_lock); From e2412c07f8f3040593dfb88207865a3cd58680c0 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 17 May 2019 00:29:50 -0400 Subject: [PATCH 4/6] vhost_net: fix possible infinite loop When the rx buffer is too small for a packet, we will discard the vq descriptor and retry it for the next packet: while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk, &busyloop_intr))) { ... /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); err = sock->ops->recvmsg(sock, &msg, 1, MSG_DONTWAIT | MSG_TRUNC); pr_debug("Discarded rx packet: len %zd\n", sock_len); continue; } ... } This makes it possible to trigger a infinite while..continue loop through the co-opreation of two VMs like: 1) Malicious VM1 allocate 1 byte rx buffer and try to slow down the vhost process as much as possible e.g using indirect descriptors or other. 2) Malicious VM2 generate packets to VM1 as fast as possible Fixing this by checking against weight at the end of RX and TX loop. This also eliminate other similar cases when: - userspace is consuming the packets in the meanwhile - theoretical TOCTOU attack if guest moving avail index back and forth to hit the continue after vhost find guest just add new buffers This addresses CVE-2019-3900. Fixes: d8316f3991d20 ("vhost: fix total length when packets are too short") Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server") Signed-off-by: Jason Wang Reviewed-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 061a06dc12a34b..2d9df786a9d318 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -773,7 +773,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) int sent_pkts = 0; bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX); - for (;;) { + do { bool busyloop_intr = false; if (nvq->done_idx == VHOST_NET_BATCH) @@ -839,9 +839,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head); vq->heads[nvq->done_idx].len = 0; ++nvq->done_idx; - if (vhost_exceeds_weight(vq, ++sent_pkts, total_len)) - break; - } + } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); vhost_tx_batch(net, nvq, sock, &msg); } @@ -866,7 +864,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) bool zcopy_used; int sent_pkts = 0; - for (;;) { + do { bool busyloop_intr; /* Release DMAs done buffers first */ @@ -943,10 +941,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) else vhost_zerocopy_signal_used(net, vq); vhost_net_tx_packet(net); - if (unlikely(vhost_exceeds_weight(vq, ++sent_pkts, - total_len))) - break; - } + } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); } /* Expects to be always run from workqueue - which acts as @@ -1144,8 +1139,11 @@ static void handle_rx(struct vhost_net *net) vq->log : NULL; mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); - while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk, - &busyloop_intr))) { + do { + sock_len = vhost_net_rx_peek_head_len(net, sock->sk, + &busyloop_intr); + if (!sock_len) + break; sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx, @@ -1230,12 +1228,11 @@ static void handle_rx(struct vhost_net *net) vhost_log_write(vq, vq_log, log, vhost_len, vq->iov, in); total_len += vhost_len; - if (unlikely(vhost_exceeds_weight(vq, ++recv_pkts, total_len))) - goto out; - } + } while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len))); + if (unlikely(busyloop_intr)) vhost_poll_queue(&vq->poll); - else + else if (!sock_len) vhost_net_enable_vq(net, vq); out: vhost_net_signal_used(nvq); @@ -1328,7 +1325,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) } vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, UIO_MAXIOV + VHOST_NET_BATCH, - VHOST_NET_WEIGHT, VHOST_NET_PKT_WEIGHT); + VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT); vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev); From e79b431fb901ba1106670bcc80b9b617b25def7d Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 17 May 2019 00:29:51 -0400 Subject: [PATCH 5/6] vhost: vsock: add weight support This patch will check the weight and exit the loop if we exceeds the weight. This is useful for preventing vsock kthread from hogging cpu which is guest triggerable. The weight can help to avoid starving the request from on direction while another direction is being processed. The value of weight is picked from vhost-net. This addresses CVE-2019-3900. Cc: Stefan Hajnoczi Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko") Signed-off-by: Jason Wang Reviewed-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vsock.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 47c6d4d43c7078..814bed72d79365 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -86,6 +86,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, struct vhost_virtqueue *vq) { struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; + int pkts = 0, total_len = 0; bool added = false; bool restart_tx = false; @@ -97,7 +98,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, /* Avoid further vmexits, we're already processing the virtqueue */ vhost_disable_notify(&vsock->dev, vq); - for (;;) { + do { struct virtio_vsock_pkt *pkt; struct iov_iter iov_iter; unsigned out, in; @@ -182,8 +183,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, */ virtio_transport_deliver_tap_pkt(pkt); + total_len += pkt->len; virtio_transport_free_pkt(pkt); - } + } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); if (added) vhost_signal(&vsock->dev, vq); @@ -358,7 +360,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, dev); struct virtio_vsock_pkt *pkt; - int head; + int head, pkts = 0, total_len = 0; unsigned int out, in; bool added = false; @@ -368,7 +370,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) goto out; vhost_disable_notify(&vsock->dev, vq); - for (;;) { + do { u32 len; if (!vhost_vsock_more_replies(vsock)) { @@ -409,9 +411,11 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) else virtio_transport_free_pkt(pkt); - vhost_add_used(vq, head, sizeof(pkt->hdr) + len); + len += sizeof(pkt->hdr); + vhost_add_used(vq, head, len); + total_len += len; added = true; - } + } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); no_more_replies: if (added) From c1ea02f15ab5efb3e93fc3144d895410bf79fcf2 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 17 May 2019 00:29:52 -0400 Subject: [PATCH 6/6] vhost: scsi: add weight support This patch will check the weight and exit the loop if we exceeds the weight. This is useful for preventing scsi kthread from hogging cpu which is guest triggerable. This addresses CVE-2019-3900. Cc: Paolo Bonzini Cc: Stefan Hajnoczi Fixes: 057cbf49a1f0 ("tcm_vhost: Initial merge for vhost level target fabric driver") Signed-off-by: Jason Wang Reviewed-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 27c9dac9f518c6..a9caf1bc3c3ef2 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -918,7 +918,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) struct iov_iter in_iter, prot_iter, data_iter; u64 tag; u32 exp_data_len, data_direction; - int ret, prot_bytes; + int ret, prot_bytes, c = 0; u16 lun; u8 task_attr; bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI); @@ -938,7 +938,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) vhost_disable_notify(&vs->dev, vq); - for (;;) { + do { ret = vhost_scsi_get_desc(vs, vq, &vc); if (ret) goto err; @@ -1118,7 +1118,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) break; else if (ret == -EIO) vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out); - } + } while (likely(!vhost_exceeds_weight(vq, ++c, 0))); out: mutex_unlock(&vq->mutex); } @@ -1177,7 +1177,7 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) } v_req; struct vhost_scsi_ctx vc; size_t typ_size; - int ret; + int ret, c = 0; mutex_lock(&vq->mutex); /* @@ -1191,7 +1191,7 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) vhost_disable_notify(&vs->dev, vq); - for (;;) { + do { ret = vhost_scsi_get_desc(vs, vq, &vc); if (ret) goto err; @@ -1270,7 +1270,7 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) break; else if (ret == -EIO) vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out); - } + } while (likely(!vhost_exceeds_weight(vq, ++c, 0))); out: mutex_unlock(&vq->mutex); }