Skip to content

Commit

Permalink
Merge tag 'notifications-pipe-prep-20191115' of git://git.kernel.org/…
Browse files Browse the repository at this point in the history
…pub/scm/linux/kernel/git/dhowells/linux-fs

Pull pipe rework from David Howells:
 "This is my set of preparatory patches for building a general
  notification queue on top of pipes. It makes a number of significant
  changes:

   - It removes the nr_exclusive argument from __wake_up_sync_key() as
     this is always 1. This prepares for the next step:

   - Adds wake_up_interruptible_sync_poll_locked() so that poll can be
     woken up from a function that's holding the poll waitqueue
     spinlock.

   - Change the pipe buffer ring to be managed in terms of unbounded
     head and tail indices rather than bounded index and length. This
     means that reading the pipe only needs to modify one index, not
     two.

   - A selection of helper functions are provided to query the state of
     the pipe buffer, plus a couple to apply updates to the pipe
     indices.

   - The pipe ring is allowed to have kernel-reserved slots. This allows
     many notification messages to be spliced in by the kernel without
     allowing userspace to pin too many pages if it writes to the same
     pipe.

   - Advance the head and tail indices inside the pipe waitqueue lock
     and use wake_up_interruptible_sync_poll_locked() to poke poll
     without having to take the lock twice.

   - Rearrange pipe_write() to preallocate the buffer it is going to
     write into and then drop the spinlock. This allows kernel
     notifications to then be added the ring whilst it is filling the
     buffer it allocated. The read side is stalled because the pipe
     mutex is still held.

   - Don't wake up readers on a pipe if there was already data in it
     when we added more.

   - Don't wake up writers on a pipe if the ring wasn't full before we
     removed a buffer"

* tag 'notifications-pipe-prep-20191115' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
  pipe: Remove sync on wake_ups
  pipe: Increase the writer-wakeup threshold to reduce context-switch count
  pipe: Check for ring full inside of the spinlock in pipe_write()
  pipe: Remove redundant wakeup from pipe_write()
  pipe: Rearrange sequence in pipe_write() to preallocate slot
  pipe: Conditionalise wakeup in pipe_read()
  pipe: Advance tail pointer inside of wait spinlock in pipe_read()
  pipe: Allow pipes to have kernel-reserved slots
  pipe: Use head and tail pointers for the ring, not cursor and length
  Add wake_up_interruptible_sync_poll_locked()
  Remove the nr_exclusive argument from __wake_up_sync_key()
  pipe: Reduce #inclusion of pipe_fs_i.h
  • Loading branch information
torvalds committed Nov 30, 2019
2 parents 32ef955 + 3c0edea commit 6a96566
Show file tree
Hide file tree
Showing 13 changed files with 532 additions and 334 deletions.
16 changes: 7 additions & 9 deletions drivers/char/virtio_console.c
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,7 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe,
.pos = *ppos,
.u.data = &sgl,
};
unsigned int occupancy;

/*
* Rproc_serial does not yet support splice. To support splice
Expand All @@ -929,29 +930,26 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe,
if (is_rproc_serial(port->out_vq->vdev))
return -EINVAL;

/*
* pipe->nrbufs == 0 means there are no data to transfer,
* so this returns just 0 for no data.
*/
pipe_lock(pipe);
if (!pipe->nrbufs) {
ret = 0;
ret = 0;
if (pipe_empty(pipe->head, pipe->tail))
goto error_out;
}

ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK);
if (ret < 0)
goto error_out;

buf = alloc_buf(port->portdev->vdev, 0, pipe->nrbufs);
occupancy = pipe_occupancy(pipe->head, pipe->tail);
buf = alloc_buf(port->portdev->vdev, 0, occupancy);

if (!buf) {
ret = -ENOMEM;
goto error_out;
}

sgl.n = 0;
sgl.len = 0;
sgl.size = pipe->nrbufs;
sgl.size = occupancy;
sgl.sg = buf->sg;
sg_init_table(sgl.sg, sgl.size);
ret = __splice_from_pipe(pipe, &sd, pipe_to_sg);
Expand Down
1 change: 0 additions & 1 deletion fs/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
#include <linux/kmod.h>
#include <linux/fsnotify.h>
#include <linux/fs_struct.h>
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/compat.h>
#include <linux/vmalloc.h>
Expand Down
31 changes: 18 additions & 13 deletions fs/fuse/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
cs->pipebufs++;
cs->nr_segs--;
} else {
if (cs->nr_segs == cs->pipe->buffers)
if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO;

page = alloc_page(GFP_HIGHUSER);
Expand Down Expand Up @@ -881,7 +881,7 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
struct pipe_buffer *buf;
int err;

if (cs->nr_segs == cs->pipe->buffers)
if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO;

err = unlock_request(cs->req);
Expand Down Expand Up @@ -1343,7 +1343,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (!fud)
return -EPERM;

bufs = kvmalloc_array(pipe->buffers, sizeof(struct pipe_buffer),
bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer),
GFP_KERNEL);
if (!bufs)
return -ENOMEM;
Expand All @@ -1355,7 +1355,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (ret < 0)
goto out;

if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) {
ret = -EIO;
goto out;
}
Expand Down Expand Up @@ -1937,6 +1937,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos,
size_t len, unsigned int flags)
{
unsigned int head, tail, mask, count;
unsigned nbuf;
unsigned idx;
struct pipe_buffer *bufs;
Expand All @@ -1951,17 +1952,21 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,

pipe_lock(pipe);

bufs = kvmalloc_array(pipe->nrbufs, sizeof(struct pipe_buffer),
GFP_KERNEL);
head = pipe->head;
tail = pipe->tail;
mask = pipe->ring_size - 1;
count = head - tail;

bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL);
if (!bufs) {
pipe_unlock(pipe);
return -ENOMEM;
}

nbuf = 0;
rem = 0;
for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
for (idx = tail; idx < head && rem < len; idx++)
rem += pipe->bufs[idx & mask].len;

ret = -EINVAL;
if (rem < len)
Expand All @@ -1972,16 +1977,16 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
struct pipe_buffer *ibuf;
struct pipe_buffer *obuf;

BUG_ON(nbuf >= pipe->buffers);
BUG_ON(!pipe->nrbufs);
ibuf = &pipe->bufs[pipe->curbuf];
BUG_ON(nbuf >= pipe->ring_size);
BUG_ON(tail == head);
ibuf = &pipe->bufs[tail & mask];
obuf = &bufs[nbuf];

if (rem >= ibuf->len) {
*obuf = *ibuf;
ibuf->ops = NULL;
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
tail++;
pipe->tail = tail;
} else {
if (!pipe_buf_get(pipe, ibuf))
goto out_free;
Expand Down
1 change: 0 additions & 1 deletion fs/ocfs2/aops.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include <linux/pagemap.h>
#include <asm/byteorder.h>
#include <linux/swap.h>
#include <linux/pipe_fs_i.h>
#include <linux/mpage.h>
#include <linux/quotaops.h>
#include <linux/blkdev.h>
Expand Down
Loading

0 comments on commit 6a96566

Please sign in to comment.