Skip to content

Commit

Permalink
Merge tag 'threads-v5.10' of git://git.kernel.org/pub/scm/linux/kerne…
Browse files Browse the repository at this point in the history
…l/git/brauner/linux

Pull pidfd updates from Christian Brauner:
 "This introduces a new extension to the pidfd_open() syscall. Users can
  now raise the new PIDFD_NONBLOCK flag to support non-blocking pidfd
  file descriptors. This has been requested for uses in async process
  management libraries such as async-pidfd in Rust.

  Ever since the introduction of pidfds and more advanced async io
  various programming languages such as Rust have grown support for
  async event libraries. These libraries are created to help build
  epoll-based event loops around file descriptors. A common pattern is
  to automatically make all file descriptors they manage to O_NONBLOCK.

  For such libraries the EAGAIN error code is treated specially. When a
  function is called that returns EAGAIN the function isn't called again
  until the event loop indicates the the file descriptor is ready.
  Supporting EAGAIN when waiting on pidfds makes such libraries just
  work with little effort.

  This introduces a new flag PIDFD_NONBLOCK that is equivalent to
  O_NONBLOCK. This follows the same patterns we have for other (anon
  inode) file descriptors such as EFD_NONBLOCK, IN_NONBLOCK,
  SFD_NONBLOCK, TFD_NONBLOCK and the same for close-on-exec flags.

  Passing a non-blocking pidfd to waitid() currently has no effect, i.e.
  is not supported. There are users which would like to use waitid() on
  pidfds that are O_NONBLOCK and mix it with pidfds that are blocking
  and both pass them to waitid().

  The expected behavior is to have waitid() return -EAGAIN for
  non-blocking pidfds and to block for blocking pidfds without needing
  to perform any additional checks for flags set on the pidfd before
  passing it to waitid(). Non-blocking pidfds will return EAGAIN from
  waitid() when no child process is ready yet. Returning -EAGAIN for
  non-blocking pidfds makes it easier for event loops that handle EAGAIN
  specially.

  It also makes the API more consistent and uniform. In essence,
  waitid() is treated like a read on a non-blocking pidfd or a recvmsg()
  on a non-blocking socket.

  With the addition of support for non-blocking pidfds we support the
  same functionality that sockets do. For sockets() recvmsg() supports
  MSG_DONTWAIT for pidfds waitid() supports WNOHANG. Both flags are
  per-call options. In contrast non-blocking pidfds and non-blocking
  sockets are a setting on an open file description affecting all
  threads in the calling process as well as other processes that hold
  file descriptors referring to the same open file description. Both
  behaviors, per call and per open file description, have genuine
  use-cases.

  The interaction with the WNOHANG flag is documented as follows:

   - If a non-blocking pidfd is passed and WNOHANG is not raised we
     simply raise the WNOHANG flag internally. When do_wait() returns
     indicating that there are eligible child processes but none have
     exited yet we set EAGAIN. If no child process exists we continue
     returning ECHILD.

   - If a non-blocking pidfd is passed and WNOHANG is raised waitid()
     will continue returning 0, i.e. it will not set EAGAIN. This ensure
     backwards compatibility with applications passing WNOHANG
     explicitly with pidfds"

* tag 'threads-v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
  tests: remove O_NONBLOCK before waiting for WSTOPPED
  tests: add waitid() tests for non-blocking pidfds
  tests: port pidfd_wait to kselftest harness
  pidfd: support PIDFD_NONBLOCK in pidfd_open()
  exit: support non-blocking pidfds
  • Loading branch information
torvalds committed Oct 14, 2020
2 parents 612e7a4 + 01361b6 commit 4da9af0
Show file tree
Hide file tree
Showing 5 changed files with 164 additions and 183 deletions.
12 changes: 12 additions & 0 deletions include/uapi/linux/pidfd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */

#ifndef _UAPI_LINUX_PIDFD_H
#define _UAPI_LINUX_PIDFD_H

#include <linux/types.h>
#include <linux/fcntl.h>

/* Flags for pidfd_open(). */
#define PIDFD_NONBLOCK O_NONBLOCK

#endif /* _UAPI_LINUX_PIDFD_H */
15 changes: 12 additions & 3 deletions kernel/exit.c
Original file line number Diff line number Diff line change
Expand Up @@ -1474,7 +1474,7 @@ static long do_wait(struct wait_opts *wo)
return retval;
}

static struct pid *pidfd_get_pid(unsigned int fd)
static struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
{
struct fd f;
struct pid *pid;
Expand All @@ -1484,8 +1484,10 @@ static struct pid *pidfd_get_pid(unsigned int fd)
return ERR_PTR(-EBADF);

pid = pidfd_pid(f.file);
if (!IS_ERR(pid))
if (!IS_ERR(pid)) {
get_pid(pid);
*flags = f.file->f_flags;
}

fdput(f);
return pid;
Expand All @@ -1498,6 +1500,7 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
struct pid *pid = NULL;
enum pid_type type;
long ret;
unsigned int f_flags = 0;

if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED|
__WNOTHREAD|__WCLONE|__WALL))
Expand Down Expand Up @@ -1531,9 +1534,10 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
if (upid < 0)
return -EINVAL;

pid = pidfd_get_pid(upid);
pid = pidfd_get_pid(upid, &f_flags);
if (IS_ERR(pid))
return PTR_ERR(pid);

break;
default:
return -EINVAL;
Expand All @@ -1544,7 +1548,12 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
wo.wo_flags = options;
wo.wo_info = infop;
wo.wo_rusage = ru;
if (f_flags & O_NONBLOCK)
wo.wo_flags |= WNOHANG;

ret = do_wait(&wo);
if (!ret && !(options & WNOHANG) && (f_flags & O_NONBLOCK))
ret = -EAGAIN;

put_pid(pid);
return ret;
Expand Down
12 changes: 7 additions & 5 deletions kernel/pid.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <linux/sched/task.h>
#include <linux/idr.h>
#include <net/sock.h>
#include <uapi/linux/pidfd.h>

struct pid init_struct_pid = {
.count = REFCOUNT_INIT(1),
Expand Down Expand Up @@ -522,7 +523,8 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
/**
* pidfd_create() - Create a new pid file descriptor.
*
* @pid: struct pid that the pidfd will reference
* @pid: struct pid that the pidfd will reference
* @flags: flags to pass
*
* This creates a new pid file descriptor with the O_CLOEXEC flag set.
*
Expand All @@ -532,12 +534,12 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
* Return: On success, a cloexec pidfd is returned.
* On error, a negative errno number will be returned.
*/
static int pidfd_create(struct pid *pid)
static int pidfd_create(struct pid *pid, unsigned int flags)
{
int fd;

fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
O_RDWR | O_CLOEXEC);
flags | O_RDWR | O_CLOEXEC);
if (fd < 0)
put_pid(pid);

Expand Down Expand Up @@ -565,7 +567,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
int fd;
struct pid *p;

if (flags)
if (flags & ~PIDFD_NONBLOCK)
return -EINVAL;

if (pid <= 0)
Expand All @@ -576,7 +578,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
return -ESRCH;

if (pid_has_task(p, PIDTYPE_TGID))
fd = pidfd_create(p);
fd = pidfd_create(p, flags);
else
fd = -EINVAL;

Expand Down
4 changes: 4 additions & 0 deletions tools/testing/selftests/pidfd/pidfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@
#define __NR_pidfd_getfd -1
#endif

#ifndef PIDFD_NONBLOCK
#define PIDFD_NONBLOCK O_NONBLOCK
#endif

/*
* The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c
* That means, when it wraps around any pid < 300 will be skipped.
Expand Down
Loading

0 comments on commit 4da9af0

Please sign in to comment.