Skip to content

Commit

Permalink
futex: Implement sys_futex_waitv()
Browse files Browse the repository at this point in the history
Add support to wait on multiple futexes. This is the interface
implemented by this syscall:

futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes,
	    unsigned int flags, struct timespec *timeout, clockid_t clockid)

struct futex_waitv {
	__u64 val;
	__u64 uaddr;
	__u32 flags;
	__u32 __reserved;
};

Given an array of struct futex_waitv, wait on each uaddr. The thread
wakes if a futex_wake() is performed at any uaddr. The syscall returns
immediately if any waiter has *uaddr != val. *timeout is an optional
absolute timeout value for the operation. This syscall supports only
64bit sized timeout structs. The flags argument of the syscall should be
empty, but it can be used for future extensions. Flags for shared
futexes, sizes, etc. should be used on the individual flags of each
waiter.

__reserved is used for explicit padding and should be 0, but it might be
used for future extensions. If the userspace uses 32-bit pointers, it
should make sure to explicitly cast it when assigning to waitv::uaddr.

Returns the array index of one of the woken futexes. No information is
given about how many futexes were woken, nor about any particular property
of the returned one (whether it was the first woken, whether it has the
smallest index, ...).

Signed-off-by: André Almeida <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
  • Loading branch information
andrealmeid authored and Peter Zijlstra committed Oct 7, 2021
1 parent bff7c57 commit bf69bad
Show file tree
Hide file tree
Showing 8 changed files with 371 additions and 1 deletion.
1 change: 1 addition & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -7718,6 +7718,7 @@ M: Ingo Molnar <[email protected]>
R: Peter Zijlstra <[email protected]>
R: Darren Hart <[email protected]>
R: Davidlohr Bueso <[email protected]>
R: André Almeida <[email protected]>
L: [email protected]
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
Expand Down
5 changes: 5 additions & 0 deletions include/linux/syscalls.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ struct mq_attr;
struct compat_stat;
struct old_timeval32;
struct robust_list_head;
struct futex_waitv;
struct getcpu_cache;
struct old_linux_dirent;
struct perf_event_attr;
Expand Down Expand Up @@ -623,6 +624,10 @@ asmlinkage long sys_get_robust_list(int pid,
asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
size_t len);

/*
 * @waiters points to a userspace array of @nr_futexes entries, so it carries
 * the same __user annotation as the SYSCALL_DEFINE5(futex_waitv, ...)
 * definition.
 */
asmlinkage long sys_futex_waitv(struct futex_waitv __user *waiters,
			unsigned int nr_futexes, unsigned int flags,
			struct __kernel_timespec __user *timeout, clockid_t clockid);

/* kernel/hrtimer.c */
asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
struct __kernel_timespec __user *rmtp);
Expand Down
5 changes: 4 additions & 1 deletion include/uapi/asm-generic/unistd.h
Original file line number Diff line number Diff line change
Expand Up @@ -880,8 +880,11 @@ __SYSCALL(__NR_memfd_secret, sys_memfd_secret)
#define __NR_process_mrelease 448
__SYSCALL(__NR_process_mrelease, sys_process_mrelease)

#define __NR_futex_waitv 449
__SYSCALL(__NR_futex_waitv, sys_futex_waitv)

#undef __NR_syscalls
#define __NR_syscalls 449
#define __NR_syscalls 450

/*
* 32 bit systems traditionally used different
Expand Down
25 changes: 25 additions & 0 deletions include/uapi/linux/futex.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,31 @@
#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \
FUTEX_PRIVATE_FLAG)

/*
* Flags to specify the bit length of the futex word for futex2 syscalls.
* Currently, only 32 is supported.
*/
#define FUTEX_32 2

/*
* Maximum number of elements in a futex_waitv array
*/
#define FUTEX_WAITV_MAX 128

/**
 * struct futex_waitv - A waiter for vectorized wait
 * @val:	Expected value at uaddr
 * @uaddr:	User address to wait on, stored as a 64-bit integer so the
 *		layout does not depend on the userspace pointer size
 * @flags:	Flags for this waiter. sys_futex_waitv() accepts only FUTEX_32
 *		and FUTEX_PRIVATE_FLAG here, and requires FUTEX_32 to be set.
 * @__reserved:	Reserved member to preserve data alignment. Must be 0;
 *		sys_futex_waitv() rejects a non-zero value with -EINVAL.
 *
 * One element of the array passed to sys_futex_waitv(). The member order and
 * sizes are part of the userspace ABI.
 */
struct futex_waitv {
__u64 val;
__u64 uaddr;
__u32 flags;
__u32 __reserved;
};

/*
* Support for robust futexes: the kernel cleans up held futexes at
* thread exit time.
Expand Down
15 changes: 15 additions & 0 deletions kernel/futex/futex.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,21 @@ extern int futex_requeue(u32 __user *uaddr1, unsigned int flags,
extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
ktime_t *abs_time, u32 bitset);

/**
 * struct futex_vector - Auxiliary struct for futex_waitv()
 * @w: Userspace provided data
 * @q: Kernel side data
 *
 * Struct used to build an array with all the data needed for futex_waitv():
 * each element pairs one waiter description copied from userspace with the
 * kernel's futex_q for it.
 */
struct futex_vector {
struct futex_waitv w;
struct futex_q q;
};

/*
 * Called by sys_futex_waitv() with the parsed waiter array @vs, its length
 * @count, and @to, which is NULL when the caller supplied no timeout.
 */
extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
struct hrtimer_sleeper *to);

extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset);

extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags,
Expand Down
119 changes: 119 additions & 0 deletions kernel/futex/syscalls.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,125 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}

/* Per-waiter flags accepted in each element of a futex_waitv array */
#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG)

/**
 * futex_parse_waitv - Copy in and validate a waitv array from userspace
 * @futexv:	Kernel side array of waiters to be filled
 * @uwaitv:	Userspace array to be parsed
 * @nr_futexes:	Number of elements to parse
 *
 * Elements are processed in order. Each one is copied from userspace,
 * validated, and then stored in the matching @futexv slot together with a
 * freshly initialised futex_q. Parsing stops at the first bad element, so
 * slots before it have already been filled in.
 *
 * Return: 0 on success, -EFAULT if an element cannot be copied from
 * userspace, -EINVAL if an element has flags outside FUTEXV_WAITER_MASK,
 * a non-zero __reserved member, or does not set FUTEX_32.
 */
static int futex_parse_waitv(struct futex_vector *futexv,
			     struct futex_waitv __user *uwaitv,
			     unsigned int nr_futexes)
{
	unsigned int idx = 0;

	while (idx < nr_futexes) {
		struct futex_vector *slot = &futexv[idx];
		struct futex_waitv entry;

		if (copy_from_user(&entry, &uwaitv[idx], sizeof(entry)))
			return -EFAULT;

		/* Unknown flags, non-zero padding and a missing size flag are all invalid */
		if ((entry.flags & ~FUTEXV_WAITER_MASK) || entry.__reserved ||
		    !(entry.flags & FUTEX_32))
			return -EINVAL;

		slot->w.flags = entry.flags;
		slot->w.val = entry.val;
		slot->w.uaddr = entry.uaddr;
		slot->q = futex_q_init;

		idx++;
	}

	return 0;
}

/**
 * sys_futex_waitv - Wait on a list of futexes
 * @waiters:	List of futexes to wait on
 * @nr_futexes:	Length of @waiters, between 1 and FUTEX_WAITV_MAX
 * @flags:	Must be 0; this syscall supports no flags for now
 * @timeout:	Optional absolute timeout
 * @clockid:	Clock to be used for the timeout, realtime or monotonic.
 *		Only checked when @timeout is given.
 *
 * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes
 * if a futex_wake() is performed at any uaddr. The syscall returns immediately
 * if any waiter has *uaddr != val. *timeout is an optional absolute timeout
 * value for the operation, measured against @clockid. Each waiter has
 * individual flags: flags for private futexes, sizes, etc. should be used on
 * the individual flags of each waiter, not on the @flags argument of the
 * syscall.
 *
 * Returns the array index of one of the woken futexes. No further information
 * is provided: any number of other futexes may also have been woken by the
 * same event, and if more than one futex was woken, the returned index may
 * refer to any one of them. (It is not necessarily the futex with the
 * smallest index, nor the one most recently woken, nor...)
 *
 * On failure, returns -EINVAL for non-zero @flags, a NULL @waiters, an
 * out-of-range @nr_futexes or an unsupported @clockid, -EFAULT if @timeout
 * cannot be read, -ENOMEM if the kernel side array cannot be allocated, or
 * the error reported by futex_init_timeout(), futex_parse_waitv() or
 * futex_wait_multiple().
 */
SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
		unsigned int, nr_futexes, unsigned int, flags,
		struct __kernel_timespec __user *, timeout, clockid_t, clockid)
{
	struct hrtimer_sleeper to;
	struct futex_vector *futexv;
	struct timespec64 ts;
	ktime_t time;
	int ret;

	/* This syscall supports no flags for now */
	if (flags)
		return -EINVAL;

	if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
		return -EINVAL;

	if (timeout) {
		int flag_clkid = 0, flag_init = 0;

		switch (clockid) {
		case CLOCK_REALTIME:
			flag_clkid = FLAGS_CLOCKRT;
			flag_init = FUTEX_CLOCK_REALTIME;
			break;
		case CLOCK_MONOTONIC:
			break;
		default:
			return -EINVAL;
		}

		if (get_timespec64(&ts, timeout))
			return -EFAULT;

		/*
		 * Since there's no opcode for futex_waitv, use
		 * FUTEX_WAIT_BITSET that uses absolute timeout as well
		 */
		ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
		if (ret)
			return ret;

		futex_setup_timer(&time, &to, flag_clkid, 0);
	}

	/*
	 * From here on the on-stack timer may have been set up, so every exit
	 * path has to go through destroy_timer, including allocation failure.
	 */
	futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL);
	if (!futexv) {
		ret = -ENOMEM;
		goto destroy_timer;
	}

	ret = futex_parse_waitv(futexv, waiters, nr_futexes);
	if (!ret)
		ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL);

	kfree(futexv);

destroy_timer:
	if (timeout) {
		hrtimer_cancel(&to.timer);
		destroy_hrtimer_on_stack(&to.timer);
	}
	return ret;
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(set_robust_list,
struct compat_robust_list_head __user *, head,
Expand Down
Loading

0 comments on commit bf69bad

Please sign in to comment.