Skip to content

Commit

Permalink
net: fix sock_wake_async() rcu protection
Browse files Browse the repository at this point in the history
Dmitry provided a syzkaller (http://github.com/google/syzkaller) program
triggering a fault in sock_wake_async() when async IO is requested.

Said program stressed af_unix sockets, but the issue is generic
and should be addressed in core networking stack.

The problem is that by the time sock_wake_async() is called,
we should not access the @flags field of 'struct socket',
as the inode containing this socket might be freed without
further notice, and without RCU grace period.

We already maintain an RCU protected structure, "struct socket_wq"
so moving SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA into it
is the safe route.

It also reduces the number of cache lines that need dirtying, so it might
provide a performance improvement anyway.

In followup patches, we might move the remaining flags (SOCK_NOSPACE,
SOCK_PASSCRED, SOCK_PASSSEC) to save 8 bytes and leave 'struct socket'
read-mostly, letting it be shared between cpus.

Reported-by: Dmitry Vyukov <[email protected]>
Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
Eric Dumazet authored and davem330 committed Dec 1, 2015
1 parent 9cd3e07 commit ceb5d58
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 33 deletions.
7 changes: 6 additions & 1 deletion include/linux/net.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ struct inode;
struct file;
struct net;

/* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located
* in sock->flags, but moved into sk->sk_wq->flags to be RCU protected.
* Eventually all flags will be in sk->sk_wq->flags.
*/
#define SOCKWQ_ASYNC_NOSPACE 0
#define SOCKWQ_ASYNC_WAITDATA 1
#define SOCK_NOSPACE 2
Expand Down Expand Up @@ -89,6 +93,7 @@ struct socket_wq {
/* Note: wait MUST be first field of socket_wq */
wait_queue_head_t wait;
struct fasync_struct *fasync_list;
unsigned long flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */
struct rcu_head rcu;
} ____cacheline_aligned_in_smp;

Expand Down Expand Up @@ -202,7 +207,7 @@ enum {
SOCK_WAKE_URG,
};

int sock_wake_async(struct socket *sk, int how, int band);
int sock_wake_async(struct socket_wq *sk_wq, int how, int band);
int sock_register(const struct net_proto_family *fam);
void sock_unregister(int family);
int __sock_create(struct net *net, int family, int type, int proto,
Expand Down
23 changes: 16 additions & 7 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,10 @@ struct sock {
int sk_rcvbuf;

struct sk_filter __rcu *sk_filter;
struct socket_wq __rcu *sk_wq;

union {
struct socket_wq __rcu *sk_wq;
struct socket_wq *sk_wq_raw;
};
#ifdef CONFIG_XFRM
struct xfrm_policy *sk_policy[2];
#endif
Expand Down Expand Up @@ -2005,20 +2007,27 @@ static inline unsigned long sock_wspace(struct sock *sk)
return amt;
}

/* Note:
* We use sk->sk_wq_raw, from contexts knowing this
* pointer is not NULL and cannot disappear/change.
*/
static inline void sk_set_bit(int nr, struct sock *sk)
{
set_bit(nr, &sk->sk_socket->flags);
set_bit(nr, &sk->sk_wq_raw->flags);
}

static inline void sk_clear_bit(int nr, struct sock *sk)
{
clear_bit(nr, &sk->sk_socket->flags);
clear_bit(nr, &sk->sk_wq_raw->flags);
}

static inline void sk_wake_async(struct sock *sk, int how, int band)
static inline void sk_wake_async(const struct sock *sk, int how, int band)
{
if (sock_flag(sk, SOCK_FASYNC))
sock_wake_async(sk->sk_socket, how, band);
if (sock_flag(sk, SOCK_FASYNC)) {
rcu_read_lock();
sock_wake_async(rcu_dereference(sk->sk_wq), how, band);
rcu_read_unlock();
}
}

/* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might
Expand Down
2 changes: 1 addition & 1 deletion net/core/stream.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ void sk_stream_write_space(struct sock *sk)
wake_up_interruptible_poll(&wq->wait, POLLOUT |
POLLWRNORM | POLLWRBAND);
if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}
}
Expand Down
24 changes: 14 additions & 10 deletions net/sctp/socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -6801,26 +6801,30 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
static void __sctp_write_space(struct sctp_association *asoc)
{
struct sock *sk = asoc->base.sk;
struct socket *sock = sk->sk_socket;

if ((sctp_wspace(asoc) > 0) && sock) {
if (waitqueue_active(&asoc->wait))
wake_up_interruptible(&asoc->wait);
if (sctp_wspace(asoc) <= 0)
return;

if (waitqueue_active(&asoc->wait))
wake_up_interruptible(&asoc->wait);

if (sctp_writeable(sk)) {
wait_queue_head_t *wq = sk_sleep(sk);
if (sctp_writeable(sk)) {
struct socket_wq *wq;

if (wq && waitqueue_active(wq))
wake_up_interruptible(wq);
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (wq) {
if (waitqueue_active(&wq->wait))
wake_up_interruptible(&wq->wait);

/* Note that we try to include the Async I/O support
* here by modeling from the current TCP/UDP code.
* We have not tested with it yet.
*/
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
sock_wake_async(sock,
SOCK_WAKE_SPACE, POLL_OUT);
sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
}
}

Expand Down
21 changes: 7 additions & 14 deletions net/socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -1056,27 +1056,20 @@ static int sock_fasync(int fd, struct file *filp, int on)
return 0;
}

/* This function may be called only under socket lock or callback_lock or rcu_lock */
/* This function may be called only under rcu_lock */

int sock_wake_async(struct socket *sock, int how, int band)
int sock_wake_async(struct socket_wq *wq, int how, int band)
{
struct socket_wq *wq;

if (!sock)
return -1;
rcu_read_lock();
wq = rcu_dereference(sock->wq);
if (!wq || !wq->fasync_list) {
rcu_read_unlock();
if (!wq || !wq->fasync_list)
return -1;
}

switch (how) {
case SOCK_WAKE_WAITD:
if (test_bit(SOCKWQ_ASYNC_WAITDATA, &sock->flags))
if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
break;
goto call_kill;
case SOCK_WAKE_SPACE:
if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags))
if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
break;
/* fall through */
case SOCK_WAKE_IO:
Expand All @@ -1086,7 +1079,7 @@ int sock_wake_async(struct socket *sock, int how, int band)
case SOCK_WAKE_URG:
kill_fasync(&wq->fasync_list, SIGURG, band);
}
rcu_read_unlock();

return 0;
}
EXPORT_SYMBOL(sock_wake_async);
Expand Down

0 comments on commit ceb5d58

Please sign in to comment.