Skip to content

Commit

Permalink
net: Work around lockdep limitation in sockets that use sockets
Browse files Browse the repository at this point in the history
Lockdep issues a circular dependency warning when AFS issues an operation
through AF_RXRPC from a context in which the VFS/VM holds the mmap_sem.

The theory lockdep comes up with is as follows:

 (1) If the pagefault handler decides it needs to read pages from AFS, it
     calls AFS with mmap_sem held and AFS begins an AF_RXRPC call, but
     creating a call requires the socket lock:

	mmap_sem must be taken before sk_lock-AF_RXRPC

 (2) afs_open_socket() opens an AF_RXRPC socket and binds it.  rxrpc_bind()
     binds the underlying UDP socket whilst holding its socket lock.
     inet_bind() takes its own socket lock:

	sk_lock-AF_RXRPC must be taken before sk_lock-AF_INET

 (3) Reading from a TCP socket into a userspace buffer might cause a fault
     and thus cause the kernel to take the mmap_sem, but the TCP socket is
     locked whilst doing this:

	sk_lock-AF_INET must be taken before mmap_sem

However, lockdep's theory is wrong in this instance because it deals only
with lock classes and not individual locks.  The AF_INET lock in (2) isn't
really equivalent to the AF_INET lock in (3) as the former deals with a
socket entirely internal to the kernel that never sees userspace.  This is
a limitation in the design of lockdep.

Fix the general case by:

 (1) Double up all the locking keys used in sockets so that one set are
     used if the socket is created by userspace and the other set is used
     if the socket is created by the kernel.

 (2) Store the kern parameter passed to sk_alloc() in a variable in the
     sock struct (sk_kern_sock).  This informs sock_lock_init(),
     sock_init_data() and sk_clone_lock() as to the lock keys to be used.

     Note that the child created by sk_clone_lock() inherits the parent's
     kern setting.

 (3) Add a 'kern' parameter to ->accept() that is analogous to the one
     passed in to ->create() that distinguishes whether kernel_accept() or
     sys_accept4() was the caller and can be passed to sk_alloc().

     Note that a lot of accept functions merely dequeue an already
     allocated socket.  I haven't touched these as the new socket already
     exists before we get the parameter.

     Note also that there are a couple of places where I've made the accepted
     socket unconditionally kernel-based:

	irda_accept()
	rds_rcp_accept_one()
	tcp_accept_from_sock()

     because they follow a sock_create_kern() and accept off of that.

Whilst creating this, I noticed that lustre and ocfs don't create sockets
through sock_create_kern() and thus they aren't marked as for-kernel,
though they appear to be internal.  I wonder if these should do that so
that they use the new set of lock keys.

Signed-off-by: David Howells <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
dhowells authored and davem330 committed Mar 10, 2017
1 parent 81dca07 commit cdfbabf
Show file tree
Hide file tree
Showing 38 changed files with 142 additions and 108 deletions.
9 changes: 5 additions & 4 deletions crypto/af_alg.c
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ static int alg_setsockopt(struct socket *sock, int level, int optname,
return err;
}

int af_alg_accept(struct sock *sk, struct socket *newsock)
int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern)
{
struct alg_sock *ask = alg_sk(sk);
const struct af_alg_type *type;
Expand All @@ -281,7 +281,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
if (!type)
goto unlock;

sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0);
sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, kern);
err = -ENOMEM;
if (!sk2)
goto unlock;
Expand Down Expand Up @@ -323,9 +323,10 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
}
EXPORT_SYMBOL_GPL(af_alg_accept);

static int alg_accept(struct socket *sock, struct socket *newsock, int flags)
static int alg_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern)
{
return af_alg_accept(sock->sk, newsock);
return af_alg_accept(sock->sk, newsock, kern);
}

static const struct proto_ops alg_proto_ops = {
Expand Down
9 changes: 5 additions & 4 deletions crypto/algif_hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,8 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
return err ?: len;
}

static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
static int hash_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
Expand All @@ -260,7 +261,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
if (err)
return err;

err = af_alg_accept(ask->parent, newsock);
err = af_alg_accept(ask->parent, newsock, kern);
if (err)
return err;

Expand Down Expand Up @@ -378,15 +379,15 @@ static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
}

static int hash_accept_nokey(struct socket *sock, struct socket *newsock,
int flags)
int flags, bool kern)
{
int err;

err = hash_check_key(sock);
if (err)
return err;

return hash_accept(sock, newsock, flags);
return hash_accept(sock, newsock, flags, kern);
}

static struct proto_ops algif_hash_ops_nokey = {
Expand Down
4 changes: 2 additions & 2 deletions drivers/staging/lustre/lnet/lnet/lib-socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -532,15 +532,15 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock)

newsock->ops = sock->ops;

rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
if (rc == -EAGAIN) {
/* Nothing ready, so wait for activity */
init_waitqueue_entry(&wait, current);
add_wait_queue(sk_sleep(sock->sk), &wait);
set_current_state(TASK_INTERRUPTIBLE);
schedule();
remove_wait_queue(sk_sleep(sock->sk), &wait);
rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
}

if (rc)
Expand Down
2 changes: 1 addition & 1 deletion fs/dlm/lowcomms.c
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@ static int tcp_accept_from_sock(struct connection *con)
newsock->type = con->sock->type;
newsock->ops = con->sock->ops;

result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK);
result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK, true);
if (result < 0)
goto accept_err;

Expand Down
2 changes: 1 addition & 1 deletion fs/ocfs2/cluster/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1863,7 +1863,7 @@ static int o2net_accept_one(struct socket *sock, int *more)

new_sock->type = sock->type;
new_sock->ops = sock->ops;
ret = sock->ops->accept(sock, new_sock, O_NONBLOCK);
ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false);
if (ret < 0)
goto out;

Expand Down
2 changes: 1 addition & 1 deletion include/crypto/if_alg.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ int af_alg_unregister_type(const struct af_alg_type *type);

int af_alg_release(struct socket *sock);
void af_alg_release_parent(struct sock *sk);
int af_alg_accept(struct sock *sk, struct socket *newsock);
int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern);

int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
void af_alg_free_sg(struct af_alg_sgl *sgl);
Expand Down
2 changes: 1 addition & 1 deletion include/linux/net.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ struct proto_ops {
int (*socketpair)(struct socket *sock1,
struct socket *sock2);
int (*accept) (struct socket *sock,
struct socket *newsock, int flags);
struct socket *newsock, int flags, bool kern);
int (*getname) (struct socket *sock,
struct sockaddr *addr,
int *sockaddr_len, int peer);
Expand Down
3 changes: 2 additions & 1 deletion include/net/inet_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags, int is_sendmsg);
int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags);
int inet_accept(struct socket *sock, struct socket *newsock, int flags);
int inet_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern);
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
Expand Down
2 changes: 1 addition & 1 deletion include/net/inet_connection_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
return (unsigned long)min_t(u64, when, max_when);
}

struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern);

int inet_csk_get_port(struct sock *sk, unsigned short snum);

Expand Down
3 changes: 2 additions & 1 deletion include/net/sctp/structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,8 @@ struct sctp_pf {
int (*send_verify) (struct sctp_sock *, union sctp_addr *);
int (*supported_addrs)(const struct sctp_sock *, __be16 *);
struct sock *(*create_accept_sk) (struct sock *sk,
struct sctp_association *asoc);
struct sctp_association *asoc,
bool kern);
int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr);
void (*to_sk_saddr)(union sctp_addr *, struct sock *sk);
void (*to_sk_daddr)(union sctp_addr *, struct sock *sk);
Expand Down
9 changes: 6 additions & 3 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ struct sock_common {
* @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
* @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
* @sk_lock: synchronizer
* @sk_kern_sock: True if sock is using kernel lock classes
* @sk_rcvbuf: size of receive buffer in bytes
* @sk_wq: sock wait queue and async head
* @sk_rx_dst: receive input route used by early demux
Expand Down Expand Up @@ -430,7 +431,8 @@ struct sock {
#endif

kmemcheck_bitfield_begin(flags);
unsigned int sk_padding : 2,
unsigned int sk_padding : 1,
sk_kern_sock : 1,
sk_no_check_tx : 1,
sk_no_check_rx : 1,
sk_userlocks : 4,
Expand Down Expand Up @@ -1015,7 +1017,8 @@ struct proto {
int addr_len);
int (*disconnect)(struct sock *sk, int flags);

struct sock * (*accept)(struct sock *sk, int flags, int *err);
struct sock * (*accept)(struct sock *sk, int flags, int *err,
bool kern);

int (*ioctl)(struct sock *sk, int cmd,
unsigned long arg);
Expand Down Expand Up @@ -1573,7 +1576,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
int sock_no_bind(struct socket *, struct sockaddr *, int);
int sock_no_connect(struct socket *, struct sockaddr *, int, int);
int sock_no_socketpair(struct socket *, struct socket *);
int sock_no_accept(struct socket *, struct socket *, int);
int sock_no_accept(struct socket *, struct socket *, int, bool);
int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
unsigned int sock_no_poll(struct file *, struct socket *,
struct poll_table_struct *);
Expand Down
5 changes: 3 additions & 2 deletions net/atm/svc.c
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,8 @@ static int svc_listen(struct socket *sock, int backlog)
return error;
}

static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern)
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
Expand All @@ -329,7 +330,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags)

lock_sock(sk);

error = svc_create(sock_net(sk), newsock, 0, 0);
error = svc_create(sock_net(sk), newsock, 0, kern);
if (error)
goto out;

Expand Down
3 changes: 2 additions & 1 deletion net/ax25/af_ax25.c
Original file line number Diff line number Diff line change
Expand Up @@ -1320,7 +1320,8 @@ static int __must_check ax25_connect(struct socket *sock,
return err;
}

static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern)
{
struct sk_buff *skb;
struct sock *newsk;
Expand Down
2 changes: 1 addition & 1 deletion net/bluetooth/l2cap_sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog)
}

static int l2cap_sock_accept(struct socket *sock, struct socket *newsock,
int flags)
int flags, bool kern)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *nsk;
Expand Down
3 changes: 2 additions & 1 deletion net/bluetooth/rfcomm/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,8 @@ static int rfcomm_sock_listen(struct socket *sock, int backlog)
return err;
}

static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags)
static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *nsk;
Expand Down
2 changes: 1 addition & 1 deletion net/bluetooth/sco.c
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ static int sco_sock_listen(struct socket *sock, int backlog)
}

static int sco_sock_accept(struct socket *sock, struct socket *newsock,
int flags)
int flags, bool kern)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *ch;
Expand Down
Loading

0 comments on commit cdfbabf

Please sign in to comment.