Skip to content

Commit

Permalink
tcp: connect() race with timewait reuse
Browse files Browse the repository at this point in the history
Its currently possible that several threads issuing a connect() find
the same timewait socket and try to reuse it, leading to list
corruptions.

Condition for bug is that these threads bound their socket on same
address/port of to-be-find timewait socket, and connected to same
target. (SO_REUSEADDR needed)

To fix this problem, we could unhash timewait socket while holding
ehash lock, to make sure lookups/changes will be serialized. Only
first thread finds the timewait socket, other ones find the
established socket and return an EADDRNOTAVAIL error.

This second version takes into account Evgeniy's review and makes sure
inet_twsk_put() is called outside of locked sections.

Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
Eric Dumazet authored and davem330 committed Dec 4, 2009
1 parent ff33a6e commit 13475a3
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 18 deletions.
2 changes: 2 additions & 0 deletions include/net/inet_timewait_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ static inline __be32 inet_rcv_saddr(const struct sock *sk)

extern void inet_twsk_put(struct inet_timewait_sock *tw);

extern int inet_twsk_unhash(struct inet_timewait_sock *tw);

extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
const int state);

Expand Down
10 changes: 7 additions & 3 deletions net/ipv4/inet_hashtables.c
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
struct sock *sk2;
const struct hlist_nulls_node *node;
struct inet_timewait_sock *tw;
int twrefcnt = 0;

spin_lock(lock);

Expand Down Expand Up @@ -318,20 +319,23 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
sk->sk_hash = hash;
WARN_ON(!sk_unhashed(sk));
__sk_nulls_add_node_rcu(sk, &head->chain);
if (tw) {
twrefcnt = inet_twsk_unhash(tw);
NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
}
spin_unlock(lock);
if (twrefcnt)
inet_twsk_put(tw);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

if (twp) {
*twp = tw;
NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
} else if (tw) {
/* Silly. Should hash-dance instead... */
inet_twsk_deschedule(tw, death_row);
NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);

inet_twsk_put(tw);
}

return 0;

not_unique:
Expand Down
38 changes: 28 additions & 10 deletions net/ipv4/inet_timewait_sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,40 +14,57 @@
#include <net/inet_timewait_sock.h>
#include <net/ip.h>


/*
* unhash a timewait socket from established hash
* lock must be hold by caller
*/
int inet_twsk_unhash(struct inet_timewait_sock *tw)
{
if (hlist_nulls_unhashed(&tw->tw_node))
return 0;

hlist_nulls_del_rcu(&tw->tw_node);
sk_nulls_node_init(&tw->tw_node);
return 1;
}

/* Must be called with locally disabled BHs. */
static void __inet_twsk_kill(struct inet_timewait_sock *tw,
struct inet_hashinfo *hashinfo)
{
struct inet_bind_hashbucket *bhead;
struct inet_bind_bucket *tb;
int refcnt;
/* Unlink from established hashes. */
spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);

spin_lock(lock);
if (hlist_nulls_unhashed(&tw->tw_node)) {
spin_unlock(lock);
return;
}
hlist_nulls_del_rcu(&tw->tw_node);
sk_nulls_node_init(&tw->tw_node);
refcnt = inet_twsk_unhash(tw);
spin_unlock(lock);

/* Disassociate with bind bucket. */
bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
hashinfo->bhash_size)];
spin_lock(&bhead->lock);
tb = tw->tw_tb;
__hlist_del(&tw->tw_bind_node);
tw->tw_tb = NULL;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
if (tb) {
__hlist_del(&tw->tw_bind_node);
tw->tw_tb = NULL;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
refcnt++;
}
spin_unlock(&bhead->lock);
#ifdef SOCK_REFCNT_DEBUG
if (atomic_read(&tw->tw_refcnt) != 1) {
printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n",
tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
}
#endif
inet_twsk_put(tw);
while (refcnt) {
inet_twsk_put(tw);
refcnt--;
}
}

static noinline void inet_twsk_free(struct inet_timewait_sock *tw)
Expand Down Expand Up @@ -92,6 +109,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
atomic_inc(&tw->tw_refcnt);
spin_unlock(&bhead->lock);

spin_lock(lock);
Expand Down
15 changes: 10 additions & 5 deletions net/ipv6/inet6_hashtables.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
struct sock *sk2;
const struct hlist_nulls_node *node;
struct inet_timewait_sock *tw;
int twrefcnt = 0;

spin_lock(lock);

Expand Down Expand Up @@ -250,19 +251,23 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
* in hash table socket with a funny identity. */
inet->inet_num = lport;
inet->inet_sport = htons(lport);
sk->sk_hash = hash;
WARN_ON(!sk_unhashed(sk));
__sk_nulls_add_node_rcu(sk, &head->chain);
sk->sk_hash = hash;
if (tw) {
twrefcnt = inet_twsk_unhash(tw);
NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
}
spin_unlock(lock);
if (twrefcnt)
inet_twsk_put(tw);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

if (twp != NULL) {
if (twp) {
*twp = tw;
NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
} else if (tw != NULL) {
} else if (tw) {
/* Silly. Should hash-dance instead... */
inet_twsk_deschedule(tw, death_row);
NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);

inet_twsk_put(tw);
}
Expand Down

0 comments on commit 13475a3

Please sign in to comment.