Skip to content

Commit

Permalink
Merge branch 'mptcp-misc-fixes'
Browse files Browse the repository at this point in the history
Matthieu Baerts says:

====================
mptcp: locking cleanup & misc. fixes

Patches 1-4 are fixes for issues found by Paolo while working on adding
TCP_NOTSENT_LOWAT support. The latter will need to track more states
under the msk data lock. Since the locking msk locking schema is already
quite complex, do a long awaited clean-up step by moving several
confusing lockless initialization under the relevant locks. Note that it
is unlikely a real race could happen even prior to such patches as the
MPTCP-level state machine implicitly ensures proper serialization of the
write accesses, even lacking explicit lock. But still, simplification is
welcome and this will help for the maintenance. This can be backported
up to v5.6.

Patch 5 is a fix for the userspace PM, not to add new local address
entries if the address is already in the list. This behaviour can be
seen since v5.19.

Patch 6 fixes an issue when Fastopen is used. The issue can happen since
v6.2. A previous fix has already been applied, but not taking care of
all cases according to syzbot.

Patch 7 updates Geliang's email address in the MAINTAINERS file.
====================

Signed-off-by: Matthieu Baerts (NGI0) <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Feb 12, 2024
2 parents bab091d + 68990d0 commit 603604c
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 62 deletions.
9 changes: 5 additions & 4 deletions .mailmap
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,11 @@ Gao Xiang <[email protected]> <[email protected]>
Gao Xiang <[email protected]> <[email protected]>
Gao Xiang <[email protected]> <[email protected]>
Gao Xiang <[email protected]> <[email protected]>
Geliang Tang <[email protected]> <[email protected]>
Geliang Tang <[email protected]> <[email protected]>
Geliang Tang <[email protected]> <[email protected]>
Geliang Tang <[email protected]> <[email protected]>
Geliang Tang <[email protected]> <[email protected]>
Geliang Tang <[email protected]> <[email protected]>
Geliang Tang <[email protected]> <[email protected]>
Geliang Tang <[email protected]> <[email protected]>
Geliang Tang <[email protected]> <[email protected]>
Georgi Djakov <[email protected]> <[email protected]>
Gerald Schaefer <[email protected]> <[email protected]>
Gerald Schaefer <[email protected]> <[email protected]>
Expand Down
2 changes: 1 addition & 1 deletion MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -15324,7 +15324,7 @@ K: \bmdo_
NETWORKING [MPTCP]
M: Matthieu Baerts <[email protected]>
M: Mat Martineau <[email protected]>
R: Geliang Tang <geliang[email protected]>
R: Geliang Tang <geliang@kernel.org>
L: [email protected]
L: [email protected]
S: Maintained
Expand Down
6 changes: 2 additions & 4 deletions net/mptcp/fastopen.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,12 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
mptcp_data_unlock(sk);
}

void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
{
struct sock *sk = (struct sock *)msk;
struct sk_buff *skb;

mptcp_data_lock(sk);
skb = skb_peek_tail(&sk->sk_receive_queue);
if (skb) {
WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
Expand All @@ -77,5 +76,4 @@ void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_
}

pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
mptcp_data_unlock(sk);
}
9 changes: 5 additions & 4 deletions net/mptcp/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -962,9 +962,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
/* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR.
*/
subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true);
goto check_notify;
goto set_fully_established;
}

/* If the first established packet does not contain MP_CAPABLE + data
Expand All @@ -986,7 +984,10 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
set_fully_established:
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
mptcp_subflow_fully_established(subflow, mp_opt);

mptcp_data_lock((struct sock *)msk);
__mptcp_subflow_fully_established(msk, subflow, mp_opt);
mptcp_data_unlock((struct sock *)msk);

check_notify:
/* if the subflow is not already linked into the conn_list, we can't
Expand Down
13 changes: 12 additions & 1 deletion net/mptcp/pm_userspace.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,21 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
struct mptcp_addr_info *skc)
{
struct mptcp_pm_addr_entry new_entry;
struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry;
__be16 msk_sport = ((struct inet_sock *)
inet_sk((struct sock *)msk))->inet_sport;

spin_lock_bh(&msk->pm.lock);
list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
if (mptcp_addresses_equal(&e->addr, skc, false)) {
entry = e;
break;
}
}
spin_unlock_bh(&msk->pm.lock);
if (entry)
return entry->addr.id;

memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry));
new_entry.addr = *skc;
new_entry.addr.id = 0;
Expand Down
31 changes: 17 additions & 14 deletions net/mptcp/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -1505,8 +1505,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,

void mptcp_check_and_set_pending(struct sock *sk)
{
if (mptcp_send_head(sk))
mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
if (mptcp_send_head(sk)) {
mptcp_data_lock(sk);
mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING);
mptcp_data_unlock(sk);
}
}

static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
Expand Down Expand Up @@ -1960,6 +1963,9 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (copied <= 0)
return;

if (!msk->rcvspace_init)
mptcp_rcv_space_init(msk, msk->first);

msk->rcvq_space.copied += copied;

mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC);
Expand Down Expand Up @@ -3142,7 +3148,6 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
WRITE_ONCE(msk->flags, 0);
msk->cb_flags = 0;
msk->push_pending = 0;
msk->recovery = false;
msk->can_ack = false;
msk->fully_established = false;
Expand All @@ -3158,6 +3163,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
msk->bytes_received = 0;
msk->bytes_sent = 0;
msk->bytes_retrans = 0;
msk->rcvspace_init = 0;

WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
Expand All @@ -3180,6 +3186,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk;

if (!nsk)
Expand Down Expand Up @@ -3220,7 +3227,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,

/* The msk maintain a ref to each subflow in the connections list */
WRITE_ONCE(msk->first, ssk);
list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
subflow = mptcp_subflow_ctx(ssk);
list_add(&subflow->node, &msk->conn_list);
sock_hold(ssk);

/* new mpc subflow takes ownership of the newly
Expand All @@ -3235,6 +3243,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
__mptcp_propagate_sndbuf(nsk, ssk);

mptcp_rcv_space_init(msk, ssk);

if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
__mptcp_subflow_fully_established(msk, subflow, mp_opt);
bh_unlock_sock(nsk);

/* note: the newly allocated socket refcount is 2 now */
Expand All @@ -3245,6 +3256,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
{
const struct tcp_sock *tp = tcp_sk(ssk);

msk->rcvspace_init = 1;
msk->rcvq_space.copied = 0;
msk->rcvq_space.rtt_us = 0;

Expand All @@ -3255,8 +3267,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
TCP_INIT_CWND * tp->advmss);
if (msk->rcvq_space.space == 0)
msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;

WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
}

void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
Expand Down Expand Up @@ -3330,8 +3340,7 @@ static void mptcp_release_cb(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);

for (;;) {
unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
msk->push_pending;
unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED);
struct list_head join_list;

if (!flags)
Expand All @@ -3347,7 +3356,6 @@ static void mptcp_release_cb(struct sock *sk)
* datapath acquires the msk socket spinlock while helding
* the subflow socket lock
*/
msk->push_pending = 0;
msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);

Expand Down Expand Up @@ -3475,13 +3483,8 @@ void mptcp_finish_connect(struct sock *ssk)
* accessing the field below
*/
WRITE_ONCE(msk->local_key, subflow->local_key);
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
WRITE_ONCE(msk->snd_una, msk->write_seq);

mptcp_pm_new_connection(msk, ssk, 0);

mptcp_rcv_space_init(msk, ssk);
}

void mptcp_sock_graft(struct sock *sk, struct socket *parent)
Expand Down
16 changes: 9 additions & 7 deletions net/mptcp/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,6 @@ struct mptcp_sock {
int rmem_released;
unsigned long flags;
unsigned long cb_flags;
unsigned long push_pending;
bool recovery; /* closing subflow write queue reinjected */
bool can_ack;
bool fully_established;
Expand All @@ -305,7 +304,8 @@ struct mptcp_sock {
nodelay:1,
fastopening:1,
in_accept_queue:1,
free_first:1;
free_first:1,
rcvspace_init:1;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
Expand Down Expand Up @@ -622,8 +622,9 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt);
void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
Expand Down Expand Up @@ -952,8 +953,8 @@ void mptcp_event_pm_listener(const struct sock *ssk,
enum mptcp_event_type event);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);

void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt);
void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt);
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req);

Expand Down Expand Up @@ -1128,7 +1129,8 @@ static inline bool subflow_simultaneous_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) &&
return (1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) &&
is_active_ssk(subflow) &&
!subflow->conn_finished;
}
Expand Down
71 changes: 44 additions & 27 deletions net/mptcp/subflow.c
Original file line number Diff line number Diff line change
Expand Up @@ -421,29 +421,26 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc

void __mptcp_sync_state(struct sock *sk, int state)
{
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
struct sock *ssk = msk->first;

subflow = mptcp_subflow_ctx(ssk);
__mptcp_propagate_sndbuf(sk, ssk);
if (!msk->rcvspace_init)
mptcp_rcv_space_init(msk, ssk);

__mptcp_propagate_sndbuf(sk, msk->first);
if (sk->sk_state == TCP_SYN_SENT) {
/* subflow->idsn is always available is TCP_SYN_SENT state,
* even for the FASTOPEN scenarios
*/
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
mptcp_set_state(sk, state);
sk->sk_state_change(sk);
}
}

static void mptcp_propagate_state(struct sock *sk, struct sock *ssk)
{
struct mptcp_sock *msk = mptcp_sk(sk);

mptcp_data_lock(sk);
if (!sock_owned_by_user(sk)) {
__mptcp_sync_state(sk, ssk->sk_state);
} else {
msk->pending_state = ssk->sk_state;
__set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
}
mptcp_data_unlock(sk);
}

static void subflow_set_remote_key(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
Expand All @@ -465,6 +462,31 @@ static void subflow_set_remote_key(struct mptcp_sock *msk,
atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
}

static void mptcp_propagate_state(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
{
struct mptcp_sock *msk = mptcp_sk(sk);

mptcp_data_lock(sk);
if (mp_opt) {
/* Options are available only in the non fallback cases
* avoid updating rx path fields otherwise
*/
WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
subflow_set_remote_key(msk, subflow, mp_opt);
}

if (!sock_owned_by_user(sk)) {
__mptcp_sync_state(sk, ssk->sk_state);
} else {
msk->pending_state = ssk->sk_state;
__set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
}
mptcp_data_unlock(sk);
}

static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
Expand Down Expand Up @@ -499,10 +521,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (mp_opt.deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
subflow_set_remote_key(msk, subflow, &mp_opt);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
mptcp_propagate_state(parent, sk);
mptcp_propagate_state(parent, sk, subflow, &mp_opt);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];

Expand Down Expand Up @@ -545,8 +566,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
}
} else if (mptcp_check_fallback(sk)) {
fallback:
mptcp_rcv_space_init(msk, sk);
mptcp_propagate_state(parent, sk);
mptcp_propagate_state(parent, sk, subflow, NULL);
}
return;

Expand Down Expand Up @@ -731,17 +751,16 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
kfree_rcu(ctx, rcu);
}

void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
{
struct mptcp_sock *msk = mptcp_sk(subflow->conn);

subflow_set_remote_key(msk, subflow, mp_opt);
subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true);

if (subflow->is_mptfo)
mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
__mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
}

static struct sock *subflow_syn_recv_sock(const struct sock *sk,
Expand Down Expand Up @@ -834,7 +853,6 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* mpc option
*/
if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
mptcp_subflow_fully_established(ctx, &mp_opt);
mptcp_pm_fully_established(owner, child);
ctx->pm_notified = 1;
}
Expand Down Expand Up @@ -1744,10 +1762,9 @@ static void subflow_state_change(struct sock *sk)
msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
mptcp_do_fallback(sk);
mptcp_rcv_space_init(msk, sk);
pr_fallback(msk);
subflow->conn_finished = 1;
mptcp_propagate_state(parent, sk);
mptcp_propagate_state(parent, sk, subflow, NULL);
}

/* as recvmsg() does not acquire the subflow socket for ssk selection
Expand Down

0 comments on commit 603604c

Please sign in to comment.