Merge branch 'sysctl-races-part-5'
Kuniyuki Iwashima says:

====================
sysctl: Fix data-races around ipv4_net_table (Round 5).

This series fixes data-races around 15 knobs starting from tcp_dsack in
ipv4_net_table.

tcp_tso_win_divisor was skipped because it already uses READ_ONCE().

So, the final round for ipv4_net_table will start with tcp_pacing_ss_ratio.
====================

Signed-off-by: David S. Miller <[email protected]>
davem330 committed Jul 22, 2022
2 parents ebbbe23 + 2afdbe7 commit b20a7ca
Showing 7 changed files with 23 additions and 22 deletions.
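
For context, the pattern applied throughout this series is the usual one for sysctl knobs read outside any lock: each knob is a plain int that the sysctl handler may rewrite at any moment, so a concurrent plain read is a data race under the kernel memory model (the kind KCSAN reports). Annotating the read with READ_ONCE() makes it a single, untearable load. The sketch below is illustrative only, not code from the patched files; the knob and function names are hypothetical, and it assumes the READ_ONCE()/WRITE_ONCE() macros from <linux/compiler.h>.

/* Hypothetical knob, rewritten at any time by a sysctl handler. */
static int sysctl_example_knob = 1;

/* Writer side (e.g. the proc handler), hypothetical: a marked store
 * pairs with the marked loads and cannot be torn by the compiler.
 */
static void example_knob_write(int val)
{
	WRITE_ONCE(sysctl_example_knob, val);
}

/* Reader side, hypothetical fast path: one marked load; the compiler
 * can neither re-read the knob mid-function nor split the access.
 */
static bool example_feature_enabled(void)
{
	return READ_ONCE(sysctl_example_knob) > 0;
}

Copying the knob into a local variable first, as tcp_win_from_space() does in the first hunk below, additionally guarantees that every use within the function sees one consistent value.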
2 changes: 1 addition & 1 deletion include/net/tcp.h
@@ -1419,7 +1419,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space,

static inline int tcp_win_from_space(const struct sock *sk, int space)
{
-int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
+int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);

return tcp_adv_win_scale <= 0 ?
(space>>(-tcp_adv_win_scale)) :
2 changes: 1 addition & 1 deletion net/ipv4/tcp.c
@@ -686,7 +686,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
int size_goal)
{
return skb->len < size_goal &&
-sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
+READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
!tcp_rtx_queue_empty(sk) &&
refcount_read(&sk->sk_wmem_alloc) > skb->truesize &&
tcp_skb_can_collapse_to(skb);
17 changes: 9 additions & 8 deletions net/ipv4/tcp_input.c
@@ -534,7 +534,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
*/
static void tcp_init_buffer_space(struct sock *sk)
{
-int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
+int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;

@@ -724,7 +724,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
* <prev RTT . ><current RTT .. ><next RTT .... >
*/

-if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvmem, rcvbuf;
u64 rcvwin, grow;
@@ -2175,7 +2175,7 @@ void tcp_enter_loss(struct sock *sk)
* loss recovery is underway except recurring timeout(s) on
* the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
*/
-tp->frto = net->ipv4.sysctl_tcp_frto &&
+tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
(new_recovery || icsk->icsk_retransmits) &&
!inet_csk(sk)->icsk_mtup.probe_size;
}
@@ -3058,7 +3058,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,

static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
{
-u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
+u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
struct tcp_sock *tp = tcp_sk(sk);

if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
@@ -3581,7 +3581,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
if (*last_oow_ack_time) {
s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);

-if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
+if (0 <= elapsed &&
+    elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
NET_INC_STATS(net, mib_idx);
return true; /* rate-limited: don't send yet! */
}
@@ -3629,7 +3630,7 @@ static void tcp_send_challenge_ack(struct sock *sk)
/* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ;
if (now != challenge_timestamp) {
-u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
u32 half = (ack_limit + 1) >> 1;

challenge_timestamp = now;
@@ -4426,7 +4427,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
{
struct tcp_sock *tp = tcp_sk(sk);

-if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
int mib_idx;

if (before(seq, tp->rcv_nxt))
@@ -4473,7 +4474,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);

-if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;

tcp_rcv_spurious_retrans(sk, skb);
10 changes: 5 additions & 5 deletions net/ipv4/tcp_metrics.c
@@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
int m;

sk_dst_confirm(sk);
-if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
+if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
return;

rcu_read_lock();
@@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk)

if (tcp_in_initial_slowstart(tp)) {
/* Slow start still did not finish. */
-if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && (tcp_snd_cwnd(tp) >> 1) > val)
@@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk)
} else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
-if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh));
@@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk)
tcp_metric_set(tm, TCP_METRIC_CWND,
(val + tp->snd_ssthresh) >> 1);
}
-if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && tp->snd_ssthresh > val)
@@ -463,7 +463,7 @@ void tcp_init_metrics(struct sock *sk)
if (tcp_metric_locked(tm, TCP_METRIC_CWND))
tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);

-val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
+val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val) {
tp->snd_ssthresh = val;
10 changes: 5 additions & 5 deletions net/ipv4/tcp_output.c
@@ -230,7 +230,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
* which we interpret as a sign the remote TCP is not
* misinterpreting the window field as a signed quantity.
*/
-if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
+if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
(*rcv_wnd) = min_t(u32, space, U16_MAX);
@@ -285,7 +285,7 @@ static u16 tcp_select_window(struct sock *sk)
* scaled window.
*/
if (!tp->rx_opt.rcv_wscale &&
-sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
+READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
@@ -1976,7 +1976,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,

bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift);

-r = tcp_min_rtt(tcp_sk(sk)) >> sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log;
+r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log);
if (r < BITS_PER_TYPE(sk->sk_gso_max_size))
bytes += sk->sk_gso_max_size >> r;

@@ -1995,7 +1995,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)

min_tso = ca_ops->min_tso_segs ?
ca_ops->min_tso_segs(sk) :
-sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);

tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
@@ -2507,7 +2507,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
if (sk->sk_pacing_status == SK_PACING_NONE)
limit = min_t(unsigned long, limit,
-sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
limit <<= factor;

if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
2 changes: 1 addition & 1 deletion net/mptcp/options.c
@@ -1271,7 +1271,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
if (unlikely(th->syn))
new_win = min(new_win, 65535U) << tp->rx_opt.rcv_wscale;
if (!tp->rx_opt.rcv_wscale &&
-sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows)
+READ_ONCE(sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows))
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
2 changes: 1 addition & 1 deletion net/mptcp/protocol.c
@@ -1908,7 +1908,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (msk->rcvq_space.copied <= msk->rcvq_space.space)
goto new_measure;

-if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvmem, rcvbuf;
u64 rcvwin, grow;
