tcp: more efficient RACK loss detection
Use the new time-ordered list to speed up RACK. The detection
logic is identical. But since the list is chronologically ordered
by skb_mstamp and contains only skbs not yet acked or sacked,
RACK can abort the loop upon hitting skbs that were sent more
recently. On YouTube servers this patch reduces the number of
write queue iterations by 40x. The improvement is even bigger on
large-BDP networks.

Signed-off-by: Yuchung Cheng <[email protected]>
Signed-off-by: Neal Cardwell <[email protected]>
Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
yuchungcheng authored and davem330 committed Oct 6, 2017
1 parent e208007 commit 043b87d
Showing 1 changed file with 5 additions and 15 deletions.
net/ipv4/tcp_recovery.c: 20 changes (5 additions & 15 deletions)

--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -45,7 +45,7 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
 static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
+	struct sk_buff *skb, *n;
 	u32 reo_wnd;
 
 	*reo_timeout = 0;
@@ -58,17 +58,10 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 	if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U)
 		reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd);
 
-	tcp_for_write_queue(skb, sk) {
+	list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue,
+				 tcp_tsorted_anchor) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 
-		if (skb == tcp_send_head(sk))
-			break;
-
-		/* Skip ones already (s)acked */
-		if (!after(scb->end_seq, tp->snd_una) ||
-		    scb->sacked & TCPCB_SACKED_ACKED)
-			continue;
-
 		if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
 					tp->rack.end_seq, scb->end_seq)) {
 			/* Step 3 in draft-cheng-tcpm-rack-00.txt:
@@ -81,6 +74,7 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 
 			if (remaining < 0) {
 				tcp_rack_mark_skb_lost(sk, skb);
+				list_del_init(&skb->tcp_tsorted_anchor);
 				continue;
 			}
 
@@ -91,11 +85,7 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 
 			/* Record maximum wait time (+1 to avoid 0) */
 			*reo_timeout = max_t(u32, *reo_timeout, 1 + remaining);
-
-		} else if (!(scb->sacked & TCPCB_RETRANS)) {
-			/* Original data are sent sequentially so stop early
-			 * b/c the rest are all sent after rack_sent
-			 */
+		} else {
 			break;
 		}
 	}
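For readers less familiar with the RACK loop, here is a minimal userspace sketch of the early-exit idea the patch exploits: because tp->tsorted_sent_queue is ordered by transmit time and holds only skbs that are neither acked nor sacked, the detection pass can stop at the first entry sent after the RACK reference timestamp. This is not the kernel code; the names below (struct pkt, sent_us, rack_sent_us) are invented for illustration, and the reordering-window arithmetic is left out.

/* Illustrative userspace sketch, not kernel code. */
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

struct pkt {
	uint64_t sent_us;	/* transmit timestamp, microseconds */
	bool     lost;		/* marked lost by the detection pass */
};

/* Walk packets in transmit-time order (oldest first).  Any packet sent
 * before the RACK reference timestamp is marked lost; the first packet
 * sent at or after it ends the scan, because everything behind it on a
 * chronologically ordered list is newer still.
 */
static int rack_detect_loss(struct pkt *q, int n, uint64_t rack_sent_us)
{
	int iterations = 0;

	for (int i = 0; i < n; i++) {
		iterations++;
		if (q[i].sent_us < rack_sent_us) {
			q[i].lost = true;
			continue;
		}
		break;	/* the rest were sent after the RACK reference */
	}
	return iterations;
}

int main(void)
{
	/* Five unacked packets, already ordered by send time. */
	struct pkt q[] = {
		{ .sent_us = 100 }, { .sent_us = 200 }, { .sent_us = 300 },
		{ .sent_us = 400 }, { .sent_us = 500 },
	};
	int n = rack_detect_loss(q, 5, 250);

	/* Prints iterations=3: only the first three entries are visited,
	 * and the two oldest packets are marked lost.
	 */
	printf("iterations=%d lost0=%d lost1=%d lost2=%d\n",
	       n, q[0].lost, q[1].lost, q[2].lost);
	return 0;
}

Scanning a queue that is ordered oldest-first means the loop only touches packets that can still be declared lost, which is where the reported reduction in write queue iterations comes from.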
