Skip to content

Commit

Permalink
The TCP stack has been changed to use the estimated RTT instead of timestamps for receive buffer auto resizing.
Browse files Browse the repository at this point in the history

Corresponding upstream changeset from https://svnweb.freebsd.org/base?view=revision&revision=317368.
  • Loading branch information
jfb8856606 committed Nov 22, 2019
1 parent 56d87bf commit 04b1440
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 124 deletions.
8 changes: 8 additions & 0 deletions freebsd/netinet/in_kdtrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,14 @@ SDT_PROBE_DEFINE6_XLATE(tcp, , , state__change,
"void *", "void *",
"int", "tcplsinfo_t *");

/*
 * Probe "receive__autoresize" of the tcp provider.
 *
 * Fired through TCP_PROBE6() by tcp_autorcvbuf() each time a receive
 * buffer auto-sizing measurement interval completes.  The six pairs of
 * strings below describe the six probe arguments, in the same order as
 * they are passed at the call site: NULL, tp, m, tp, th, newsize.
 * NOTE(review): each pair is presumably (native type, translated type);
 * confirm against the SDT_PROBE_DEFINE6_XLATE definition.
 *
 * The last ("int") argument is the proposed new receive buffer size, or
 * 0 when the interval ended without proposing a resize.
 */
SDT_PROBE_DEFINE6_XLATE(tcp, , , receive__autoresize,
"void *", "void *",
"struct tcpcb *", "csinfo_t *",
"struct mbuf *", "ipinfo_t *",
"struct tcpcb *", "tcpsinfo_t *" ,
"struct tcphdr *", "tcpinfoh_t *",
"int", "int");

SDT_PROBE_DEFINE5_XLATE(udp, , , receive,
"void *", "pktinfo_t *",
"struct inpcb *", "csinfo_t *",
Expand Down
1 change: 1 addition & 0 deletions freebsd/netinet/in_kdtrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ SDT_PROBE_DECLARE(tcp, , , debug__input);
SDT_PROBE_DECLARE(tcp, , , debug__output);
SDT_PROBE_DECLARE(tcp, , , debug__user);
SDT_PROBE_DECLARE(tcp, , , debug__drop);
SDT_PROBE_DECLARE(tcp, , , receive__autoresize);

SDT_PROBE_DECLARE(udp, , , receive);
SDT_PROBE_DECLARE(udp, , , send);
Expand Down
123 changes: 63 additions & 60 deletions freebsd/netinet/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -1494,6 +1494,68 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
return (IPPROTO_DONE);
}

/*
 * Receive socket buffer auto-sizing.
 *
 * Rationale: a socket buffer that is too small for the path's
 * bandwidth-delay product caps throughput on fast, long-delay links
 * (e.g. trans-continental/oceanic ones).  Receive buffer memory is only
 * rarely used to any significant extent, so the receive side can be
 * scaled aggressively.  Should the space really fill up and kernel
 * memory run short, new segments can simply be dropped and the sending
 * TCP will retransmit them later.  An oversized buffer only costs
 * memory when the application does not read() from the socket, or when
 * loss/reordering keeps data on the reassembly queue.
 *
 * A measurement interval is open while tp->rfbuf_ts is non-zero
 * (tcp_output() stamps it for SB_AUTOSIZE sockets when it is zero).
 * While the interval is open, or while any precondition below is not
 * met, the payload length of each segment is accumulated in
 * tp->rfbuf_cnt.
 *
 * The interval is closed and evaluated when all of these hold:
 *  1. auto-sizing is enabled (V_tcp_do_autorcvbuf) and the application
 *     has not fixed the size with SO_RCVBUF (which clears SB_AUTOSIZE);
 *  2. a smoothed RTT estimate exists (tp->t_srtt != 0);
 *  3. the time elapsed since tp->rfbuf_ts, converted to ticks, exceeds
 *     tp->t_srtt >> TCP_RTT_SHIFT.
 *
 * On evaluation the buffer is stepped up by one notch
 * (V_tcp_autorcvbuf_inc, capped at V_tcp_autorcvbuf_max) if:
 *  4. more than seven eighths of the current sb_hiwat were counted
 *     during the interval; and
 *  5. sb_hiwat is still below V_tcp_autorcvbuf_max.
 *
 * Whatever the outcome, the receive__autoresize probe fires and the
 * interval state is cleared, so at most one step is taken per interval.
 * Shrinking the buffer during idle times is not necessary as it does
 * not consume any memory when idle.
 *
 * This function only proposes a size; it does not resize the buffer.
 *
 * Returns the proposed new receive buffer size, or 0 if no resize is
 * proposed.
 *
 * TODO: Only step up if the application is actually serving the buffer
 * to better manage the socket buffer resources.
 */
int
tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
    struct tcpcb *tp, int tlen)
{
	int grown_size;

	/*
	 * Interval not evaluable yet (or auto-sizing not applicable):
	 * just account for this segment's payload and report "no change".
	 */
	if (!V_tcp_do_autorcvbuf ||
	    (so->so_rcv.sb_flags & SB_AUTOSIZE) == 0 ||
	    tp->t_srtt == 0 || tp->rfbuf_ts == 0 ||
	    TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) <=
	    (tp->t_srtt >> TCP_RTT_SHIFT)) {
		tp->rfbuf_cnt += tlen;
		return (0);
	}

	/* Interval complete: decide whether to propose a bigger buffer. */
	grown_size = 0;
	if (so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max &&
	    tp->rfbuf_cnt > (so->so_rcv.sb_hiwat / 8 * 7)) {
		grown_size = min(so->so_rcv.sb_hiwat + V_tcp_autorcvbuf_inc,
		    V_tcp_autorcvbuf_max);
	}
	TCP_PROBE6(receive__autoresize, NULL, tp, m, tp, th, grown_size);

	/* Begin a fresh measurement interval. */
	tp->rfbuf_ts = 0;
	tp->rfbuf_cnt = 0;

	return (grown_size);
}

void
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
Expand Down Expand Up @@ -1847,62 +1909,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
#endif
TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));

/*
* Automatic sizing of receive socket buffer. Often the send
* buffer size is not optimally adjusted to the actual network
* conditions at hand (delay bandwidth product). Setting the
* buffer size too small limits throughput on links with high
* bandwidth and high delay (eg. trans-continental/oceanic links).
*
* On the receive side the socket buffer memory is only rarely
* used to any significant extent. This allows us to be much
* more aggressive in scaling the receive socket buffer. For
* the case that the buffer space is actually used to a large
* extent and we run out of kernel memory we can simply drop
* the new segments; TCP on the sender will just retransmit it
* later. Setting the buffer size too big may only consume too
* much kernel memory if the application doesn't read() from
* the socket or packet loss or reordering makes use of the
* reassembly queue.
*
* The criteria to step up the receive buffer one notch are:
* 1. Application has not set receive buffer size with
* SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
* 2. the number of bytes received during the time it takes
* one timestamp to be reflected back to us (the RTT);
* 3. received bytes per RTT is within seven eighth of the
* current socket buffer size;
* 4. receive buffer size has not hit maximal automatic size;
*
* This algorithm does one step per RTT at most and only if
* we receive a bulk stream w/o packet losses or reorderings.
* Shrinking the buffer during idle times is not necessary as
* it doesn't consume any memory when idle.
*
* TODO: Only step up if the application is actually serving
* the buffer to better manage the socket buffer resources.
*/
if (V_tcp_do_autorcvbuf &&
(to.to_flags & TOF_TS) &&
to.to_tsecr &&
(so->so_rcv.sb_flags & SB_AUTOSIZE)) {
if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) &&
to.to_tsecr - tp->rfbuf_ts < hz) {
if (tp->rfbuf_cnt >
(so->so_rcv.sb_hiwat / 8 * 7) &&
so->so_rcv.sb_hiwat <
V_tcp_autorcvbuf_max) {
newsize =
min(so->so_rcv.sb_hiwat +
V_tcp_autorcvbuf_inc,
V_tcp_autorcvbuf_max);
}
/* Start over with next RTT. */
tp->rfbuf_ts = 0;
tp->rfbuf_cnt = 0;
} else
tp->rfbuf_cnt += tlen; /* add up */
}
newsize = tcp_autorcvbuf(m, th, so, tp, tlen);

/* Add data to socket buffer. */
SOCKBUF_LOCK(&so->so_rcv);
Expand Down Expand Up @@ -1943,10 +1950,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
win = 0;
tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));

/* Reset receive buffer auto scaling when not in bulk receive mode. */
tp->rfbuf_ts = 0;
tp->rfbuf_cnt = 0;

switch (tp->t_state) {

/*
Expand Down
10 changes: 6 additions & 4 deletions freebsd/netinet/tcp_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -798,11 +798,13 @@ tcp_output(struct tcpcb *tp)
to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
to.to_tsecr = tp->ts_recent;
to.to_flags |= TOF_TS;
/* Set receive buffer autosizing timestamp. */
if (tp->rfbuf_ts == 0 &&
(so->so_rcv.sb_flags & SB_AUTOSIZE))
tp->rfbuf_ts = tcp_ts_getticks();
}

/* Set receive buffer autosizing timestamp. */
if (tp->rfbuf_ts == 0 &&
(so->so_rcv.sb_flags & SB_AUTOSIZE))
tp->rfbuf_ts = tcp_ts_getticks();

/* Selective ACK's. */
if (tp->t_flags & TF_SACK_PERMIT) {
if (flags & TH_SYN)
Expand Down
62 changes: 2 additions & 60 deletions freebsd/netinet/tcp_stacks/fastpath.c
Original file line number Diff line number Diff line change
Expand Up @@ -396,62 +396,8 @@ tcp_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
/*
* Automatic sizing of receive socket buffer. Often the send
* buffer size is not optimally adjusted to the actual network
* conditions at hand (delay bandwidth product). Setting the
* buffer size too small limits throughput on links with high
* bandwidth and high delay (eg. trans-continental/oceanic links).
*
* On the receive side the socket buffer memory is only rarely
* used to any significant extent. This allows us to be much
* more aggressive in scaling the receive socket buffer. For
* the case that the buffer space is actually used to a large
* extent and we run out of kernel memory we can simply drop
* the new segments; TCP on the sender will just retransmit it
* later. Setting the buffer size too big may only consume too
* much kernel memory if the application doesn't read() from
* the socket or packet loss or reordering makes use of the
* reassembly queue.
*
* The criteria to step up the receive buffer one notch are:
* 1. Application has not set receive buffer size with
* SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
* 2. the number of bytes received during the time it takes
* one timestamp to be reflected back to us (the RTT);
* 3. received bytes per RTT is within seven eighth of the
* current socket buffer size;
* 4. receive buffer size has not hit maximal automatic size;
*
* This algorithm does one step per RTT at most and only if
* we receive a bulk stream w/o packet losses or reorderings.
* Shrinking the buffer during idle times is not necessary as
* it doesn't consume any memory when idle.
*
* TODO: Only step up if the application is actually serving
* the buffer to better manage the socket buffer resources.
*/
if (V_tcp_do_autorcvbuf &&
(to->to_flags & TOF_TS) &&
to->to_tsecr &&
(so->so_rcv.sb_flags & SB_AUTOSIZE)) {
if (TSTMP_GT(to->to_tsecr, tp->rfbuf_ts) &&
to->to_tsecr - tp->rfbuf_ts < hz) {
if (tp->rfbuf_cnt >
(so->so_rcv.sb_hiwat / 8 * 7) &&
so->so_rcv.sb_hiwat <
V_tcp_autorcvbuf_max) {
newsize =
min(so->so_rcv.sb_hiwat +
V_tcp_autorcvbuf_inc,
V_tcp_autorcvbuf_max);
}
/* Start over with next RTT. */
tp->rfbuf_ts = 0;
tp->rfbuf_cnt = 0;
} else
tp->rfbuf_cnt += tlen; /* add up */
}

newsize = tcp_autorcvbuf(m, th, so, tp, tlen);

/* Add data to socket buffer. */
SOCKBUF_LOCK(&so->so_rcv);
Expand Down Expand Up @@ -526,10 +472,6 @@ tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
win = 0;
tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));

/* Reset receive buffer auto scaling when not in bulk receive mode. */
tp->rfbuf_ts = 0;
tp->rfbuf_cnt = 0;

switch (tp->t_state) {

/*
Expand Down
2 changes: 2 additions & 0 deletions freebsd/netinet/tcp_var.h
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,8 @@ void hhook_run_tcp_est_in(struct tcpcb *tp,
struct tcphdr *th, struct tcpopt *to);

int tcp_input(struct mbuf **, int *, int);
/*
 * Receive buffer auto-sizing helper.  Arguments are, in order: the
 * received mbuf, its TCP header, the socket, the TCP control block and
 * the segment's data length.  Returns the proposed new receive buffer
 * size, or 0 when no resize is proposed.
 */
int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
struct tcpcb *, int);
void tcp_do_segment(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *, int, int, uint8_t,
int);
Expand Down

0 comments on commit 04b1440

Please sign in to comment.