Skip to content

Commit

Permalink
tipc: introduce variable window congestion control
Browse files Browse the repository at this point in the history
We introduce a simple variable window congestion control for links.
The algorithm is inspired by the Reno algorithm, covering the 'slow
start', 'congestion avoidance', and 'fast recovery' modes.

- We introduce hard lower and upper window limits per link, still
  different and configurable per bearer type.

- We introduce a 'slow start threshold' variable, initially set to
  the maximum window size.

- We let a link start at the minimum congestion window, i.e. in slow
  start mode, and then let it grow rapidly (+1 per received ACK) until
  it reaches the slow start threshold and enters congestion avoidance
  mode.

- In congestion avoidance mode we increment the congestion window for
  each window-size number of acked packets, up to a possible maximum
  equal to the configured maximum window.

- For each non-duplicate NACK received, we drop back to fast recovery
  mode, by setting both the slow start threshold and the
  congestion window to (current_congestion_window / 2).

- If the timeout handler finds that the transmit queue has not moved
  since the previous timeout, it drops the link back to slow start
  and forces a probe containing the last sent sequence number to be
  sent to the peer, so that the peer can discover the stale situation.

This change does in reality have effect only on unicast ethernet
transport, as we have seen that there is no room whatsoever for
increasing the window max size for the UDP bearer.
For now, we also choose to keep the limits for the broadcast link
unchanged and equal.

This algorithm seems to give a 50-100% throughput improvement for
messages larger than MTU.

Suggested-by: Xin Long <[email protected]>
Acked-by: Ying Xue <[email protected]>
Signed-off-by: Jon Maloy <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
Jon Maloy authored and davem330 committed Dec 11, 2019
1 parent d3b0999 commit 16ad3f4
Show file tree
Hide file tree
Showing 9 changed files with 160 additions and 79 deletions.
11 changes: 6 additions & 5 deletions net/tipc/bcast.c
Original file line number Diff line number Diff line change
Expand Up @@ -562,18 +562,18 @@ int tipc_bclink_reset_stats(struct net *net)
return 0;
}

static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit)
static int tipc_bc_link_set_queue_limits(struct net *net, u32 max_win)
{
struct tipc_link *l = tipc_bc_sndlink(net);

if (!l)
return -ENOPROTOOPT;
if (limit < BCLINK_WIN_MIN)
limit = BCLINK_WIN_MIN;
if (limit > TIPC_MAX_LINK_WIN)
if (max_win < BCLINK_WIN_MIN)
max_win = BCLINK_WIN_MIN;
if (max_win > TIPC_MAX_LINK_WIN)
return -EINVAL;
tipc_bcast_lock(net);
tipc_link_set_queue_limits(l, limit);
tipc_link_set_queue_limits(l, BCLINK_WIN_MIN, max_win);
tipc_bcast_unlock(net);
return 0;
}
Expand Down Expand Up @@ -683,6 +683,7 @@ int tipc_bcast_init(struct net *net)
if (!tipc_link_bc_create(net, 0, 0,
FB_MTU,
BCLINK_WIN_DEFAULT,
BCLINK_WIN_DEFAULT,
0,
&bb->inputq,
NULL,
Expand Down
11 changes: 6 additions & 5 deletions net/tipc/bearer.c
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,8 @@ static int tipc_enable_bearer(struct net *net, const char *name,

b->identity = bearer_id;
b->tolerance = m->tolerance;
b->window = m->window;
b->min_win = m->min_win;
b->max_win = m->max_win;
b->domain = disc_domain;
b->net_plane = bearer_id + 'A';
b->priority = prio;
Expand Down Expand Up @@ -796,7 +797,7 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg,
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance))
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window))
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->max_win))
goto prop_msg_full;
if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP)
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu))
Expand Down Expand Up @@ -1088,7 +1089,7 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
if (props[TIPC_NLA_PROP_PRIO])
b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
if (props[TIPC_NLA_PROP_MTU]) {
if (b->media->type_id != TIPC_MEDIA_TYPE_UDP)
return -EINVAL;
Expand Down Expand Up @@ -1142,7 +1143,7 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg,
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, media->tolerance))
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window))
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->max_win))
goto prop_msg_full;
if (media->type_id == TIPC_MEDIA_TYPE_UDP)
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu))
Expand Down Expand Up @@ -1275,7 +1276,7 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
if (props[TIPC_NLA_PROP_PRIO])
m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
m->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
if (props[TIPC_NLA_PROP_MTU]) {
if (m->type_id != TIPC_MEDIA_TYPE_UDP)
return -EINVAL;
Expand Down
6 changes: 4 additions & 2 deletions net/tipc/bearer.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ struct tipc_media {
char *raw);
u32 priority;
u32 tolerance;
u32 window;
u32 min_win;
u32 max_win;
u32 mtu;
u32 type_id;
u32 hwaddr_len;
Expand Down Expand Up @@ -158,7 +159,8 @@ struct tipc_bearer {
struct packet_type pt;
struct rcu_head rcu;
u32 priority;
u32 window;
u32 min_win;
u32 max_win;
u32 tolerance;
u32 domain;
u32 identity;
Expand Down
3 changes: 2 additions & 1 deletion net/tipc/eth_media.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ struct tipc_media eth_media_info = {
.raw2addr = tipc_eth_raw2addr,
.priority = TIPC_DEF_LINK_PRI,
.tolerance = TIPC_DEF_LINK_TOL,
.window = TIPC_DEF_LINK_WIN,
.min_win = TIPC_DEF_LINK_WIN,
.max_win = TIPC_MAX_LINK_WIN,
.type_id = TIPC_MEDIA_TYPE_ETH,
.hwaddr_len = ETH_ALEN,
.name = "eth"
Expand Down
5 changes: 4 additions & 1 deletion net/tipc/ib_media.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
#include "core.h"
#include "bearer.h"

#define TIPC_MAX_IB_LINK_WIN 500

/* convert InfiniBand address (media address format) media address to string */
static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf,
int str_size)
Expand Down Expand Up @@ -94,7 +96,8 @@ struct tipc_media ib_media_info = {
.raw2addr = tipc_ib_raw2addr,
.priority = TIPC_DEF_LINK_PRI,
.tolerance = TIPC_DEF_LINK_TOL,
.window = TIPC_DEF_LINK_WIN,
.min_win = TIPC_DEF_LINK_WIN,
.max_win = TIPC_MAX_IB_LINK_WIN,
.type_id = TIPC_MEDIA_TYPE_IB,
.hwaddr_len = INFINIBAND_ALEN,
.name = "ib"
Expand Down
Loading

0 comments on commit 16ad3f4

Please sign in to comment.