userspace: Support GRE TSO.
This patch extends the userspace datapath's tunnel TSO support from
covering only VxLAN and Geneve to also covering GRE tunnels. There is
also a software fallback for cases where the egress netdev does not
support this feature.
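
For illustration, the fallback decision boils down to the sketch below.
The wrapper function is hypothetical; the packet helpers and the
NETDEV_TX_GRE_TNL_TSO capability bit are the ones touched by this patch.

    /* Illustrative sketch only -- not the actual OVS transmit path. */
    #include "dp-packet.h"          /* dp_packet_hwol_*() helpers.   */
    #include "netdev-provider.h"    /* NETDEV_TX_GRE_TNL_TSO flag.   */

    static bool
    needs_sw_gso_for_gre(struct dp_packet *pkt, uint64_t netdev_ol_flags)
    {
        if (!dp_packet_hwol_is_tso(pkt)
            || !dp_packet_hwol_is_tunnel_gre(pkt)) {
            return false;
        }
        /* Segment in software unless the egress port advertises
         * hardware GRE tunnel TSO. */
        return !(netdev_ol_flags & NETDEV_TX_GRE_TNL_TSO);
    }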

Reviewed-by: David Marchand <[email protected]>
Signed-off-by: Mike Pattrick <[email protected]>
Signed-off-by: Ilya Maximets <[email protected]>
mkp-rh authored and igsilya committed Jan 16, 2025
1 parent d68b73e commit 2276c3a
Showing 12 changed files with 298 additions and 48 deletions.
6 changes: 3 additions & 3 deletions Documentation/topics/userspace-tso.rst
@@ -110,8 +110,8 @@ Limitations
~~~~~~~~~~~

The current OvS userspace `TSO` implementation supports flat, VLAN networks,
and some tunneled connections. Currently only VxLAN and Geneve tunnels are
supported.
and some tunneled connections. Currently only VxLAN, Geneve and GRE tunnels
are supported.

The NIC driver must support and advertise checksum offload for TCP and UDP.
However, SCTP is not mandatory because very few drivers advertised support
@@ -121,7 +121,7 @@ enabled, otherwise TSO can still be enabled but SCTP packets sent to the NIC
will be dropped.

There is a limited software implementation of TSO when tunnels are used which
only supports VxLAN and Geneve. When these tunnels are used with TSO,
only supports VxLAN, Geneve, and GRE. When these tunnels are used with TSO,
not all ports attached to the datapath need to support hardware TSO.
Guests using vhost-user in client mode will receive TSO packets regardless of
TSO being enabled or disabled within the guest.
2 changes: 1 addition & 1 deletion NEWS
@@ -32,7 +32,7 @@ Post-v3.4.0
treated as a global value, aligning the behavior with that of
the kernel datapath.
* Extended the support for TSO software fallback to include support for
VXLAN and Geneve tunneled packets.
VXLAN, Geneve, and GRE tunneled packets.
- Linux TC offload:
* Add support for matching tunnel flags if the kernel supports it.
* Add support for the "Don't Fragment" (DF) flag in the encap action,
30 changes: 21 additions & 9 deletions lib/dp-packet-gso.c
@@ -73,8 +73,7 @@ dp_packet_gso_nr_segs(struct dp_packet *p)
const char *data_tail;
const char *data_pos;

if (dp_packet_hwol_is_tunnel_vxlan(p) ||
dp_packet_hwol_is_tunnel_geneve(p)) {
if (dp_packet_hwol_is_tunnel(p)) {
data_pos = dp_packet_get_inner_tcp_payload(p);
} else {
data_pos = dp_packet_get_tcp_payload(p);
@@ -105,7 +104,9 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
bool outer_ipv4;
int hdr_len;
int seg_len;
bool tnl;
bool udp_tnl = dp_packet_hwol_is_tunnel_vxlan(p) ||
dp_packet_hwol_is_tunnel_geneve(p);
bool gre_tnl = dp_packet_hwol_is_tunnel_gre(p);

tso_segsz = dp_packet_get_tso_segsz(p);
if (!tso_segsz) {
@@ -114,11 +115,9 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
return false;
}

if (dp_packet_hwol_is_tunnel_vxlan(p) ||
dp_packet_hwol_is_tunnel_geneve(p)) {
if (udp_tnl || gre_tnl) {
outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p);
tcp_hdr = dp_packet_inner_l4(p);
tnl = true;

if (outer_ipv4) {
outer_ip_id = ntohs(((struct ip_header *) dp_packet_l3(p))->ip_id);
@@ -130,7 +129,6 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
} else {
outer_ipv4 = dp_packet_hwol_is_ipv4(p);
tcp_hdr = dp_packet_l4(p);
tnl = false;

if (outer_ipv4) {
struct ip_header *ip_hdr = dp_packet_l3(p);
@@ -156,13 +154,15 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len);
data_pos += seg_len;

if (tnl) {
if (udp_tnl) {
/* Update tunnel UDP header length. */
struct udp_header *tnl_hdr;

tnl_hdr = dp_packet_l4(seg);
tnl_hdr->udp_len = htons(dp_packet_l4_size(seg));
}

if (udp_tnl || gre_tnl) {
/* Update tunnel inner L3 header. */
if (dp_packet_hwol_is_ipv4(seg)) {
struct ip_header *ip_hdr = dp_packet_inner_l3(seg);
@@ -194,7 +194,7 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
}

/* Update L4 header. */
if (tnl) {
if (udp_tnl || gre_tnl) {
tcp_hdr = dp_packet_inner_l4(seg);
} else {
tcp_hdr = dp_packet_l4(seg);
@@ -208,6 +208,18 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset);
}

if (gre_tnl) {
struct gre_base_hdr *ghdr;

ghdr = dp_packet_l4(seg);

if (ghdr->flags & htons(GRE_CSUM)) {
ovs_be16 *csum_opt = (ovs_be16 *) (ghdr + 1);
*csum_opt = 0;
*csum_opt = csum(ghdr, dp_packet_l4_size(seg));
}
}

if (dp_packet_batch_is_full(curr_batch)) {
curr_batch++;
}
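The per-segment GRE checksum rewrite above relies on the standard RFC
2784/2890 header layout, in which the optional checksum word immediately
follows the 4-byte base header; that is why the code casts
'(ovs_be16 *) (ghdr + 1)'. A hedged sketch of that layout (illustrative
struct name; OVS's own definition is 'struct gre_base_hdr'):

    /* Illustrative layout, not the OVS definition. */
    struct gre_base_hdr_sketch {
        ovs_be16 flags;      /* GRE_CSUM, GRE_KEY and GRE_SEQ bits. */
        ovs_be16 protocol;   /* EtherType of the encapsulated packet. */
    };
    /* Optional fields follow the base header, in this order, when the
     * corresponding flag is set:
     *   checksum (16 bits) + reserved (16 bits)   -- GRE_CSUM
     *   key      (32 bits)                        -- GRE_KEY
     *   sequence (32 bits)                        -- GRE_SEQ
     * The checksum covers the GRE header and payload, so it must be
     * recomputed for every software-generated segment. */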
2 changes: 2 additions & 0 deletions lib/dp-packet.c
@@ -604,6 +604,8 @@ dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags)
NETDEV_TX_OFFLOAD_SCTP_CKSUM |
NETDEV_TX_OFFLOAD_IPV4_CKSUM);
}
} else if (dp_packet_hwol_is_tunnel_gre(p)) {
tnl_inner = true;
}

if (dp_packet_hwol_tx_ip_csum(p)) {
31 changes: 31 additions & 0 deletions lib/dp-packet.h
@@ -104,6 +104,9 @@ enum dp_packet_offload_mask {
/* Offload tunnel packet, outer header is IPv6. */
DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IPV6,
RTE_MBUF_F_TX_OUTER_IPV6, 0x40000),
/* Offload packet is GRE tunnel. */
DEF_OL_FLAG(DP_PACKET_OL_TX_TUNNEL_GRE,
RTE_MBUF_F_TX_TUNNEL_GRE, 0x80000),

/* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
};
@@ -123,6 +126,7 @@ enum dp_packet_offload_mask {
DP_PACKET_OL_TX_IP_CKSUM | \
DP_PACKET_OL_TX_TUNNEL_GENEVE | \
DP_PACKET_OL_TX_TUNNEL_VXLAN | \
DP_PACKET_OL_TX_TUNNEL_GRE | \
DP_PACKET_OL_TX_OUTER_IPV4 | \
DP_PACKET_OL_TX_OUTER_IP_CKSUM | \
DP_PACKET_OL_TX_OUTER_UDP_CKSUM | \
@@ -1171,6 +1175,22 @@ dp_packet_hwol_is_tunnel_vxlan(struct dp_packet *b)
return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_VXLAN);
}

/* Returns 'true' if packet 'b' is marked for GRE tunnel offloading. */
static inline bool
dp_packet_hwol_is_tunnel_gre(struct dp_packet *b)
{
return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_GRE);
}

/* Returns true if packet 'b' has any offloadable tunnel type. */
static inline bool
dp_packet_hwol_is_tunnel(struct dp_packet *b)
{
return !!(*dp_packet_ol_flags_ptr(b) & (DP_PACKET_OL_TX_TUNNEL_VXLAN |
DP_PACKET_OL_TX_TUNNEL_GRE |
DP_PACKET_OL_TX_TUNNEL_GENEVE));
}

/* Returns 'true' if packet 'b' is marked for outer IPv4 checksum offload. */
static inline bool
dp_packet_hwol_is_outer_ipv4_cksum(const struct dp_packet *b)
@@ -1289,11 +1309,19 @@ dp_packet_hwol_set_tunnel_vxlan(struct dp_packet *b)
*dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_VXLAN;
}

/* Mark packet 'b' for GRE tunnel offloading. */
static inline void
dp_packet_hwol_set_tunnel_gre(struct dp_packet *b)
{
*dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_GRE;
}

/* Clears tunnel offloading marks. */
static inline void
dp_packet_hwol_reset_tunnel(struct dp_packet *b)
{
*dp_packet_ol_flags_ptr(b) &= ~(DP_PACKET_OL_TX_TUNNEL_VXLAN |
DP_PACKET_OL_TX_TUNNEL_GRE |
DP_PACKET_OL_TX_TUNNEL_GENEVE);
}

@@ -1352,6 +1380,9 @@ dp_packet_hwol_reset_tcp_seg(struct dp_packet *p)
ol_flags |= DP_PACKET_OL_TX_OUTER_IP_CKSUM;
}
ol_flags |= DP_PACKET_OL_TX_OUTER_UDP_CKSUM;
} else if (ol_flags & DP_PACKET_OL_TX_TUNNEL_GRE &&
ol_flags & DP_PACKET_OL_TX_OUTER_IPV4) {
ol_flags |= DP_PACKET_OL_TX_OUTER_IP_CKSUM;
}

*dp_packet_ol_flags_ptr(p) = ol_flags;
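A short, hedged usage sketch of the new helpers (the calling function is
hypothetical): GRE now participates in the same generic tunnel test and
reset as VXLAN and Geneve.

    /* Hypothetical helper, for illustration only. */
    static void
    example_gre_offload_marks(struct dp_packet *pkt)
    {
        dp_packet_hwol_set_tunnel_gre(pkt);         /* Encap path marks GRE. */
        ovs_assert(dp_packet_hwol_is_tunnel(pkt));  /* Generic tunnel test.  */

        dp_packet_hwol_reset_tunnel(pkt);           /* Clears VXLAN, Geneve  */
        ovs_assert(!dp_packet_hwol_is_tunnel(pkt)); /* and GRE marks alike.  */
    }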
4 changes: 1 addition & 3 deletions lib/dpif-netdev.c
@@ -8928,9 +8928,7 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
struct dp_packet *packet;

DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets) {
if (dp_packet_hwol_is_tunnel_geneve(packet) ||
dp_packet_hwol_is_tunnel_vxlan(packet)) {

if (dp_packet_hwol_is_tunnel(packet)) {
if (dp_packet_hwol_is_tso(packet)) {
/* Can't perform GSO in the middle of a pipeline. */
COVERAGE_INC(datapath_drop_tunnel_tso_recirc);
15 changes: 15 additions & 0 deletions lib/netdev-dpdk.c
@@ -427,6 +427,7 @@ enum dpdk_hw_ol_features {
NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD = 1 << 9,
NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD = 1 << 10,
NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD = 1 << 11,
NETDEV_TX_GRE_TNL_TSO_OFFLOAD = 1 << 12,
};

enum dpdk_rx_steer_flags {
@@ -1100,6 +1101,8 @@ netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev)
NETDEV_TX_OFFLOAD_TCP_TSO);
netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD,
NETDEV_TX_VXLAN_TNL_TSO);
netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_GRE_TNL_TSO_OFFLOAD,
NETDEV_TX_GRE_TNL_TSO);
netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD,
NETDEV_TX_GENEVE_TNL_TSO);
netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD,
@@ -1167,6 +1170,10 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev,
conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
}

if (dev->hw_ol_features & NETDEV_TX_GRE_TNL_TSO_OFFLOAD) {
conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO;
}

if (dev->hw_ol_features & NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD) {
conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM;
}
@@ -1443,6 +1450,13 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
VLOG_WARN("%s: Tx Geneve tunnel TSO offload is not supported.",
netdev_get_name(&dev->up));
}

if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO) {
dev->hw_ol_features |= NETDEV_TX_GRE_TNL_TSO_OFFLOAD;
} else {
VLOG_WARN("%s: Tx GRE tunnel TSO offload is not supported.",
netdev_get_name(&dev->up));
}
}

n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
@@ -2650,6 +2664,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
const uint64_t tunnel_type = mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK;
if (OVS_UNLIKELY(tunnel_type &&
tunnel_type != RTE_MBUF_F_TX_TUNNEL_GENEVE &&
tunnel_type != RTE_MBUF_F_TX_TUNNEL_GRE &&
tunnel_type != RTE_MBUF_F_TX_TUNNEL_VXLAN)) {
VLOG_WARN_RL(&rl, "%s: Unexpected tunnel type: %#"PRIx64,
netdev_get_name(&dev->up), tunnel_type);
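The DPDK changes mirror the existing VXLAN/Geneve handling: the PMD's Tx
capabilities are probed at init and the GRE TSO offload is only requested
when advertised. A standalone, hedged sketch of that probe pattern using
the public DPDK API (not the OVS code itself):

    #include <rte_ethdev.h>

    /* Returns 0 if GRE tunnel TSO was enabled in 'conf', 1 if the PMD
     * does not advertise it (software GSO fallback), -1 on error. */
    static int
    request_gre_tnl_tso(uint16_t port_id, struct rte_eth_conf *conf)
    {
        struct rte_eth_dev_info info;

        if (rte_eth_dev_info_get(port_id, &info) != 0) {
            return -1;
        }
        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO) {
            conf->txmode.offloads |= RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO;
            return 0;
        }
        return 1;
    }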
40 changes: 30 additions & 10 deletions lib/netdev-native-tnl.c
@@ -194,8 +194,7 @@ netdev_tnl_push_ip_header(struct dp_packet *packet, const void *header,
packet_set_ipv6_flow_label(&ip6->ip6_flow, ipv6_label);
packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;

if (dp_packet_hwol_is_tunnel_geneve(packet) ||
dp_packet_hwol_is_tunnel_vxlan(packet)) {
if (dp_packet_hwol_is_tunnel(packet)) {
dp_packet_hwol_set_tx_outer_ipv6(packet);
} else {
dp_packet_hwol_set_tx_ipv6(packet);
@@ -207,8 +206,7 @@ netdev_tnl_push_ip_header(struct dp_packet *packet, const void *header,
ip = netdev_tnl_ip_hdr(eth);
ip->ip_tot_len = htons(*ip_tot_size);
/* Postpone checksum to when the packet is pushed to the port. */
if (dp_packet_hwol_is_tunnel_geneve(packet) ||
dp_packet_hwol_is_tunnel_vxlan(packet)) {
if (dp_packet_hwol_is_tunnel(packet)) {
dp_packet_hwol_set_tx_outer_ipv4(packet);
dp_packet_hwol_set_tx_outer_ipv4_csum(packet);
} else {
@@ -271,7 +269,9 @@ dp_packet_tnl_ol_process(struct dp_packet *packet,
ip = dp_packet_l3(packet);

if (data->tnl_type == OVS_VPORT_TYPE_GENEVE ||
data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
data->tnl_type == OVS_VPORT_TYPE_VXLAN ||
data->tnl_type == OVS_VPORT_TYPE_GRE ||
data->tnl_type == OVS_VPORT_TYPE_IP6GRE) {

if (IP_VER(ip->ip_ihl_ver) == 4) {
dp_packet_hwol_set_tx_ipv4(packet);
@@ -286,6 +286,9 @@ dp_packet_tnl_ol_process(struct dp_packet *packet,
dp_packet_hwol_set_tunnel_geneve(packet);
} else if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
dp_packet_hwol_set_tunnel_vxlan(packet);
} else if (data->tnl_type == OVS_VPORT_TYPE_GRE ||
data->tnl_type == OVS_VPORT_TYPE_IP6GRE) {
dp_packet_hwol_set_tunnel_gre(packet);
}
}

@@ -535,9 +538,13 @@ netdev_gre_push_header(const struct netdev *netdev,
const struct ovs_action_push_tnl *data)
{
struct netdev_vport *dev = netdev_vport_cast(netdev);
uint16_t l3_ofs = packet->l3_ofs;
uint16_t l4_ofs = packet->l4_ofs;
struct gre_base_hdr *greh;
int ip_tot_size;

dp_packet_tnl_ol_process(packet, data);

greh = netdev_tnl_push_ip_header(packet, data->header, data->header_len,
&ip_tot_size, 0);

@@ -547,11 +554,24 @@ netdev_gre_push_header(const struct netdev *netdev,
}

if (greh->flags & htons(GRE_SEQ)) {
/* Last 4 byte is GRE seqno */
int seq_ofs = gre_header_len(greh->flags) - 4;
ovs_16aligned_be32 *seq_opt =
ALIGNED_CAST(ovs_16aligned_be32 *, (char *)greh + seq_ofs);
put_16aligned_be32(seq_opt, htonl(atomic_count_inc(&dev->gre_seqno)));
if (!dp_packet_hwol_is_tso(packet)) {
/* Last 4 bytes are GRE seqno. */
int seq_ofs = gre_header_len(greh->flags) - 4;
ovs_16aligned_be32 *seq_opt =
ALIGNED_CAST(ovs_16aligned_be32 *, (char *) greh + seq_ofs);

put_16aligned_be32(seq_opt,
htonl(atomic_count_inc(&dev->gre_seqno)));
} else {
VLOG_WARN_RL(&err_rl, "Cannot use GRE Sequence numbers with TSO.");
}
}

if (l3_ofs != UINT16_MAX) {
packet->inner_l3_ofs = l3_ofs + data->header_len;
}
if (l4_ofs != UINT16_MAX) {
packet->inner_l4_ofs = l4_ofs + data->header_len;
}
}

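The saved l3_ofs/l4_ofs bookkeeping in netdev_gre_push_header() simply
shifts the pre-encapsulation offsets by the length of the pushed outer
header, so that the GSO code can later locate the inner headers. A worked
example with hypothetical numbers:

    /* Hypothetical offsets, for illustration only. */
    uint16_t l3_ofs = 14;       /* Inner IPv4 right after the Ethernet header. */
    uint16_t l4_ofs = 34;       /* Inner TCP after a 20-byte IPv4 header.      */
    uint16_t header_len = 38;   /* Outer Eth (14) + outer IPv4 (20) + GRE (4). */

    uint16_t inner_l3_ofs = l3_ofs + header_len;   /* 14 + 38 = 52. */
    uint16_t inner_l4_ofs = l4_ofs + header_len;   /* 34 + 38 = 72. */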
1 change: 1 addition & 0 deletions lib/netdev-provider.h
@@ -47,6 +47,7 @@ enum netdev_ol_flags {
NETDEV_TX_GENEVE_TNL_TSO = 1 << 6,
NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM = 1 << 7,
NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM = 1 << 8,
NETDEV_TX_GRE_TNL_TSO = 1 << 9,
};

/* A network device (e.g. an Ethernet device).