Skip to content

Commit

Permalink
conntrack: Optimize recirculations.
Browse files Browse the repository at this point in the history
Cache the 'conn' context and use it when it is valid.  The cached 'conn'
context will get reset if it is not expected to be valid; the cost to do
this is negligible.  Besides being most optimal, this also handles corner
cases, such as decapsulation leading to the same tuple, as in tunnel VPN
cases.  A negative test is added to check the resetting of the cached
'conn'.

Signed-off-by: Darrell Ball <[email protected]>
Signed-off-by: Ben Pfaff <[email protected]>
  • Loading branch information
darball1 authored and blp committed Sep 25, 2019
1 parent ba5ca28 commit 594570e
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 5 deletions.
60 changes: 55 additions & 5 deletions lib/conntrack.c
Original file line number Diff line number Diff line change
Expand Up @@ -1088,6 +1088,46 @@ conn_update_state_alg(struct conntrack *ct, struct dp_packet *pkt,
return false;
}

static void
set_cached_conn(const struct nat_action_info_t *nat_action_info,
const struct conn_lookup_ctx *ctx, struct conn *conn,
struct dp_packet *pkt)
{
if (OVS_LIKELY(!nat_action_info)) {
pkt->md.conn = conn;
pkt->md.reply = ctx->reply;
pkt->md.icmp_related = ctx->icmp_related;
} else {
pkt->md.conn = NULL;
}
}

static void
process_one_fast(uint16_t zone, const uint32_t *setmark,
const struct ovs_key_ct_labels *setlabel,
const struct nat_action_info_t *nat_action_info,
struct conn *conn, struct dp_packet *pkt)
{
if (nat_action_info) {
handle_nat(pkt, conn, zone, pkt->md.reply, pkt->md.icmp_related);
pkt->md.conn = NULL;
}

pkt->md.ct_zone = zone;
ovs_mutex_lock(&conn->lock);
pkt->md.ct_mark = conn->mark;
pkt->md.ct_label = conn->label;
ovs_mutex_unlock(&conn->lock);

if (setmark) {
set_mark(pkt, conn, setmark[0], setmark[1]);
}

if (setlabel) {
set_label(pkt, conn, &setlabel[0], &setlabel[1]);
}
}

static void
process_one(struct conntrack *ct, struct dp_packet *pkt,
struct conn_lookup_ctx *ctx, uint16_t zone,
Expand Down Expand Up @@ -1188,6 +1228,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
}

handle_alg_ctl(ct, ctx, pkt, ct_alg_ctl, conn, now, !!nat_action_info);

set_cached_conn(nat_action_info, ctx, conn, pkt);
}

/* Sends the packets in '*pkt_batch' through the connection tracker 'ct'. All
Expand Down Expand Up @@ -1215,14 +1257,21 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
struct conn_lookup_ctx ctx;

DP_PACKET_BATCH_FOR_EACH (i, packet, pkt_batch) {
if (packet->md.ct_state == CS_INVALID
|| !conn_key_extract(ct, packet, dl_type, &ctx, zone)) {
struct conn *conn = packet->md.conn;
if (OVS_UNLIKELY(packet->md.ct_state == CS_INVALID)) {
write_ct_md(packet, zone, NULL, NULL, NULL);
} else if (conn && conn->key.zone == zone && !force
&& !get_alg_ctl_type(packet, tp_src, tp_dst, helper)) {
process_one_fast(zone, setmark, setlabel, nat_action_info,
conn, packet);
} else if (OVS_UNLIKELY(!conn_key_extract(ct, packet, dl_type, &ctx,
zone))) {
packet->md.ct_state = CS_INVALID;
write_ct_md(packet, zone, NULL, NULL, NULL);
continue;
} else {
process_one(ct, packet, &ctx, zone, force, commit, now, setmark,
setlabel, nat_action_info, tp_src, tp_dst, helper);
}
process_one(ct, packet, &ctx, zone, force, commit, now, setmark,
setlabel, nat_action_info, tp_src, tp_dst, helper);
}

ipf_postprocess_conntrack(ct->ipf, pkt_batch, now, dl_type);
Expand All @@ -1236,6 +1285,7 @@ conntrack_clear(struct dp_packet *packet)
/* According to pkt_metadata_init(), ct_state == 0 is enough to make all of
* the conntrack fields invalid. */
packet->md.ct_state = 0;
pkt_metadata_init_conn(&packet->md);
}

static void
Expand Down
1 change: 1 addition & 0 deletions lib/netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,7 @@ netdev_pop_header(struct netdev *netdev, struct dp_packet_batch *batch)
* interpretation in the further packet processing when
* recirculated.*/
dp_packet_reset_offload(packet);
pkt_metadata_init_conn(&packet->md);
dp_packet_batch_refill(batch, packet, i);
}
}
Expand Down
9 changes: 9 additions & 0 deletions lib/packets.c
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,8 @@ push_mpls(struct dp_packet *packet, ovs_be16 ethtype, ovs_be32 lse)
header = dp_packet_resize_l2_5(packet, MPLS_HLEN);
memmove(header, header + MPLS_HLEN, len);
memcpy(header + len, &lse, sizeof lse);

pkt_metadata_init_conn(&packet->md);
}

/* If 'packet' is an MPLS packet, removes its outermost MPLS label stack entry.
Expand Down Expand Up @@ -983,6 +985,8 @@ packet_set_ipv4_addr(struct dp_packet *packet,
ovs_be32 old_addr = get_16aligned_be32(addr);
size_t l4_size = dp_packet_l4_size(packet);

pkt_metadata_init_conn(&packet->md);

if (nh->ip_proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) {
struct tcp_header *th = dp_packet_l4(packet);

Expand Down Expand Up @@ -1122,6 +1126,7 @@ packet_set_ipv6_addr(struct dp_packet *packet, uint8_t proto,
packet_update_csum128(packet, proto, addr, new_addr);
}
memcpy(addr, new_addr, sizeof(ovs_be32[4]));
pkt_metadata_init_conn(&packet->md);
}

static void
Expand Down Expand Up @@ -1223,6 +1228,7 @@ packet_set_tcp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)

packet_set_port(&th->tcp_src, src, &th->tcp_csum);
packet_set_port(&th->tcp_dst, dst, &th->tcp_csum);
pkt_metadata_init_conn(&packet->md);
}

/* Sets the UDP source and destination port ('src' and 'dst' respectively) of
Expand All @@ -1244,6 +1250,7 @@ packet_set_udp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
uh->udp_src = src;
uh->udp_dst = dst;
}
pkt_metadata_init_conn(&packet->md);
}

/* Sets the SCTP source and destination port ('src' and 'dst' respectively) of
Expand All @@ -1265,6 +1272,7 @@ packet_set_sctp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)

new_csum = crc32c((void *)sh, tp_len);
put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ new_csum);
pkt_metadata_init_conn(&packet->md);
}

/* Sets the ICMP type and code of the ICMP header contained in 'packet'.
Expand All @@ -1283,6 +1291,7 @@ packet_set_icmp(struct dp_packet *packet, uint8_t type, uint8_t code)

ih->icmp_csum = recalc_csum16(ih->icmp_csum, orig_tc, new_tc);
}
pkt_metadata_init_conn(&packet->md);
}

/* Sets the IGMP type to IGMP_HOST_MEMBERSHIP_QUERY and populates the
Expand Down
11 changes: 11 additions & 0 deletions lib/packets.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "timeval.h"

struct dp_packet;
struct conn;
struct ds;

/* Purely internal to OVS userspace. These flags should never be exposed to
Expand Down Expand Up @@ -108,6 +109,9 @@ PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
uint32_t ct_mark; /* Connection mark. */
ovs_u128 ct_label; /* Connection label. */
union flow_in_port in_port; /* Input port. */
struct conn *conn; /* Cached conntrack connection. */
bool reply; /* True if reply direction. */
bool icmp_related; /* True if ICMP related. */
);

PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
Expand Down Expand Up @@ -139,6 +143,12 @@ pkt_metadata_init_tnl(struct pkt_metadata *md)
memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata.opts));
}

static inline void
pkt_metadata_init_conn(struct pkt_metadata *md)
{
md->conn = NULL;
}

static inline void
pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
{
Expand All @@ -157,6 +167,7 @@ pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
md->tunnel.ip_dst = 0;
md->tunnel.ipv6_dst = in6addr_any;
md->in_port.odp_port = port;
md->conn = NULL;
}

/* This function prefetches the cachelines touched by pkt_metadata_init()
Expand Down
63 changes: 63 additions & 0 deletions tests/system-traffic.at
Original file line number Diff line number Diff line change
Expand Up @@ -5618,6 +5618,69 @@ grep "tcp,orig=(src=10.254.254.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),r
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP

AT_SETUP([conntrack - negative test for recirculation optimization])
dnl This test will fail if 'conn' caching is being used, because the tuple
dnl has been changed outside of conntrack.
AT_SKIP_IF([test $HAVE_NC = no])
CHECK_CONNTRACK()
OVS_TRAFFIC_VSWITCHD_START()
OVS_CHECK_CT_CLEAR()

ADD_NAMESPACES(at_ns0, at_ns1)
ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01") dnl FIP 10.254.254.1
ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02") dnl FIP 10.254.254.2

dnl Static ARPs
NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.2 lladdr f0:00:00:01:01:02 dev p0])
NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.1 lladdr f0:00:00:01:01:01 dev p1])

dnl Static ARP and route entries for the FIP "gateway"
NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.254 lladdr f0:00:00:01:01:FE dev p0])
NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.254 lladdr f0:00:00:01:01:FE dev p1])
NS_CHECK_EXEC([at_ns0], [ip route add default nexthop via 10.1.1.254])
NS_CHECK_EXEC([at_ns1], [ip route add default nexthop via 10.1.1.254])

NETNS_DAEMONIZE([at_ns0], [nc -l -k 1234 > /dev/null], [nc0.pid])

AT_DATA([flows.txt], [dnl
table=0,priority=10 ip action=ct(table=1)
dnl dst FIP
table=1,priority=20 ip,ct_state=+trk+est,nw_dst=10.254.254.0/24 action=goto_table:2
table=1,priority=20 ip,ct_state=+trk+new,nw_dst=10.254.254.0/24 action=ct(commit,exec(set_field:1->ct_mark),table=2)
dnl
dnl FIP translation (dst FIP, src local) --> (dst local, src FIP)
table=2 ip,nw_dst=10.254.254.1 action=set_field:10.1.1.1->nw_dst,goto_table:3
table=2 ip,nw_dst=10.254.254.2 action=set_field:10.1.1.2->nw_dst,goto_table:3
table=3 ip,nw_src=10.1.1.1 action=set_field:10.254.254.1->nw_src,goto_table:4
table=3 ip,nw_src=10.1.1.2 action=set_field:10.254.254.2->nw_src,goto_table:4
table=4 ip,nw_dst=10.1.1.1 action=set_field:f0:00:00:01:01:01->eth_dst,goto_table:5
table=4 ip,nw_dst=10.1.1.2 action=set_field:f0:00:00:01:01:02->eth_dst,goto_table:5
table=5 ip,nw_src=10.254.254.0/24 action=set_field:f0:00:00:01:01:FE->eth_src,goto_table:6
dnl
dnl Tuple has been changed outside of conntrack
table=6,priority=10 ip action=ct(table=7)
dnl
table=7 ip,ct_state=+trk+est action=goto_table:8
table=7 ip,ct_mark=0x0,ct_state=+trk+new action=ct(commit,exec(set_field:2->ct_mark),table=8)
dnl
table=8 ip,nw_dst=10.1.1.1 action=output:ovs-p0
table=8 ip,nw_dst=10.1.1.2 action=output:ovs-p1
])

AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])

NS_CHECK_EXEC([at_ns1], [echo "foobar" |nc $NC_EOF_OPT 10.254.254.1 1234])

AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.254.254)], [0], [dnl
tcp,orig=(src=10.1.1.2,dst=10.254.254.1,sport=<cleared>,dport=<cleared>),reply=(src=10.254.254.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),mark=1,protoinfo=(state=<cleared>)
tcp,orig=(src=10.254.254.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.1,dst=10.254.254.2,sport=<cleared>,dport=<cleared>),mark=2,protoinfo=(state=<cleared>)
])

ovs-appctl dpif/dump-flows br0

OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP

AT_BANNER([802.1ad])

AT_SETUP([802.1ad - vlan_limit])
Expand Down

0 comments on commit 594570e

Please sign in to comment.