Skip to content

Commit

Permalink
tunnels: Don't initialize unnecessary packet metadata.
Browse files Browse the repository at this point in the history
The addition of Geneve options to packet metadata significantly
expanded its size. It was reported that this can decrease performance
for DPDK ports by up to 25% since we need to initialize the whole
structure on each packet receive.

It is not really necessary to zero out the entire structure because
miniflow_extract() only copies the tunnel metadata when particular
fields indicate that it is valid. Therefore, as long as we zero out
these fields when the metadata is initialized and ensure that the
rest of the structure is correctly set in the presence of a tunnel,
we can avoid touching the tunnel fields on packet reception.

Reported-by: Ciara Loftus <[email protected]>
Tested-by: Ciara Loftus <[email protected]>
Signed-off-by: Jesse Gross <[email protected]>
Acked-by: Ben Pfaff <[email protected]>
  • Loading branch information
jessegross committed Jul 1, 2015
1 parent 421e242 commit 35303d7
Show file tree
Hide file tree
Showing 9 changed files with 42 additions and 24 deletions.
2 changes: 1 addition & 1 deletion lib/dp-packet.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so
b->source = source;
b->l2_pad_size = 0;
b->l2_5_ofs = b->l3_ofs = b->l4_ofs = UINT16_MAX;
b->md = PKT_METADATA_INITIALIZER(0);
pkt_metadata_init(&b->md, 0);
}

static void
Expand Down
21 changes: 10 additions & 11 deletions lib/dpif-netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ enum pmd_cycles_counter_type {

/* A port in a netdev-based datapath. */
struct dp_netdev_port {
struct pkt_metadata md;
odp_port_t port_no;
struct netdev *netdev;
struct cmap_node node; /* Node in dp_netdev's 'ports'. */
struct netdev_saved_flags *sf;
Expand Down Expand Up @@ -1085,7 +1085,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
}
}
port = xzalloc(sizeof *port);
port->md = PKT_METADATA_INITIALIZER(port_no);
port->port_no = port_no;
port->netdev = netdev;
port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev));
port->type = xstrdup(type);
Expand Down Expand Up @@ -1190,7 +1190,7 @@ dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no)
struct dp_netdev_port *port;

CMAP_FOR_EACH_WITH_HASH (port, node, hash_port_no(port_no), &dp->ports) {
if (port->md.in_port.odp_port == port_no) {
if (port->port_no == port_no) {
return port;
}
}
Expand Down Expand Up @@ -1300,8 +1300,7 @@ static void
do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
OVS_REQUIRES(dp->port_mutex)
{
cmap_remove(&dp->ports, &port->node,
hash_odp_port(port->md.in_port.odp_port));
cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no));
seq_change(dp->port_seq);
if (netdev_is_pmd(port->netdev)) {
int numa_id = netdev_get_numa_id(port->netdev);
Expand All @@ -1323,7 +1322,7 @@ answer_port_query(const struct dp_netdev_port *port,
{
dpif_port->name = xstrdup(netdev_get_name(port->netdev));
dpif_port->type = xstrdup(port->type);
dpif_port->port_no = port->md.in_port.odp_port;
dpif_port->port_no = port->port_no;
}

static int
Expand Down Expand Up @@ -1450,7 +1449,7 @@ dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
state->name = xstrdup(netdev_get_name(port->netdev));
dpif_port->name = state->name;
dpif_port->type = port->type;
dpif_port->port_no = port->md.in_port.odp_port;
dpif_port->port_no = port->port_no;

retval = 0;
} else {
Expand Down Expand Up @@ -2540,7 +2539,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,

/* XXX: initialize md in netdev implementation. */
for (i = 0; i < cnt; i++) {
packets[i]->md = port->md;
pkt_metadata_init(&packets[i]->md, port->port_no);
}
cycles_count_start(pmd);
dp_netdev_input(pmd, packets, cnt);
Expand Down Expand Up @@ -3652,12 +3651,12 @@ dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED,
}

/* Remove old port. */
cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->md.in_port.odp_port));
cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->port_no));
ovsrcu_postpone(free, old_port);

/* Insert new port (cmap semantics mean we cannot re-insert 'old_port'). */
new_port = xmemdup(old_port, sizeof *old_port);
new_port->md.in_port.odp_port = port_no;
new_port->port_no = port_no;
cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no));

seq_change(dp->port_seq);
Expand Down Expand Up @@ -3688,7 +3687,7 @@ dpif_dummy_delete_port(struct unixctl_conn *conn, int argc OVS_UNUSED,
ovs_mutex_lock(&dp->port_mutex);
if (get_port_by_name(dp, argv[2], &port)) {
unixctl_command_reply_error(conn, "unknown port");
} else if (port->md.in_port.odp_port == ODPP_LOCAL) {
} else if (port->port_no == ODPP_LOCAL) {
unixctl_command_reply_error(conn, "can't delete local port");
} else {
do_del_port(dp, port);
Expand Down
16 changes: 13 additions & 3 deletions lib/netdev-vport.c
Original file line number Diff line number Diff line change
Expand Up @@ -1051,6 +1051,16 @@ parse_gre_header(struct dp_packet *packet,
return hlen;
}

static void
pkt_metadata_init_tnl(struct pkt_metadata *md)
{
memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata));

/* If 'opt_map' is zero then none of the rest of the tunnel metadata
* will be read, so we can skip clearing it. */
md->tunnel.metadata.opt_map = 0;
}

static int
netdev_gre_pop_header(struct dp_packet *packet)
{
Expand All @@ -1059,7 +1069,7 @@ netdev_gre_pop_header(struct dp_packet *packet)
int hlen = sizeof(struct eth_header) +
sizeof(struct ip_header) + 4;

memset(md, 0, sizeof *md);
pkt_metadata_init_tnl(md);
if (hlen > dp_packet_size(packet)) {
return EINVAL;
}
Expand Down Expand Up @@ -1143,7 +1153,7 @@ netdev_vxlan_pop_header(struct dp_packet *packet)
struct flow_tnl *tnl = &md->tunnel;
struct vxlanhdr *vxh;

memset(md, 0, sizeof *md);
pkt_metadata_init_tnl(md);
if (VXLAN_HLEN > dp_packet_size(packet)) {
return EINVAL;
}
Expand Down Expand Up @@ -1201,7 +1211,7 @@ netdev_geneve_pop_header(struct dp_packet *packet)
unsigned int hlen;
int err;

memset(md, 0, sizeof *md);
pkt_metadata_init_tnl(md);
if (GENEVE_BASE_HLEN > dp_packet_size(packet)) {
VLOG_WARN_RL(&err_rl, "geneve packet too small: min header=%u packet size=%u\n",
(unsigned int)GENEVE_BASE_HLEN, dp_packet_size(packet));
Expand Down
2 changes: 1 addition & 1 deletion lib/netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -790,7 +790,7 @@ netdev_push_header(const struct netdev *netdev,

for (i = 0; i < cnt; i++) {
netdev->netdev_class->push_header(buffers[i], data);
buffers[i]->md = PKT_METADATA_INITIALIZER(u32_to_odp(data->out_port));
pkt_metadata_init(&buffers[i]->md, u32_to_odp(data->out_port));
}

return 0;
Expand Down
3 changes: 2 additions & 1 deletion lib/odp-util.c
Original file line number Diff line number Diff line change
Expand Up @@ -1463,6 +1463,7 @@ odp_tun_key_from_attr__(const struct nlattr *attr,
enum odp_key_fitness
odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun)
{
memset(tun, 0, sizeof *tun);
return odp_tun_key_from_attr__(attr, NULL, 0, NULL, tun);
}

Expand Down Expand Up @@ -3666,7 +3667,7 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len,
1u << OVS_KEY_ATTR_SKB_MARK | 1u << OVS_KEY_ATTR_TUNNEL |
1u << OVS_KEY_ATTR_IN_PORT;

*md = PKT_METADATA_INITIALIZER(ODPP_NONE);
pkt_metadata_init(md, ODPP_NONE);

NL_ATTR_FOR_EACH (nla, left, key, key_len) {
uint16_t type = nl_attr_type(nla);
Expand Down
15 changes: 12 additions & 3 deletions lib/packets.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ struct ds;
/* Tunnel information used in flow key and metadata. */
struct flow_tnl {
ovs_be64 tun_id;
ovs_be32 ip_src;
ovs_be32 ip_dst;
ovs_be32 ip_src;
uint16_t flags;
uint8_t ip_tos;
uint8_t ip_ttl;
Expand Down Expand Up @@ -69,8 +69,17 @@ struct pkt_metadata {
struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */
};

#define PKT_METADATA_INITIALIZER(PORT) \
(struct pkt_metadata){ .in_port.odp_port = PORT }
static inline void
pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
{
/* It can be expensive to zero out all of the tunnel metadata. However,
* we can just zero out ip_dst and the rest of the data will never be
* looked at. */
memset(md, 0, offsetof(struct pkt_metadata, tunnel));
md->tunnel.ip_dst = 0;

md->in_port.odp_port = port;
}

bool dpid_from_string(const char *s, uint64_t *dpidp);

Expand Down
2 changes: 1 addition & 1 deletion lib/tun-metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ struct geneve_opt;
* (not necessarily even contiguous), and finding it requires referring to
* 'tab'. */
struct tun_metadata {
uint8_t opts[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. */
uint64_t opt_map; /* 1-bit for each present TLV. */
uint8_t opts[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. */
struct tun_table *tab; /* Types & lengths for 'opts' and 'opt_map'. */
uint8_t pad[sizeof(uint64_t) - sizeof(struct tun_table *)]; /* Make 8 bytes */
};
Expand Down
1 change: 0 additions & 1 deletion ofproto/ofproto-dpif-upcall.c
Original file line number Diff line number Diff line change
Expand Up @@ -1134,7 +1134,6 @@ process_upcall(struct udpif *udpif, struct upcall *upcall,
memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.ipfix);

if (upcall->out_tun_key) {
memset(&output_tunnel_key, 0, sizeof output_tunnel_key);
odp_tun_key_from_attr(upcall->out_tun_key,
&output_tunnel_key);
}
Expand Down
4 changes: 2 additions & 2 deletions utilities/ovs-ofctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -2018,7 +2018,7 @@ ofctl_ofp_parse_pcap(struct ovs_cmdl_context *ctx)
if (error) {
break;
}
packet->md = PKT_METADATA_INITIALIZER(ODPP_NONE);
pkt_metadata_init(&packet->md, ODPP_NONE);
flow_extract(packet, &flow);
if (flow.dl_type == htons(ETH_TYPE_IP)
&& flow.nw_proto == IPPROTO_TCP
Expand Down Expand Up @@ -3374,7 +3374,7 @@ ofctl_parse_pcap(struct ovs_cmdl_context *ctx)
ovs_fatal(error, "%s: read failed", ctx->argv[1]);
}

packet->md = PKT_METADATA_INITIALIZER(ODPP_NONE);
pkt_metadata_init(&packet->md, ODPP_NONE);
flow_extract(packet, &flow);
flow_print(stdout, &flow);
putchar('\n');
Expand Down

0 comments on commit 35303d7

Please sign in to comment.