Skip to content

Commit

Permalink
datapath: Add basic MPLS support to kernel
Browse files Browse the repository at this point in the history
Allow datapath to recognize and extract MPLS labels into flow keys
and execute actions which push, pop, and set labels on packets.

Based heavily on work by Leo Alterman, Ravi K, Isaku Yamahata and Joe Stringer.

Cc: Ravi K <[email protected]>
Cc: Leo Alterman <[email protected]>
Cc: Isaku Yamahata <[email protected]>
Cc: Joe Stringer <[email protected]>
Signed-off-by: Simon Horman <[email protected]>
Signed-off-by: Jesse Gross <[email protected]>
  • Loading branch information
horms authored and jessegross committed Jun 24, 2014
1 parent b2f771e commit ccf4378
Show file tree
Hide file tree
Showing 14 changed files with 429 additions and 65 deletions.
4 changes: 0 additions & 4 deletions OPENFLOW-1.1+
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,6 @@ OpenFlow 1.1
The list of remaining work items for OpenFlow 1.1 is below. It is
probably incomplete.

* MPLS. Simon Horman maintains a patch series that adds this
feature. This is partially merged.
[optional for OF1.1+]

* Match and set double-tagged VLANs (QinQ). This requires kernel
work for reasonable performance.
[optional for OF1.1+]
Expand Down
1 change: 1 addition & 0 deletions datapath/Modules.mk
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ openvswitch_headers = \
flow.h \
flow_netlink.h \
flow_table.h \
mpls.h \
vlan.h \
vport.h \
vport-internal_dev.h \
Expand Down
115 changes: 114 additions & 1 deletion datapath/actions.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
#include <net/sctp/checksum.h>

#include "datapath.h"
#include "gso.h"
#include "mpls.h"
#include "vlan.h"
#include "vport.h"

Expand All @@ -49,6 +51,98 @@ static int make_writable(struct sk_buff *skb, int write_len)
return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}

/* Return a pointer to the first byte past the mac header, i.e. the start
 * of the mac header advanced by skb->mac_len.
 *
 * Non-MPLS skbs: this is the same location as the network header.
 * MPLS skbs: the MPLS label stack sits between the end of the mac header
 * and the network header, so the returned pointer is the top of the label
 * stack and lies before the network header.
 */
static unsigned char *mac_header_end(const struct sk_buff *skb)
{
	unsigned char *mac_start = skb_mac_header(skb);

	return mac_start + skb->mac_len;
}

static int push_mpls(struct sk_buff *skb,
const struct ovs_action_push_mpls *mpls)
{
__be32 *new_mpls_lse;
struct ethhdr *hdr;

if (skb_cow_head(skb, MPLS_HLEN) < 0)
return -ENOMEM;

skb_push(skb, MPLS_HLEN);
memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
skb->mac_len);
skb_reset_mac_header(skb);

new_mpls_lse = (__be32 *)mac_header_end(skb);
*new_mpls_lse = mpls->mpls_lse;

if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
MPLS_HLEN, 0));

hdr = eth_hdr(skb);
hdr->h_proto = mpls->mpls_ethertype;
if (!ovs_skb_get_inner_protocol(skb))
ovs_skb_set_inner_protocol(skb, skb->protocol);
skb->protocol = mpls->mpls_ethertype;
return 0;
}

/* Pop the top MPLS label stack entry (LSE) from the packet.
 *
 * The mac header is shifted MPLS_HLEN bytes towards the tail, overwriting
 * the top LSE, and the packet is shortened by the same amount. The
 * ethertype field directly preceding the end of the mac header is set to
 * @ethertype.
 *
 * @skb:       packet to modify.
 * @ethertype: ethertype to install once the LSE is removed.
 *
 * Returns 0 on success or the error reported by make_writable().
 */
static int pop_mpls(struct sk_buff *skb, const __be16 ethertype)
{
	struct ethhdr *eth;
	int rc;

	rc = make_writable(skb, skb->mac_len + MPLS_HLEN);
	if (unlikely(rc))
		return rc;

	/* Remove the departing LSE from the running checksum before its
	 * bytes are overwritten below.
	 */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_sub(skb->csum,
				     csum_partial(mac_header_end(skb),
						  MPLS_HLEN, 0));

	/* Slide the mac header over the LSE (overlapping regions), then
	 * drop the now-unused leading bytes.
	 */
	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
		skb->mac_len);
	__skb_pull(skb, MPLS_HLEN);
	skb_reset_mac_header(skb);

	/* Work back from the end of the mac header so that the ethertype
	 * field is located correctly in the presence of VLAN tags.
	 */
	eth = (struct ethhdr *)(mac_header_end(skb) - ETH_HLEN);
	eth->h_proto = ethertype;

	/* Only replace skb->protocol when it currently names MPLS. */
	if (eth_p_mpls(skb->protocol))
		skb->protocol = ethertype;

	return 0;
}

/* Overwrite the top MPLS label stack entry (LSE) of the packet.
 *
 * @skb:      packet to modify.
 * @mpls_lse: new value for the top LSE.
 *
 * Returns 0 on success or the error reported by make_writable().
 */
static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
{
	__be32 *stack;
	int err;

	err = make_writable(skb, skb->mac_len + MPLS_HLEN);
	if (unlikely(err))
		return err;

	/* Locate the label stack only after make_writable(): it may call
	 * pskb_expand_head(), which can reallocate the skb header, so a
	 * pointer into the packet taken before the call could be stale.
	 */
	stack = (__be32 *)mac_header_end(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		/* Incremental update: fold the complement of the old LSE
		 * and the new LSE into the running checksum.
		 */
		__be32 diff[] = { ~(*stack), *mpls_lse };
		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
					  ~skb->csum);
	}

	*stack = *mpls_lse;

	return 0;
}

/* remove VLAN header from packet and update csum accordingly. */
static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
{
Expand All @@ -71,7 +165,8 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)

vlan_set_encap_proto(skb, vhdr);
skb->mac_header += VLAN_HLEN;
skb_reset_mac_len(skb);
/* Update mac_len for subsequent MPLS actions */
skb->mac_len -= VLAN_HLEN;

return 0;
}
Expand Down Expand Up @@ -116,6 +211,9 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
return -ENOMEM;

/* Update mac_len for subsequent MPLS actions */
skb->mac_len += VLAN_HLEN;

if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->csum = csum_add(skb->csum, csum_partial(skb->data
+ (2 * ETH_ALEN), VLAN_HLEN, 0));
Expand Down Expand Up @@ -545,6 +643,10 @@ static int execute_set_action(struct sk_buff *skb,
case OVS_KEY_ATTR_SCTP:
err = set_sctp(skb, nla_data(nested_attr));
break;

case OVS_KEY_ATTR_MPLS:
err = set_mpls(skb, nla_data(nested_attr));
break;
}

return err;
Expand Down Expand Up @@ -606,6 +708,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
execute_hash(skb, a);
break;

case OVS_ACTION_ATTR_PUSH_MPLS:
err = push_mpls(skb, nla_data(a));
break;

case OVS_ACTION_ATTR_POP_MPLS:
err = pop_mpls(skb, nla_get_be16(a));
break;

case OVS_ACTION_ATTR_PUSH_VLAN:
err = push_vlan(skb, nla_data(a));
if (unlikely(err)) /* skb already freed. */
Expand Down Expand Up @@ -701,6 +811,9 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, bool recirc)
goto out_loop;
}

if (!recirc)
ovs_skb_init_inner_protocol(skb);

OVS_CB(skb)->tun_info = NULL;
error = do_execute_actions(dp, skb, acts->actions, acts->actions_len);

Expand Down
8 changes: 4 additions & 4 deletions datapath/datapath.c
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ static size_t key_attr_size(void)
{
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function. */
BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 21);
BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);

return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
Expand Down Expand Up @@ -586,7 +586,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
goto err_flow_free;

err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
&flow->key, 0, &acts);
&flow->key, &acts);
rcu_assign_pointer(flow->sf_acts, acts);
if (err)
goto err_flow_free;
Expand Down Expand Up @@ -874,7 +874,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_kfree_flow;

error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
0, &acts);
&acts);
if (error) {
OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
goto err_kfree_acts;
Expand Down Expand Up @@ -978,7 +978,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
return acts;

ovs_flow_mask_key(&masked_key, key, mask);
error = ovs_nla_copy_actions(a, &masked_key, 0, &acts);
error = ovs_nla_copy_actions(a, &masked_key, &acts);
if (error) {
OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
kfree(acts);
Expand Down
29 changes: 29 additions & 0 deletions datapath/flow.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include <net/ipv6.h>
#include <net/ndisc.h>

#include "mpls.h"
#include "vlan.h"

u64 ovs_flow_used_time(unsigned long flow_jiffies)
Expand Down Expand Up @@ -503,6 +504,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
return -ENOMEM;

skb_reset_network_header(skb);
skb_reset_mac_len(skb);
__skb_push(skb, skb->data - skb_mac_header(skb));

/* Network layer. */
Expand Down Expand Up @@ -605,6 +607,33 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
memset(&key->ip, 0, sizeof(key->ip));
memset(&key->ipv4, 0, sizeof(key->ipv4));
}
} else if (eth_p_mpls(key->eth.type)) {
size_t stack_len = MPLS_HLEN;

/* In the presence of an MPLS label stack the end of the L2
* header and the beginning of the L3 header differ.
*
* Advance network_header to the beginning of the L3
* header. mac_len corresponds to the end of the L2 header.
*/
while (1) {
__be32 lse;

error = check_header(skb, skb->mac_len + stack_len);
if (unlikely(error))
return 0;

memcpy(&lse, skb_network_header(skb), MPLS_HLEN);

if (stack_len == MPLS_HLEN)
memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);

skb_set_network_header(skb, skb->mac_len + stack_len);
if (lse & htonl(MPLS_BOS_MASK))
break;

stack_len += MPLS_HLEN;
}
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */

Expand Down
17 changes: 11 additions & 6 deletions datapath/flow.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,17 @@ struct sw_flow_key {
__be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
__be16 type; /* Ethernet frame type. */
} eth;
struct {
u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
u8 tos; /* IP ToS. */
u8 ttl; /* IP TTL/hop limit. */
u8 frag; /* One of OVS_FRAG_TYPE_*. */
} ip;
union {
struct {
__be32 top_lse; /* top label stack entry */
} mpls;
struct {
u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
u8 tos; /* IP ToS. */
u8 ttl; /* IP TTL/hop limit. */
u8 frag; /* One of OVS_FRAG_TYPE_*. */
} ip;
};
struct {
__be16 src; /* TCP/UDP/SCTP source port. */
__be16 dst; /* TCP/UDP/SCTP destination port. */
Expand Down
Loading

0 comments on commit ccf4378

Please sign in to comment.