From ccf4378615e93618e6ab8423fa1400b40876df91 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 24 Jun 2014 20:56:57 +0900 Subject: [PATCH] datapath: Add basic MPLS support to kernel Allow datapath to recognize and extract MPLS labels into flow keys and execute actions which push, pop, and set labels on packets. Based heavily on work by Leo Alterman, Ravi K, Isaku Yamahata and Joe Stringer. Cc: Ravi K Cc: Leo Alterman Cc: Isaku Yamahata Cc: Joe Stringer Signed-off-by: Simon Horman Signed-off-by: Jesse Gross --- OPENFLOW-1.1+ | 4 - datapath/Modules.mk | 1 + datapath/actions.c | 115 +++++++++++++++- datapath/datapath.c | 8 +- datapath/flow.c | 29 ++++ datapath/flow.h | 17 ++- datapath/flow_netlink.c | 130 +++++++++++++++--- datapath/flow_netlink.h | 2 +- datapath/linux/compat/gso.c | 91 ++++++++++-- datapath/linux/compat/gso.h | 57 ++++++-- .../linux/compat/include/linux/netdevice.h | 6 +- datapath/linux/compat/netdevice.c | 10 +- datapath/mpls.h | 15 ++ include/linux/openvswitch.h | 9 +- 14 files changed, 429 insertions(+), 65 deletions(-) create mode 100644 datapath/mpls.h diff --git a/OPENFLOW-1.1+ b/OPENFLOW-1.1+ index 97c29231e0f..476f79a96c1 100644 --- a/OPENFLOW-1.1+ +++ b/OPENFLOW-1.1+ @@ -54,10 +54,6 @@ OpenFlow 1.1 The list of remaining work items for OpenFlow 1.1 is below. It is probably incomplete. - * MPLS. Simon Horman maintains a patch series that adds this - feature. This is partially merged. - [optional for OF1.1+] - * Match and set double-tagged VLANs (QinQ). This requires kernel work for reasonable performance. 
[optional for OF1.1+] diff --git a/datapath/Modules.mk b/datapath/Modules.mk index 41ffbea5bc5..90e158cd249 100644 --- a/datapath/Modules.mk +++ b/datapath/Modules.mk @@ -27,6 +27,7 @@ openvswitch_headers = \ flow.h \ flow_netlink.h \ flow_table.h \ + mpls.h \ vlan.h \ vport.h \ vport-internal_dev.h \ diff --git a/datapath/actions.c b/datapath/actions.c index 72fdcf9f504..cb26ad59280 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -35,6 +35,8 @@ #include #include "datapath.h" +#include "gso.h" +#include "mpls.h" #include "vlan.h" #include "vport.h" @@ -49,6 +51,98 @@ static int make_writable(struct sk_buff *skb, int write_len) return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } +/* The end of the mac header. + * + * For non-MPLS skbs this will correspond to the network header. + * For MPLS skbs it will be before the network_header as the MPLS + * label stack lies between the end of the mac header and the network + * header. That is, for MPLS skbs the end of the mac header + * is the top of the MPLS label stack. 
+ */ +static unsigned char *mac_header_end(const struct sk_buff *skb) +{ + return skb_mac_header(skb) + skb->mac_len; +} + +static int push_mpls(struct sk_buff *skb, + const struct ovs_action_push_mpls *mpls) +{ + __be32 *new_mpls_lse; + struct ethhdr *hdr; + + if (skb_cow_head(skb, MPLS_HLEN) < 0) + return -ENOMEM; + + skb_push(skb, MPLS_HLEN); + memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), + skb->mac_len); + skb_reset_mac_header(skb); + + new_mpls_lse = (__be32 *)mac_header_end(skb); + *new_mpls_lse = mpls->mpls_lse; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse, + MPLS_HLEN, 0)); + + hdr = eth_hdr(skb); + hdr->h_proto = mpls->mpls_ethertype; + if (!ovs_skb_get_inner_protocol(skb)) + ovs_skb_set_inner_protocol(skb, skb->protocol); + skb->protocol = mpls->mpls_ethertype; + return 0; +} + +static int pop_mpls(struct sk_buff *skb, const __be16 ethertype) +{ + struct ethhdr *hdr; + int err; + + err = make_writable(skb, skb->mac_len + MPLS_HLEN); + if (unlikely(err)) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_sub(skb->csum, + csum_partial(mac_header_end(skb), + MPLS_HLEN, 0)); + + memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), + skb->mac_len); + + __skb_pull(skb, MPLS_HLEN); + skb_reset_mac_header(skb); + + /* mac_header_end() is used to locate the ethertype + * field correctly in the presence of VLAN tags. 
+ */ + hdr = (struct ethhdr *)(mac_header_end(skb) - ETH_HLEN); + hdr->h_proto = ethertype; + if (eth_p_mpls(skb->protocol)) + skb->protocol = ethertype; + return 0; +} + +static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse) +{ + __be32 *stack; + int err; + + err = make_writable(skb, skb->mac_len + MPLS_HLEN); + if (unlikely(err)) + return err; + + /* make_writable() may reallocate the head: find the LSE only now. */ + stack = (__be32 *)mac_header_end(skb); + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __be32 diff[] = { ~(*stack), *mpls_lse }; + skb->csum = ~csum_partial((char *)diff, sizeof(diff), + ~skb->csum); + } + *stack = *mpls_lse; + return 0; +} + /* remove VLAN header from packet and update csum accordingly. */ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) { @@ -71,7 +165,8 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) vlan_set_encap_proto(skb, vhdr); skb->mac_header += VLAN_HLEN; - skb_reset_mac_len(skb); + /* Update mac_len for subsequent MPLS actions */ + skb->mac_len -= VLAN_HLEN; return 0; } @@ -116,6 +211,9 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag)) return -ENOMEM; + /* Update mac_len for subsequent MPLS actions */ + skb->mac_len += VLAN_HLEN; + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(skb->csum, csum_partial(skb->data + (2 * ETH_ALEN), VLAN_HLEN, 0)); @@ -545,6 +643,10 @@ static int execute_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_SCTP: err = set_sctp(skb, nla_data(nested_attr)); break; + + case OVS_KEY_ATTR_MPLS: + err = set_mpls(skb, nla_data(nested_attr)); + break; } return err; @@ -606,6 +708,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, execute_hash(skb, a); break; + case OVS_ACTION_ATTR_PUSH_MPLS: + err = push_mpls(skb, nla_data(a)); + break; + + case OVS_ACTION_ATTR_POP_MPLS: + err = pop_mpls(skb, nla_get_be16(a)); + break; + case OVS_ACTION_ATTR_PUSH_VLAN: err = push_vlan(skb, nla_data(a)); if
(unlikely(err)) /* skb already freed. */ @@ -701,6 +811,9 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, bool recirc) goto out_loop; } + if (!recirc) + ovs_skb_init_inner_protocol(skb); + OVS_CB(skb)->tun_info = NULL; error = do_execute_actions(dp, skb, acts->actions, acts->actions_len); diff --git a/datapath/datapath.c b/datapath/datapath.c index 6f4236b41dc..4ec908e4bb4 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -382,7 +382,7 @@ static size_t key_attr_size(void) { /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 21); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -586,7 +586,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], - &flow->key, 0, &acts); + &flow->key, &acts); rcu_assign_pointer(flow->sf_acts, acts); if (err) goto err_flow_free; @@ -874,7 +874,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_kfree_flow; error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, - 0, &acts); + &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); goto err_kfree_acts; @@ -978,7 +978,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, return acts; ovs_flow_mask_key(&masked_key, key, mask); - error = ovs_nla_copy_actions(a, &masked_key, 0, &acts); + error = ovs_nla_copy_actions(a, &masked_key, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); kfree(acts); diff --git a/datapath/flow.c b/datapath/flow.c index e90f99a3f8d..e234796817e 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -45,6 +45,7 @@ #include #include +#include "mpls.h" #include "vlan.h" u64 ovs_flow_used_time(unsigned long 
flow_jiffies) @@ -503,6 +504,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) return -ENOMEM; skb_reset_network_header(skb); + skb_reset_mac_len(skb); __skb_push(skb, skb->data - skb_mac_header(skb)); /* Network layer. */ @@ -605,6 +607,33 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) memset(&key->ip, 0, sizeof(key->ip)); memset(&key->ipv4, 0, sizeof(key->ipv4)); } + } else if (eth_p_mpls(key->eth.type)) { + size_t stack_len = MPLS_HLEN; + + /* In the presence of an MPLS label stack the end of the L2 + * header and the beginning of the L3 header differ. + * + * Advance network_header to the beginning of the L3 + * header. mac_len corresponds to the end of the L2 header. + */ + while (1) { + __be32 lse; + + error = check_header(skb, skb->mac_len + stack_len); + if (unlikely(error)) + return 0; + + memcpy(&lse, skb_network_header(skb), MPLS_HLEN); + + if (stack_len == MPLS_HLEN) + memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); + + skb_set_network_header(skb, skb->mac_len + stack_len); + if (lse & htonl(MPLS_BOS_MASK)) + break; + + stack_len += MPLS_HLEN; + } } else if (key->eth.type == htons(ETH_P_IPV6)) { int nh_len; /* IPv6 Header + Extensions */ diff --git a/datapath/flow.h b/datapath/flow.h index 941486932f7..f6afa482ff4 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -104,12 +104,17 @@ struct sw_flow_key { __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ __be16 type; /* Ethernet frame type. */ } eth; - struct { - u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ - u8 tos; /* IP ToS. */ - u8 ttl; /* IP TTL/hop limit. */ - u8 frag; /* One of OVS_FRAG_TYPE_*. */ - } ip; + union { + struct { + __be32 top_lse; /* top label stack entry */ + } mpls; + struct { + u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ + u8 tos; /* IP ToS. */ + u8 ttl; /* IP TTL/hop limit. */ + u8 frag; /* One of OVS_FRAG_TYPE_*. 
*/ + } ip; + }; struct { __be16 src; /* TCP/UDP/SCTP source port. */ __be16 dst; /* TCP/UDP/SCTP destination port. */ diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index 22ad2d00b29..5a978f05955 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -20,6 +20,7 @@ #include "flow.h" #include "datapath.h" +#include "mpls.h" #include #include #include @@ -127,7 +128,8 @@ static bool match_validate(const struct sw_flow_match *match, | (1ULL << OVS_KEY_ATTR_ICMP) | (1ULL << OVS_KEY_ATTR_ICMPV6) | (1ULL << OVS_KEY_ATTR_ARP) - | (1ULL << OVS_KEY_ATTR_ND)); + | (1ULL << OVS_KEY_ATTR_ND) + | (1ULL << OVS_KEY_ATTR_MPLS)); /* Always allowed mask fields. */ mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL) @@ -142,6 +144,13 @@ static bool match_validate(const struct sw_flow_match *match, mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP; } + + if (eth_p_mpls(match->key->eth.type)) { + key_expected |= 1ULL << OVS_KEY_ATTR_MPLS; + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + mask_allowed |= 1ULL << OVS_KEY_ATTR_MPLS; + } + if (match->key->eth.type == htons(ETH_P_IP)) { key_expected |= 1ULL << OVS_KEY_ATTR_IPV4; if (match->mask && (match->mask->key.eth.type == htons(0xffff))) @@ -259,6 +268,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), [OVS_KEY_ATTR_TUNNEL] = -1, + [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls), }; static bool is_all_zero(const u8 *fp, size_t size) @@ -710,6 +720,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, attrs &= ~(1ULL << OVS_KEY_ATTR_ARP); } + if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) { + const struct ovs_key_mpls *mpls_key; + + mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); + SW_FLOW_KEY_PUT(match, mpls.top_lse, + mpls_key->mpls_lse, is_mask); + + attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS); + } + if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) { const struct ovs_key_tcp *tcp_key; @@ -1091,6 +1111,14 @@ int 
ovs_nla_put_flow(struct datapath *dp, const struct sw_flow_key *swkey, arp_key->arp_op = htons(output->ip.proto); ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); + } else if (eth_p_mpls(swkey->eth.type)) { + struct ovs_key_mpls *mpls_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); + if (!nla) + goto nla_put_failure; + mpls_key = nla_data(nla); + mpls_key->mpls_lse = output->mpls.top_lse; } if ((swkey->eth.type == htons(ETH_P_IP) || @@ -1295,9 +1323,15 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, a->nla_len = sfa->actions_len - st_offset; } +static int ovs_nla_copy_actions__(const struct nlattr *attr, + const struct sw_flow_key *key, + int depth, struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci); + static int validate_and_copy_sample(const struct nlattr *attr, const struct sw_flow_key *key, int depth, - struct sw_flow_actions **sfa) + struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -1334,7 +1368,8 @@ static int validate_and_copy_sample(const struct nlattr *attr, if (st_acts < 0) return st_acts; - err = ovs_nla_copy_actions(actions, key, depth + 1, sfa); + err = ovs_nla_copy_actions__(actions, key, depth + 1, sfa, + eth_type, vlan_tci); if (err) return err; @@ -1344,10 +1379,10 @@ static int validate_and_copy_sample(const struct nlattr *attr, return 0; } -static int validate_tp_port(const struct sw_flow_key *flow_key) +static int validate_tp_port(const struct sw_flow_key *flow_key, + __be16 eth_type) { - if ((flow_key->eth.type == htons(ETH_P_IP) || - flow_key->eth.type == htons(ETH_P_IPV6)) && + if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) && (flow_key->tp.src || flow_key->tp.dst)) return 0; @@ -1442,7 +1477,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, static int 
validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, struct sw_flow_actions **sfa, - bool *set_tun) + bool *set_tun, __be16 eth_type) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); @@ -1474,7 +1509,7 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_IPV4: - if (flow_key->eth.type != htons(ETH_P_IP)) + if (eth_type != htons(ETH_P_IP)) return -EINVAL; if (!flow_key->ip.proto) @@ -1490,7 +1525,7 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_IPV6: - if (flow_key->eth.type != htons(ETH_P_IPV6)) + if (eth_type != htons(ETH_P_IPV6)) return -EINVAL; if (!flow_key->ip.proto) @@ -1512,19 +1547,24 @@ static int validate_set(const struct nlattr *a, if (flow_key->ip.proto != IPPROTO_TCP) return -EINVAL; - return validate_tp_port(flow_key); + return validate_tp_port(flow_key, eth_type); case OVS_KEY_ATTR_UDP: if (flow_key->ip.proto != IPPROTO_UDP) return -EINVAL; - return validate_tp_port(flow_key); + return validate_tp_port(flow_key, eth_type); + + case OVS_KEY_ATTR_MPLS: + if (!eth_p_mpls(eth_type)) + return -EINVAL; + break; case OVS_KEY_ATTR_SCTP: if (flow_key->ip.proto != IPPROTO_SCTP) return -EINVAL; - return validate_tp_port(flow_key); + return validate_tp_port(flow_key, eth_type); default: return -EINVAL; @@ -1568,10 +1608,10 @@ static int copy_action(const struct nlattr *from, return 0; } -int ovs_nla_copy_actions(const struct nlattr *attr, - const struct sw_flow_key *key, - int depth, - struct sw_flow_actions **sfa) +static int ovs_nla_copy_actions__(const struct nlattr *attr, + const struct sw_flow_key *key, + int depth, struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci) { const struct nlattr *a; int rem, err; @@ -1585,6 +1625,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr, [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), [OVS_ACTION_ATTR_RECIRC] = sizeof(u32), [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, + [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct 
ovs_action_push_mpls), + [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16), [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET] = (u32)-1, @@ -1638,19 +1680,63 @@ int ovs_nla_copy_actions(const struct nlattr *attr, return -EINVAL; if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) return -EINVAL; + vlan_tci = vlan->vlan_tci; break; case OVS_ACTION_ATTR_RECIRC: break; + case OVS_ACTION_ATTR_PUSH_MPLS: { + const struct ovs_action_push_mpls *mpls = nla_data(a); + + if (!eth_p_mpls(mpls->mpls_ethertype)) + return -EINVAL; + /* Prohibit push MPLS other than to a white list + * for packets that have a known tag order. + * + * vlan_tci indicates that the packet at one + * point had a VLAN. It may have been subsequently + * removed using pop VLAN so this rule is stricter + * than necessary. This is because it is not + * possible to know if a VLAN is still present + * after a pop VLAN action. */ + if (vlan_tci & htons(VLAN_TAG_PRESENT) || + (eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6) && + eth_type != htons(ETH_P_ARP) && + eth_type != htons(ETH_P_RARP) && + !eth_p_mpls(eth_type))) + return -EINVAL; + eth_type = mpls->mpls_ethertype; + break; + } + + case OVS_ACTION_ATTR_POP_MPLS: + if (vlan_tci & htons(VLAN_TAG_PRESENT) || + !eth_p_mpls(eth_type)) + return -EINVAL; + + /* Disallow subsequent L2.5+ set and mpls_pop actions + * as there is no check here to ensure that the new + * eth_type is valid and thus set actions could + * write off the end of the packet or otherwise + * corrupt it. + * + * Support for these actions is planned using packet + * recirculation. 
+ */ + eth_type = htons(0); + break; + case OVS_ACTION_ATTR_SET: - err = validate_set(a, key, sfa, &skip_copy); + err = validate_set(a, key, sfa, &skip_copy, eth_type); if (err) return err; break; case OVS_ACTION_ATTR_SAMPLE: - err = validate_and_copy_sample(a, key, depth, sfa); + err = validate_and_copy_sample(a, key, depth, sfa, + eth_type, vlan_tci); if (err) return err; skip_copy = true; @@ -1672,6 +1758,14 @@ int ovs_nla_copy_actions(const struct nlattr *attr, return 0; } +int ovs_nla_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, + struct sw_flow_actions **sfa) +{ + return ovs_nla_copy_actions__(attr, key, 0, sfa, key->eth.type, + key->eth.tci); +} + static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) { const struct nlattr *a; diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h index 42de45678ac..0c20e8640e5 100644 --- a/datapath/flow_netlink.h +++ b/datapath/flow_netlink.h @@ -49,7 +49,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, const struct nlattr *); int ovs_nla_copy_actions(const struct nlattr *attr, - const struct sw_flow_key *key, int depth, + const struct sw_flow_key *key, struct sw_flow_actions **sfa); int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb); diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c index 9ded17c63f5..8344293bbde 100644 --- a/datapath/linux/compat/gso.c +++ b/datapath/linux/compat/gso.c @@ -17,11 +17,12 @@ */ #include -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0) #include #include #include +#include #include #include #include @@ -38,6 +39,8 @@ #include #include "gso.h" +#include "mpls.h" +#include "vlan.h" #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) && \ !defined(HAVE_VLAN_BUG_WORKAROUND) @@ -50,10 +53,11 @@ MODULE_PARM_DESC(vlan_tso, "Enable TSO for VLAN packets"); #define vlan_tso true #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) static 
bool dev_supports_vlan_tx(struct net_device *dev) { -#if defined(HAVE_VLAN_BUG_WORKAROUND) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37) + return true; +#elif defined(HAVE_VLAN_BUG_WORKAROUND) return dev->features & NETIF_F_HW_VLAN_TX; #else /* Assume that the driver is buggy. */ @@ -61,24 +65,70 @@ static bool dev_supports_vlan_tx(struct net_device *dev) #endif } +/* Strictly this is not needed and will be optimised out + * as this code is guarded by #if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0). + * It is here to make things explicit should the compatibility + * code be extended in some way prior to extending its life-span + * beyond v3.16. + */ +static bool supports_mpls_gso(void) +{ +/* MPLS GSO was introduced in v3.11, however it was not correctly + * activated using mpls_features until v3.16. */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) + return true; +#else + return false; +#endif +} + int rpl_dev_queue_xmit(struct sk_buff *skb) { #undef dev_queue_xmit int err = -ENOMEM; + bool vlan, mpls; + + vlan = mpls = false; + + /* Avoid traversing any VLAN tags that are present to determine if + * the ethtype is MPLS. Instead compare the mac_len (end of L2) and + * skb_network_offset() (beginning of L3) whose inequality will + * indicate the presence of an MPLS label stack.
*/ + if (skb->mac_len != skb_network_offset(skb) && !supports_mpls_gso()) + mpls = true; + + if (vlan_tx_tag_present(skb) && !dev_supports_vlan_tx(skb->dev)) + vlan = true; - if (vlan_tx_tag_present(skb) && !dev_supports_vlan_tx(skb->dev)) { + if (vlan || mpls) { int features; features = netif_skb_features(skb); - if (!vlan_tso) - features &= ~(NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_UFO | NETIF_F_FSO); + if (vlan) { + if (!vlan_tso) + features &= ~(NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_UFO | NETIF_F_FSO); - skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)); - if (unlikely(!skb)) - return err; - vlan_set_tci(skb, 0); + skb = __vlan_put_tag(skb, skb->vlan_proto, + vlan_tx_tag_get(skb)); + if (unlikely(!skb)) + return err; + vlan_set_tci(skb, 0); + } + + /* As of v3.11 the kernel provides an mpls_features field in + * struct net_device which allows devices to advertise which + * features it supports for MPLS. This value defaults to + * NETIF_F_SG and is only honoured as of v3.16. + * + * This compatibility code is intended for kernels older + * than v3.16 that do not support MPLS GSO and do not + * use mpls_features. Thus this code uses NETIF_F_SG + * directly in place of mpls_features.
+ */ + if (mpls) + features &= NETIF_F_SG; if (netif_needs_gso(skb, features)) { struct sk_buff *nskb; @@ -117,7 +167,6 @@ int rpl_dev_queue_xmit(struct sk_buff *skb) kfree_skb(skb); return err; } -#endif /* kernel version < 2.6.37 */ static __be16 __skb_network_protocol(struct sk_buff *skb) { @@ -135,9 +184,22 @@ static __be16 __skb_network_protocol(struct sk_buff *skb) vlan_depth += VLAN_HLEN; } + if (eth_p_mpls(type)) + type = ovs_skb_get_inner_protocol(skb); + return type; } +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) +static void tnl_fix_segment(struct sk_buff *skb) +{ + if (OVS_GSO_CB(skb)->fix_segment) + OVS_GSO_CB(skb)->fix_segment(skb); +} +#else +static void tnl_fix_segment(struct sk_buff *skb) { } +#endif + static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path) @@ -178,8 +240,7 @@ static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb, memcpy(ip_hdr(skb), iph, pkt_hlen); memcpy(skb->cb, cb, sizeof(cb)); - if (OVS_GSO_CB(skb)->fix_segment) - OVS_GSO_CB(skb)->fix_segment(skb); + tnl_fix_segment(skb); skb->protocol = proto; skb = skb->next; @@ -232,4 +293,4 @@ int rpl_ip_local_out(struct sk_buff *skb) } return ret; } -#endif /* 3.12 */ +#endif /* 3.16 */ diff --git a/datapath/linux/compat/gso.h b/datapath/linux/compat/gso.h index 3041e8882b7..6281f294c80 100644 --- a/datapath/linux/compat/gso.h +++ b/datapath/linux/compat/gso.h @@ -4,6 +4,7 @@ #include #if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) +#include #include #include @@ -11,9 +12,11 @@ struct ovs_gso_cb { struct ovs_skb_cb dp_cb; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0) + __be16 inner_protocol; +#endif u16 inner_network_header; /* Offset from * inner_mac_header */ - /* 16bit hole */ sk_buff_data_t inner_mac_header; /* Offset from skb->head */ void (*fix_segment)(struct sk_buff *); }; @@ -51,12 +54,6 @@ static inline int skb_inner_network_offset(const struct sk_buff *skb) return skb_inner_network_header(skb) - skb->data; } 
-#define skb_inner_mac_offset rpl_skb_inner_mac_offset -static inline int skb_inner_mac_offset(const struct sk_buff *skb) -{ - return skb_inner_mac_header(skb) - skb->data; -} - #define skb_reset_inner_headers rpl_skb_reset_inner_headers static inline void skb_reset_inner_headers(struct sk_buff *skb) { @@ -68,8 +65,52 @@ static inline void skb_reset_inner_headers(struct sk_buff *skb) OVS_GSO_CB(skb)->fix_segment = NULL; } +#endif /* 3.12 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0) #define ip_local_out rpl_ip_local_out int ip_local_out(struct sk_buff *skb); -#endif /* 3.12 */ +#define skb_inner_mac_offset rpl_skb_inner_mac_offset +static inline int skb_inner_mac_offset(const struct sk_buff *skb) +{ + return skb_inner_mac_header(skb) - skb->data; +} +#endif /* 3.16 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0) +static inline void ovs_skb_init_inner_protocol(struct sk_buff *skb) { + OVS_GSO_CB(skb)->inner_protocol = htons(0); +} + +static inline void ovs_skb_set_inner_protocol(struct sk_buff *skb, + __be16 ethertype) { + OVS_GSO_CB(skb)->inner_protocol = ethertype; +} + +static inline __be16 ovs_skb_get_inner_protocol(struct sk_buff *skb) +{ + return OVS_GSO_CB(skb)->inner_protocol; +} + +#else + +static inline void ovs_skb_init_inner_protocol(struct sk_buff *skb) { + /* Nothing to do. The inner_protocol is either zero or + * has been set to a value by another user. + * Either way it may be considered initialised. 
+ */ +} + +static inline void ovs_skb_set_inner_protocol(struct sk_buff *skb, + __be16 ethertype) +{ + skb->inner_protocol = ethertype; +} + +static inline __be16 ovs_skb_get_inner_protocol(struct sk_buff *skb) +{ + return skb->inner_protocol; +} +#endif /* 3.11 */ #endif diff --git a/datapath/linux/compat/include/linux/netdevice.h b/datapath/linux/compat/include/linux/netdevice.h index d726390ef48..886c2f83073 100644 --- a/datapath/linux/compat/include/linux/netdevice.h +++ b/datapath/linux/compat/include/linux/netdevice.h @@ -64,11 +64,13 @@ static inline struct net_device *dev_get_by_index_rcu(struct net *net, int ifind typedef u32 netdev_features_t; #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0) #define skb_gso_segment rpl_skb_gso_segment struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb, netdev_features_t features); +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) #define netif_skb_features rpl_netif_skb_features netdev_features_t rpl_netif_skb_features(struct sk_buff *skb); @@ -113,7 +115,7 @@ static inline struct net_device *netdev_master_upper_dev_get(struct net_device * } #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0) #define dev_queue_xmit rpl_dev_queue_xmit int dev_queue_xmit(struct sk_buff *skb); #endif diff --git a/datapath/linux/compat/netdevice.c b/datapath/linux/compat/netdevice.c index 1dc5abf2d06..72bdec5f4e4 100644 --- a/datapath/linux/compat/netdevice.c +++ b/datapath/linux/compat/netdevice.c @@ -1,6 +1,9 @@ #include #include +#include "mpls.h" +#include "gso.h" + #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) #ifndef HAVE_CAN_CHECKSUM_PROTOCOL static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) @@ -69,7 +72,9 @@ netdev_features_t rpl_netif_skb_features(struct sk_buff *skb) return harmonize_features(skb, protocol, features); } } +#endif /* kernel version < 2.6.38 */ +#if LINUX_VERSION_CODE 
< KERNEL_VERSION(3,16,0) struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -89,6 +94,9 @@ struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb, vlan_depth += VLAN_HLEN; } + if (eth_p_mpls(type)) + type = ovs_skb_get_inner_protocol(skb); + /* this hack needed to get regular skb_gso_segment() */ #undef skb_gso_segment skb_proto = skb->protocol; @@ -98,4 +106,4 @@ struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb, skb->protocol = skb_proto; return skb_gso; } -#endif /* kernel version < 2.6.38 */ +#endif /* kernel version < 3.16.0 */ diff --git a/datapath/mpls.h b/datapath/mpls.h new file mode 100644 index 00000000000..7eab104be5d --- /dev/null +++ b/datapath/mpls.h @@ -0,0 +1,15 @@ +#ifndef MPLS_H +#define MPLS_H 1 + +#include + +#define MPLS_BOS_MASK 0x00000100 +#define MPLS_HLEN 4 + +static inline bool eth_p_mpls(__be16 eth_type) +{ + return eth_type == htons(ETH_P_MPLS_UC) || + eth_type == htons(ETH_P_MPLS_MC); +} + +#endif diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index 4f8404546d1..bf27dcbea34 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -319,15 +319,14 @@ enum ovs_key_attr { OVS_KEY_ATTR_DP_HASH, /* u32 hash value. Value 0 indicates the hash is not computed by the datapath. */ OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */ + OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls. + * The implementation may restrict + * the accepted length of the array. */ + #ifdef __KERNEL__ /* Only used within kernel data path. */ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */ #endif - /* Experimental */ - - OVS_KEY_ATTR_MPLS = 62, /* array of struct ovs_key_mpls. - * The implementation may restrict - * the accepted length of the array. */ __OVS_KEY_ATTR_MAX };