Skip to content

Commit

Permalink
Merge branch 'lwt-ipv6'
Browse files Browse the repository at this point in the history
Jiri Benc says:

====================
lwtunnel: per route ipv6 support for vxlan

v3: Moved LWTUNNEL_ENCAP_IP6 definition in patch 13.
v2: Fixed issues in patch 4 pointed out by Alexei.

This series enables IPv6 tunnels based on the lwtunnel infrastructure. Only
vxlan is supported for now.

Tested in all combinations of IPv4 over IPv6, IPv6 over IPv4 and IPv6 over
IPv6.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
davem330 committed Aug 20, 2015
2 parents 938049e + 32a2b00 commit 08617f4
Show file tree
Hide file tree
Showing 30 changed files with 312 additions and 233 deletions.
1 change: 0 additions & 1 deletion drivers/net/vrf.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,6 @@ static struct rtable *vrf_rtable_create(struct net_device *dev)
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
rth->rt_uncached_list = NULL;
rth->rt_lwtstate = NULL;
}

return rth;
Expand Down
89 changes: 50 additions & 39 deletions drivers/net/vxlan.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,

hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
if (inet_sk(vs->sock->sk)->inet_sport == port &&
inet_sk(vs->sock->sk)->sk.sk_family == family &&
vxlan_get_sk_family(vs) == family &&
vs->flags == flags)
return vs;
}
Expand Down Expand Up @@ -625,7 +625,7 @@ static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
struct net_device *dev;
struct sock *sk = vs->sock->sk;
struct net *net = sock_net(sk);
sa_family_t sa_family = sk->sk_family;
sa_family_t sa_family = vxlan_get_sk_family(vs);
__be16 port = inet_sk(sk)->inet_sport;
int err;

Expand All @@ -650,7 +650,7 @@ static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
struct net_device *dev;
struct sock *sk = vs->sock->sk;
struct net *net = sock_net(sk);
sa_family_t sa_family = sk->sk_family;
sa_family_t sa_family = vxlan_get_sk_family(vs);
__be16 port = inet_sk(sk)->inet_sport;

rcu_read_lock();
Expand Down Expand Up @@ -1269,17 +1269,27 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
}

if (vxlan_collect_metadata(vs)) {
const struct iphdr *iph = ip_hdr(skb);

tun_dst = metadata_dst_alloc(sizeof(*md), GFP_ATOMIC);
if (!tun_dst)
goto drop;

info = &tun_dst->u.tun_info;
info->key.ipv4_src = iph->saddr;
info->key.ipv4_dst = iph->daddr;
info->key.ipv4_tos = iph->tos;
info->key.ipv4_ttl = iph->ttl;
if (vxlan_get_sk_family(vs) == AF_INET) {
const struct iphdr *iph = ip_hdr(skb);

info->key.u.ipv4.src = iph->saddr;
info->key.u.ipv4.dst = iph->daddr;
info->key.tos = iph->tos;
info->key.ttl = iph->ttl;
} else {
const struct ipv6hdr *ip6h = ipv6_hdr(skb);

info->key.u.ipv6.src = ip6h->saddr;
info->key.u.ipv6.dst = ip6h->daddr;
info->key.tos = ipv6_get_dsfield(ip6h);
info->key.ttl = ip6h->hop_limit;
}

info->key.tp_src = udp_hdr(skb)->source;
info->key.tp_dst = udp_hdr(skb)->dest;

Expand Down Expand Up @@ -1894,6 +1904,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct ip_tunnel_info *info;
struct vxlan_dev *vxlan = netdev_priv(dev);
struct sock *sk = vxlan->vn_sock->sock->sk;
unsigned short family = vxlan_get_sk_family(vxlan->vn_sock);
struct rtable *rt = NULL;
const struct iphdr *old_iph;
struct flowi4 fl4;
Expand All @@ -1908,8 +1919,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
int err;
u32 flags = vxlan->flags;

/* FIXME: Support IPv6 */
info = skb_tunnel_info(skb, AF_INET);
info = skb_tunnel_info(skb);

if (rdst) {
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
Expand All @@ -1924,8 +1934,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
vni = be64_to_cpu(info->key.tun_id);
remote_ip.sin.sin_family = AF_INET;
remote_ip.sin.sin_addr.s_addr = info->key.ipv4_dst;
remote_ip.sa.sa_family = family;
if (family == AF_INET)
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
else
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
dst = &remote_ip;
}

Expand All @@ -1951,23 +1964,24 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
vxlan->cfg.port_max, true);

if (info) {
if (info->key.tun_flags & TUNNEL_CSUM)
flags |= VXLAN_F_UDP_CSUM;
else
flags &= ~VXLAN_F_UDP_CSUM;

ttl = info->key.ttl;
tos = info->key.tos;

if (info->options_len)
md = ip_tunnel_info_opts(info, sizeof(*md));
} else {
md->gbp = skb->mark;
}

if (dst->sa.sa_family == AF_INET) {
if (info) {
if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
df = htons(IP_DF);
if (info->key.tun_flags & TUNNEL_CSUM)
flags |= VXLAN_F_UDP_CSUM;
else
flags &= ~VXLAN_F_UDP_CSUM;

ttl = info->key.ipv4_ttl;
tos = info->key.ipv4_tos;

if (info->options_len)
md = ip_tunnel_info_opts(info, sizeof(*md));
} else {
md->gbp = skb->mark;
}
if (info && (info->key.tun_flags & TUNNEL_DONT_FRAGMENT))
df = htons(IP_DF);

memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0;
Expand Down Expand Up @@ -2025,7 +2039,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
} else {
struct dst_entry *ndst;
struct flowi6 fl6;
u32 flags;
u32 rt6i_flags;

memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = rdst ? rdst->remote_ifindex : 0;
Expand All @@ -2050,9 +2064,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}

/* Bypass encapsulation if the destination is local */
flags = ((struct rt6_info *)ndst)->rt6i_flags;
if (flags & RTF_LOCAL &&
!(flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
if (rt6i_flags & RTF_LOCAL &&
!(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
struct vxlan_dev *dst_vxlan;

dst_release(ndst);
Expand All @@ -2066,12 +2080,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}

ttl = ttl ? : ip6_dst_hoplimit(ndst);
md->gbp = skb->mark;

err = vxlan6_xmit_skb(ndst, sk, skb, dev, &fl6.saddr, &fl6.daddr,
0, ttl, src_port, dst_port, htonl(vni << 8), md,
!net_eq(vxlan->net, dev_net(vxlan->dev)),
vxlan->flags);
flags);
#endif
}

Expand Down Expand Up @@ -2104,8 +2116,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
struct vxlan_rdst *rdst, *fdst = NULL;
struct vxlan_fdb *f;

/* FIXME: Support IPv6 */
info = skb_tunnel_info(skb, AF_INET);
info = skb_tunnel_info(skb);

skb_reset_mac_header(skb);
eth = eth_hdr(skb);
Expand Down Expand Up @@ -2390,7 +2401,7 @@ void vxlan_get_rx_port(struct net_device *dev)
for (i = 0; i < PORT_HASH_SIZE; ++i) {
hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
port = inet_sk(vs->sock->sk)->inet_sport;
sa_family = vs->sock->sk->sk_family;
sa_family = vxlan_get_sk_family(vs);
dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
port);
}
Expand Down
3 changes: 2 additions & 1 deletion include/net/dst.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct dst_entry {
#else
void *__pad1;
#endif
struct lwtunnel_state *lwtstate;
int (*input)(struct sk_buff *);
int (*output)(struct sock *sk, struct sk_buff *skb);

Expand Down Expand Up @@ -89,7 +90,7 @@ struct dst_entry {
* (L1_CACHE_SIZE would be too much)
*/
#ifdef CONFIG_64BIT
long __pad_to_align_refcnt[2];
long __pad_to_align_refcnt[1];
#endif
/*
* __refcnt wants to be on a different cache line from
Expand Down
15 changes: 5 additions & 10 deletions include/net/dst_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,17 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
return NULL;
}

static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb,
int family)
static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb)
{
struct metadata_dst *md_dst = skb_metadata_dst(skb);
struct rtable *rt;
struct dst_entry *dst;

if (md_dst)
return &md_dst->u.tun_info;

switch (family) {
case AF_INET:
rt = (struct rtable *)skb_dst(skb);
if (rt && rt->rt_lwtstate)
return lwt_tun_info(rt->rt_lwtstate);
break;
}
dst = skb_dst(skb);
if (dst && dst->lwtstate)
return lwt_tun_info(dst->lwtstate);

return NULL;
}
Expand Down
1 change: 1 addition & 0 deletions include/net/flow.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ struct flowi6 {
#define flowi6_proto __fl_common.flowic_proto
#define flowi6_flags __fl_common.flowic_flags
#define flowi6_secid __fl_common.flowic_secid
#define flowi6_tun_key __fl_common.flowic_tun_key
struct in6_addr daddr;
struct in6_addr saddr;
__be32 flowlabel;
Expand Down
1 change: 0 additions & 1 deletion include/net/ip6_fib.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ struct rt6_info {
/* more non-fragment space at head required */
unsigned short rt6i_nfheader_len;
u8 rt6i_protocol;
struct lwtunnel_state *rt6i_lwtstate;
};

static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
Expand Down
50 changes: 32 additions & 18 deletions include/net/ip_tunnels.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,32 @@
#define IPTUNNEL_ERR_TIMEO (30*HZ)

/* Used to memset ip_tunnel padding. */
#define IP_TUNNEL_KEY_SIZE \
(offsetof(struct ip_tunnel_key, tp_dst) + \
FIELD_SIZEOF(struct ip_tunnel_key, tp_dst))
#define IP_TUNNEL_KEY_SIZE offsetofend(struct ip_tunnel_key, tp_dst)

/* Used to memset ipv4 address padding. */
#define IP_TUNNEL_KEY_IPV4_PAD offsetofend(struct ip_tunnel_key, u.ipv4.dst)
#define IP_TUNNEL_KEY_IPV4_PAD_LEN \
(FIELD_SIZEOF(struct ip_tunnel_key, u) - \
FIELD_SIZEOF(struct ip_tunnel_key, u.ipv4))

struct ip_tunnel_key {
__be64 tun_id;
__be32 ipv4_src;
__be32 ipv4_dst;
union {
struct {
__be32 src;
__be32 dst;
} ipv4;
struct {
struct in6_addr src;
struct in6_addr dst;
} ipv6;
} u;
__be16 tun_flags;
__u8 ipv4_tos;
__u8 ipv4_ttl;
u8 tos; /* TOS for IPv4, TC for IPv6 */
u8 ttl; /* TTL for IPv4, HL for IPv6 */
__be16 tp_src;
__be16 tp_dst;
} __packed __aligned(4); /* Minimize padding. */
};

/* Indicates whether the tunnel info structure represents receive
* or transmit tunnel parameters.
Expand Down Expand Up @@ -64,8 +76,8 @@ struct ip_tunnel_6rd_parm {
#endif

struct ip_tunnel_encap {
__u16 type;
__u16 flags;
u16 type;
u16 flags;
__be16 sport;
__be16 dport;
};
Expand Down Expand Up @@ -95,8 +107,8 @@ struct ip_tunnel {
* arrived */

/* These four fields used only by GRE */
__u32 i_seqno; /* The last seen seqno */
__u32 o_seqno; /* The last output seqno */
u32 i_seqno; /* The last seen seqno */
u32 o_seqno; /* The last output seqno */
int tun_hlen; /* Precalculated header length */
int mlink;

Expand Down Expand Up @@ -179,10 +191,12 @@ static inline void __ip_tunnel_info_init(struct ip_tunnel_info *tun_info,
const void *opts, u8 opts_len)
{
tun_info->key.tun_id = tun_id;
tun_info->key.ipv4_src = saddr;
tun_info->key.ipv4_dst = daddr;
tun_info->key.ipv4_tos = tos;
tun_info->key.ipv4_ttl = ttl;
tun_info->key.u.ipv4.src = saddr;
tun_info->key.u.ipv4.dst = daddr;
memset((unsigned char *)&tun_info->key + IP_TUNNEL_KEY_IPV4_PAD,
0, IP_TUNNEL_KEY_IPV4_PAD_LEN);
tun_info->key.tos = tos;
tun_info->key.ttl = ttl;
tun_info->key.tun_flags = tun_flags;

/* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
Expand Down Expand Up @@ -273,8 +287,8 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,

int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
__u8 tos, __u8 ttl, __be16 df, bool xnet);
__be32 src, __be32 dst, u8 proto,
u8 tos, u8 ttl, __be16 df, bool xnet);

struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum,
int gso_type_mask);
Expand Down
12 changes: 0 additions & 12 deletions include/net/lwtunnel.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,7 @@ int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
int lwtunnel_output(struct sock *sk, struct sk_buff *skb);
int lwtunnel_output6(struct sock *sk, struct sk_buff *skb);
int lwtunnel_input(struct sk_buff *skb);
int lwtunnel_input6(struct sk_buff *skb);

#else

Expand Down Expand Up @@ -164,21 +162,11 @@ static inline int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
return -EOPNOTSUPP;
}

static inline int lwtunnel_output6(struct sock *sk, struct sk_buff *skb)
{
return -EOPNOTSUPP;
}

static inline int lwtunnel_input(struct sk_buff *skb)
{
return -EOPNOTSUPP;
}

static inline int lwtunnel_input6(struct sk_buff *skb)
{
return -EOPNOTSUPP;
}

#endif

#endif /* __NET_LWTUNNEL_H */
3 changes: 2 additions & 1 deletion include/net/ndisc.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,8 @@ int ndisc_rcv(struct sk_buff *skb);

void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
const struct in6_addr *solicit,
const struct in6_addr *daddr, const struct in6_addr *saddr);
const struct in6_addr *daddr, const struct in6_addr *saddr,
struct sk_buff *oskb);

void ndisc_send_rs(struct net_device *dev,
const struct in6_addr *saddr, const struct in6_addr *daddr);
Expand Down
1 change: 0 additions & 1 deletion include/net/route.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ struct rtable {

struct list_head rt_uncached;
struct uncached_list *rt_uncached_list;
struct lwtunnel_state *rt_lwtstate;
};

static inline bool rt_is_input_route(const struct rtable *rt)
Expand Down
Loading

0 comments on commit 08617f4

Please sign in to comment.