Skip to content

Commit

Permalink
ip_tunnel: Allow policy-based routing through tunnels
Browse files Browse the repository at this point in the history
This feature allows the administrator to set an fwmark for
packets traversing a tunnel.  This allows the use of independent
routing tables for tunneled packets without the use of iptables.

There is no concept of per-packet routing decisions through IPv4
tunnels, so this implementation does not need to work with
per-packet route lookups as the v6 implementation may
(with IP6_TNL_F_USE_ORIG_FWMARK).

Further, since the v4 tunnel ioctls share datastructures
(which can not be trivially modified) with the kernel's internal
tunnel configuration structures, the mark attribute must be stored
in the tunnel structure itself and passed as a parameter when
creating or changing tunnel attributes.

Signed-off-by: Craig Gallek <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
kraigatgoog authored and davem330 committed Apr 21, 2017
1 parent 0a473b8 commit 9830ad4
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 44 deletions.
5 changes: 3 additions & 2 deletions include/net/ip_tunnels.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ struct ip_tunnel {
unsigned int prl_count; /* # of entries in PRL */
unsigned int ip_tnl_net_id;
struct gro_cells gro_cells;
__u32 fwmark;
bool collect_md;
bool ignore_df;
};
Expand Down Expand Up @@ -273,9 +274,9 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
bool log_ecn_error);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p);
struct ip_tunnel_parm *p, __u32 fwmark);
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p);
struct ip_tunnel_parm *p, __u32 fwmark);
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);

struct ip_tunnel_encap_ops {
Expand Down
24 changes: 17 additions & 7 deletions net/ipv4/ip_gre.c
Original file line number Diff line number Diff line change
Expand Up @@ -829,7 +829,8 @@ static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
static int ipgre_netlink_parms(struct net_device *dev,
struct nlattr *data[],
struct nlattr *tb[],
struct ip_tunnel_parm *parms)
struct ip_tunnel_parm *parms,
__u32 *fwmark)
{
struct ip_tunnel *t = netdev_priv(dev);

Expand Down Expand Up @@ -886,6 +887,9 @@ static int ipgre_netlink_parms(struct net_device *dev,
t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
}

if (data[IFLA_GRE_FWMARK])
*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

return 0;
}

Expand Down Expand Up @@ -957,6 +961,7 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
{
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
__u32 fwmark = 0;
int err;

if (ipgre_netlink_encap_parms(data, &ipencap)) {
Expand All @@ -967,31 +972,32 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
return err;
}

err = ipgre_netlink_parms(dev, data, tb, &p);
err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
if (err < 0)
return err;
return ip_tunnel_newlink(dev, tb, &p);
return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
__u32 fwmark = t->fwmark;
int err;

if (ipgre_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev);
err = ip_tunnel_encap_setup(t, &ipencap);

if (err < 0)
return err;
}

err = ipgre_netlink_parms(dev, data, tb, &p);
err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
if (err < 0)
return err;
return ip_tunnel_changelink(dev, tb, &p);
return ip_tunnel_changelink(dev, tb, &p, fwmark);
}

static size_t ipgre_get_size(const struct net_device *dev)
Expand Down Expand Up @@ -1029,6 +1035,8 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(0) +
/* IFLA_GRE_IGNORE_DF */
nla_total_size(1) +
/* IFLA_GRE_FWMARK */
nla_total_size(4) +
0;
}

Expand All @@ -1049,7 +1057,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
nla_put_u8(skb, IFLA_GRE_PMTUDISC,
!!(p->iph.frag_off & htons(IP_DF))))
!!(p->iph.frag_off & htons(IP_DF))) ||
nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
goto nla_put_failure;

if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
Expand Down Expand Up @@ -1093,6 +1102,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
[IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
[IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
[IFLA_GRE_FWMARK] = { .type = NLA_U32 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
Expand Down
27 changes: 17 additions & 10 deletions net/ipv4/ip_tunnel.c
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,8 @@ static struct net_device *__ip_tunnel_create(struct net *net,
static inline void init_tunnel_flow(struct flowi4 *fl4,
int proto,
__be32 daddr, __be32 saddr,
__be32 key, __u8 tos, int oif)
__be32 key, __u8 tos, int oif,
__u32 mark)
{
memset(fl4, 0, sizeof(*fl4));
fl4->flowi4_oif = oif;
Expand All @@ -302,6 +303,7 @@ static inline void init_tunnel_flow(struct flowi4 *fl4,
fl4->flowi4_tos = tos;
fl4->flowi4_proto = proto;
fl4->fl4_gre_key = key;
fl4->flowi4_mark = mark;
}

static int ip_tunnel_bind_dev(struct net_device *dev)
Expand All @@ -322,7 +324,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)

init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
iph->saddr, tunnel->parms.o_key,
RT_TOS(iph->tos), tunnel->parms.link);
RT_TOS(iph->tos), tunnel->parms.link,
tunnel->fwmark);
rt = ip_route_output_key(tunnel->net, &fl4);

if (!IS_ERR(rt)) {
Expand Down Expand Up @@ -578,7 +581,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
RT_TOS(tos), tunnel->parms.link);
RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
rt = ip_route_output_key(tunnel->net, &fl4);
Expand Down Expand Up @@ -707,7 +710,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}

init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
tunnel->fwmark);

if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
Expand Down Expand Up @@ -795,7 +799,8 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
struct ip_tunnel *t,
struct net_device *dev,
struct ip_tunnel_parm *p,
bool set_mtu)
bool set_mtu,
__u32 fwmark)
{
ip_tunnel_del(itn, t);
t->parms.iph.saddr = p->iph.saddr;
Expand All @@ -812,10 +817,11 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
t->parms.iph.tos = p->iph.tos;
t->parms.iph.frag_off = p->iph.frag_off;

if (t->parms.link != p->link) {
if (t->parms.link != p->link || t->fwmark != fwmark) {
int mtu;

t->parms.link = p->link;
t->fwmark = fwmark;
mtu = ip_tunnel_bind_dev(dev);
if (set_mtu)
dev->mtu = mtu;
Expand Down Expand Up @@ -893,7 +899,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)

if (t) {
err = 0;
ip_tunnel_update(itn, t, dev, p, true);
ip_tunnel_update(itn, t, dev, p, true, 0);
} else {
err = -ENOENT;
}
Expand Down Expand Up @@ -1066,7 +1072,7 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p)
struct ip_tunnel_parm *p, __u32 fwmark)
{
struct ip_tunnel *nt;
struct net *net = dev_net(dev);
Expand All @@ -1087,6 +1093,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],

nt->net = net;
nt->parms = *p;
nt->fwmark = fwmark;
err = register_netdevice(dev);
if (err)
goto out;
Expand All @@ -1105,7 +1112,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p)
struct ip_tunnel_parm *p, __u32 fwmark)
{
struct ip_tunnel *t;
struct ip_tunnel *tunnel = netdev_priv(dev);
Expand Down Expand Up @@ -1137,7 +1144,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
}
}

ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
Expand Down
20 changes: 15 additions & 5 deletions net/ipv4/ip_vti.c
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,8 @@ static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
}

static void vti_netlink_parms(struct nlattr *data[],
struct ip_tunnel_parm *parms)
struct ip_tunnel_parm *parms,
__u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));

Expand All @@ -497,24 +498,29 @@ static void vti_netlink_parms(struct nlattr *data[],
if (data[IFLA_VTI_REMOTE])
parms->iph.daddr = nla_get_in_addr(data[IFLA_VTI_REMOTE]);

if (data[IFLA_VTI_FWMARK])
*fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]);
}

static int vti_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct ip_tunnel_parm parms;
__u32 fwmark = 0;

vti_netlink_parms(data, &parms);
return ip_tunnel_newlink(dev, tb, &parms);
vti_netlink_parms(data, &parms, &fwmark);
return ip_tunnel_newlink(dev, tb, &parms, fwmark);
}

static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
struct ip_tunnel *t = netdev_priv(dev);
__u32 fwmark = t->fwmark;
struct ip_tunnel_parm p;

vti_netlink_parms(data, &p);
return ip_tunnel_changelink(dev, tb, &p);
vti_netlink_parms(data, &p, &fwmark);
return ip_tunnel_changelink(dev, tb, &p, fwmark);
}

static size_t vti_get_size(const struct net_device *dev)
Expand All @@ -530,6 +536,8 @@ static size_t vti_get_size(const struct net_device *dev)
nla_total_size(4) +
/* IFLA_VTI_REMOTE */
nla_total_size(4) +
/* IFLA_VTI_FWMARK */
nla_total_size(4) +
0;
}

Expand All @@ -543,6 +551,7 @@ static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr);
nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr);
nla_put_u32(skb, IFLA_VTI_FWMARK, t->fwmark);

return 0;
}
Expand All @@ -553,6 +562,7 @@ static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
[IFLA_VTI_OKEY] = { .type = NLA_U32 },
[IFLA_VTI_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
[IFLA_VTI_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
[IFLA_VTI_FWMARK] = { .type = NLA_U32 },
};

static struct rtnl_link_ops vti_link_ops __read_mostly = {
Expand Down
24 changes: 17 additions & 7 deletions net/ipv4/ipip.c
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,8 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
}

static void ipip_netlink_parms(struct nlattr *data[],
struct ip_tunnel_parm *parms, bool *collect_md)
struct ip_tunnel_parm *parms, bool *collect_md,
__u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));

Expand Down Expand Up @@ -428,6 +429,9 @@ static void ipip_netlink_parms(struct nlattr *data[],

if (data[IFLA_IPTUN_COLLECT_METADATA])
*collect_md = true;

if (data[IFLA_IPTUN_FWMARK])
*fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}

/* This function returns true when ENCAP attributes are present in the nl msg */
Expand Down Expand Up @@ -470,6 +474,7 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
__u32 fwmark = 0;

if (ipip_netlink_encap_parms(data, &ipencap)) {
int err = ip_tunnel_encap_setup(t, &ipencap);
Expand All @@ -478,34 +483,35 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev,
return err;
}

ipip_netlink_parms(data, &p, &t->collect_md);
return ip_tunnel_newlink(dev, tb, &p);
ipip_netlink_parms(data, &p, &t->collect_md, &fwmark);
return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
bool collect_md;
__u32 fwmark = t->fwmark;

if (ipip_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev);
int err = ip_tunnel_encap_setup(t, &ipencap);

if (err < 0)
return err;
}

ipip_netlink_parms(data, &p, &collect_md);
ipip_netlink_parms(data, &p, &collect_md, &fwmark);
if (collect_md)
return -EINVAL;

if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
(!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
return -EINVAL;

return ip_tunnel_changelink(dev, tb, &p);
return ip_tunnel_changelink(dev, tb, &p, fwmark);
}

static size_t ipip_get_size(const struct net_device *dev)
Expand Down Expand Up @@ -535,6 +541,8 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_IPTUN_COLLECT_METADATA */
nla_total_size(0) +
/* IFLA_IPTUN_FWMARK */
nla_total_size(4) +
0;
}

Expand All @@ -550,7 +558,8 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))))
!!(parm->iph.frag_off & htons(IP_DF))) ||
nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
goto nla_put_failure;

if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
Expand Down Expand Up @@ -585,6 +594,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
[IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
};

static struct rtnl_link_ops ipip_link_ops __read_mostly = {
Expand Down
Loading

0 comments on commit 9830ad4

Please sign in to comment.