Skip to content

Commit

Permalink
ipv6: fix src addr routing with the exception table
Browse files Browse the repository at this point in the history
When inserting route cache into the exception table, the key is
generated with both src_addr and dest_addr with src addr routing.
However, current logic always assumes the src_addr used to generate the
key is a /128 host address. This is not true in the following scenarios:
1. When the route is a gateway route or does not have next hop.
   (rt6_is_gw_or_nonexthop() == false)
2. When calling ip6_rt_cache_alloc(), saddr is passed in as NULL.
This means, when looking for a route cache in the exception table, we
have to do the lookup twice: first time with the passed in /128 host
address, second time with the src_addr stored in fib6_info.

This solves the pmtu discovery issue reported by Mikael Magnusson where
a route cache with a lower mtu info is created for a gateway route with
src addr. However, the lookup code is not able to find this route cache.

Fixes: 2b760fc ("ipv6: hook up exception table to store dst cache")
Reported-by: Mikael Magnusson <[email protected]>
Bisected-by: David Ahern <[email protected]>
Signed-off-by: Wei Wang <[email protected]>
Cc: Martin Lau <[email protected]>
Cc: Eric Dumazet <[email protected]>
Acked-by: Martin KaFai Lau <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
tracywwnj authored and davem330 committed May 16, 2019
1 parent 9a6c8bf commit 510e2ce
Showing 1 changed file with 27 additions and 24 deletions.
51 changes: 27 additions & 24 deletions net/ipv6/route.c
Original file line number Diff line number Diff line change
@@ -111,8 +111,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
int iif, int type, u32 portid, u32 seq,
unsigned int flags);
static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
struct in6_addr *daddr,
struct in6_addr *saddr);
const struct in6_addr *daddr,
const struct in6_addr *saddr);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct fib6_info *rt6_add_route_info(struct net *net,
@@ -1573,31 +1573,44 @@ void rt6_flush_exceptions(struct fib6_info *rt)
* Caller has to hold rcu_read_lock()
*/
static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
struct in6_addr *daddr,
struct in6_addr *saddr)
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
const struct in6_addr *src_key = NULL;
struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
struct rt6_info *ret = NULL;

bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);

#ifdef CONFIG_IPV6_SUBTREES
/* fib6i_src.plen != 0 indicates f6i is in subtree
* and exception table is indexed by a hash of
* both fib6_dst and fib6_src.
* Otherwise, the exception table is indexed by
* a hash of only fib6_dst.
* However, the src addr used to create the hash
* might not be exactly the passed in saddr which
* is a /128 addr from the flow.
* So we need to use f6i->fib6_src to redo lookup
* if the passed in saddr does not find anything.
* (See the logic in ip6_rt_cache_alloc() on how
* rt->rt6i_src is updated.)
*/
if (res->f6i->fib6_src.plen)
src_key = saddr;
find_ex:
#endif
bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);

if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
ret = rt6_ex->rt6i;

#ifdef CONFIG_IPV6_SUBTREES
/* Use fib6_src as src_key and redo lookup */
if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
src_key = &res->f6i->fib6_src.addr;
goto find_ex;
}
#endif

return ret;
}

@@ -2672,12 +2685,10 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
struct rt6_exception_bucket *bucket;
const struct fib6_nh *nh = res->nh;
struct fib6_info *f6i = res->f6i;
const struct in6_addr *src_key;
struct rt6_exception *rt6_ex;
struct inet6_dev *idev;
struct rt6_info *rt;
u32 mtu = 0;

if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
@@ -2686,18 +2697,10 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res,
goto out;
}

src_key = NULL;
#ifdef CONFIG_IPV6_SUBTREES
if (f6i->fib6_src.plen)
src_key = saddr;
#endif

bucket = rcu_dereference(f6i->rt6i_exception_bucket);
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);

if (likely(!mtu)) {
rt = rt6_find_cached_rt(res, daddr, saddr);
if (unlikely(rt)) {
mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
} else {
struct net_device *dev = nh->fib_nh_dev;

mtu = IPV6_MIN_MTU;

0 comments on commit 510e2ce

Please sign in to comment.