Skip to content

Commit

Permalink
ipv4: fix fnhe usage by non-cached routes
Browse files Browse the repository at this point in the history
Allow some non-cached routes to use non-expired fnhe:

1. ip_del_fnhe: moved above and now called by find_exception.
The 4.5+ commit deed49d expires fnhe only when caching
routes. Change that to:

1.1. use fnhe for non-cached local output routes, with the help
from (2)

1.2. allow __mkroute_input to detect expired fnhe (outdated
fnhe_gw, for example) when do_cache is false, eg. when itag!=0
for unicast destinations.

2. __mkroute_output: keep fi to allow local routes with orig_oif != 0
to use fnhe info even when the new route will not be cached into fnhe.
After commit 839da4d ("net: ipv4: set orig_oif based on fib
result for local traffic") it means all local routes will be affected
because they are not cached. This change is used to solve a PMTU
problem with IPVS (and probably Netfilter DNAT) setups that redirect
local clients from target local IP (local route to Virtual IP)
to new remote IP target, eg. IPVS TUN real server. Loopback has
64K MTU and we need to create fnhe on the local route that will
keep the reduced PMTU for the Virtual IP. Without this change
fnhe_pmtu is updated from ICMP but never exposed to non-cached
local routes. This includes routes with flowi4_oif!=0 for 4.6+ and
with flowi4_oif=any for 4.14+).

3. update_or_create_fnhe: make sure fnhe_expires is not 0 for
new entries

Fixes: 839da4d ("net: ipv4: set orig_oif based on fib result for local traffic")
Fixes: d6d5e99 ("route: do not cache fib route info on local routes with oif")
Fixes: deed49d ("route: check and remove route cache when we get route")
Cc: David Ahern <[email protected]>
Cc: Xin Long <[email protected]>
Signed-off-by: Julian Anastasov <[email protected]>
Acked-by: David Ahern <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
Julian Anastasov authored and davem330 committed May 3, 2018
1 parent e002434 commit 94720e3
Showing 1 changed file with 53 additions and 65 deletions.
118 changes: 53 additions & 65 deletions net/ipv4/route.c
Original file line number Diff line number Diff line change
Expand Up @@ -709,7 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_gw = gw;
fnhe->fnhe_pmtu = pmtu;
fnhe->fnhe_mtu_locked = lock;
fnhe->fnhe_expires = expires;
fnhe->fnhe_expires = max(1UL, expires);

/* Exception created; mark the cached routes for the nexthop
* stale, so anyone caching it rechecks if this exception
Expand Down Expand Up @@ -1297,6 +1297,36 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}

static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
{
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe, __rcu **fnhe_p;
u32 hval = fnhe_hashfun(daddr);

spin_lock_bh(&fnhe_lock);

hash = rcu_dereference_protected(nh->nh_exceptions,
lockdep_is_held(&fnhe_lock));
hash += hval;

fnhe_p = &hash->chain;
fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
while (fnhe) {
if (fnhe->fnhe_daddr == daddr) {
rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
fnhe_flush_routes(fnhe);
kfree_rcu(fnhe, rcu);
break;
}
fnhe_p = &fnhe->fnhe_next;
fnhe = rcu_dereference_protected(fnhe->fnhe_next,
lockdep_is_held(&fnhe_lock));
}

spin_unlock_bh(&fnhe_lock);
}

static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
Expand All @@ -1310,8 +1340,14 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)

for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
fnhe = rcu_dereference(fnhe->fnhe_next)) {
if (fnhe->fnhe_daddr == daddr)
if (fnhe->fnhe_daddr == daddr) {
if (fnhe->fnhe_expires &&
time_after(jiffies, fnhe->fnhe_expires)) {
ip_del_fnhe(nh, daddr);
break;
}
return fnhe;
}
}
return NULL;
}
Expand Down Expand Up @@ -1636,36 +1672,6 @@ static void ip_handle_martian_source(struct net_device *dev,
#endif
}

static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
{
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe, __rcu **fnhe_p;
u32 hval = fnhe_hashfun(daddr);

spin_lock_bh(&fnhe_lock);

hash = rcu_dereference_protected(nh->nh_exceptions,
lockdep_is_held(&fnhe_lock));
hash += hval;

fnhe_p = &hash->chain;
fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
while (fnhe) {
if (fnhe->fnhe_daddr == daddr) {
rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
fnhe_flush_routes(fnhe);
kfree_rcu(fnhe, rcu);
break;
}
fnhe_p = &fnhe->fnhe_next;
fnhe = rcu_dereference_protected(fnhe->fnhe_next,
lockdep_is_held(&fnhe_lock));
}

spin_unlock_bh(&fnhe_lock);
}

/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
Expand Down Expand Up @@ -1719,20 +1725,10 @@ static int __mkroute_input(struct sk_buff *skb,

fnhe = find_exception(&FIB_RES_NH(*res), daddr);
if (do_cache) {
if (fnhe) {
if (fnhe)
rth = rcu_dereference(fnhe->fnhe_rth_input);
if (rth && rth->dst.expires &&
time_after(jiffies, rth->dst.expires)) {
ip_del_fnhe(&FIB_RES_NH(*res), daddr);
fnhe = NULL;
} else {
goto rt_cache;
}
}

rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);

rt_cache:
else
rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
goto out;
Expand Down Expand Up @@ -2216,39 +2212,31 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
* the loopback interface and the IP_PKTINFO ipi_ifindex will
* be set to the loopback interface as well.
*/
fi = NULL;
do_cache = false;
}

fnhe = NULL;
do_cache &= fi != NULL;
if (do_cache) {
if (fi) {
struct rtable __rcu **prth;
struct fib_nh *nh = &FIB_RES_NH(*res);

fnhe = find_exception(nh, fl4->daddr);
if (!do_cache)
goto add;
if (fnhe) {
prth = &fnhe->fnhe_rth_output;
rth = rcu_dereference(*prth);
if (rth && rth->dst.expires &&
time_after(jiffies, rth->dst.expires)) {
ip_del_fnhe(nh, fl4->daddr);
fnhe = NULL;
} else {
goto rt_cache;
} else {
if (unlikely(fl4->flowi4_flags &
FLOWI_FLAG_KNOWN_NH &&
!(nh->nh_gw &&
nh->nh_scope == RT_SCOPE_LINK))) {
do_cache = false;
goto add;
}
prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
}

if (unlikely(fl4->flowi4_flags &
FLOWI_FLAG_KNOWN_NH &&
!(nh->nh_gw &&
nh->nh_scope == RT_SCOPE_LINK))) {
do_cache = false;
goto add;
}
prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
rth = rcu_dereference(*prth);

rt_cache:
if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
return rth;
}
Expand Down

0 comments on commit 94720e3

Please sign in to comment.