Skip to content

Commit

Permalink
Merge branch 'ipv4-avoid-pathological-hash-tables'
Browse files Browse the repository at this point in the history
Eric Dumazet says:

====================
ipv4: avoid pathological hash tables

This series speeds up netns dismantle on hosts
having many active netns, by making sure the two hash tables
used for the IPv4 FIB contain uniformly spread items.

v2: changed second patch to add fib_info_laddrhash_bucket()
    for consistency (David Ahern suggestion).
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
  • Loading branch information
kuba-moo committed Jan 19, 2022
2 parents 8eb896a + 79eb15d commit ff9fc0a
Showing 1 changed file with 32 additions and 33 deletions.
65 changes: 32 additions & 33 deletions net/ipv4/fib_semantics.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/netlink.h>
#include <linux/hash.h>

#include <net/arp.h>
#include <net/ip.h>
Expand All @@ -51,6 +52,7 @@ static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_info_hash_size;
static unsigned int fib_info_hash_bits;
static unsigned int fib_info_cnt;

#define DEVINDEX_HASHBITS 8
Expand Down Expand Up @@ -319,11 +321,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)

static inline unsigned int fib_devindex_hashfn(unsigned int val)
{
unsigned int mask = DEVINDEX_HASHSIZE - 1;
return hash_32(val, DEVINDEX_HASHBITS);
}

static struct hlist_head *
fib_info_devhash_bucket(const struct net_device *dev)
{
u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;

return (val ^
(val >> DEVINDEX_HASHBITS) ^
(val >> (DEVINDEX_HASHBITS * 2))) & mask;
return &fib_info_devhash[fib_devindex_hashfn(val)];
}

static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
Expand Down Expand Up @@ -433,12 +439,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
struct hlist_head *head;
struct fib_nh *nh;
unsigned int hash;

spin_lock(&fib_info_lock);

hash = fib_devindex_hashfn(dev->ifindex);
head = &fib_info_devhash[hash];
head = fib_info_devhash_bucket(dev);

hlist_for_each_entry(nh, head, nh_hash) {
if (nh->fib_nh_dev == dev &&
nh->fib_nh_gw4 == gw &&
Expand Down Expand Up @@ -1243,13 +1248,13 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
return err;
}

static inline unsigned int fib_laddr_hashfn(__be32 val)
static struct hlist_head *
fib_info_laddrhash_bucket(const struct net *net, __be32 val)
{
unsigned int mask = (fib_info_hash_size - 1);
u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val,
fib_info_hash_bits);

return ((__force u32)val ^
((__force u32)val >> 7) ^
((__force u32)val >> 14)) & mask;
return &fib_info_laddrhash[slot];
}

static struct hlist_head *fib_info_hash_alloc(int bytes)
Expand Down Expand Up @@ -1285,6 +1290,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
old_info_hash = fib_info_hash;
old_laddrhash = fib_info_laddrhash;
fib_info_hash_size = new_size;
fib_info_hash_bits = ilog2(new_size);

for (i = 0; i < old_size; i++) {
struct hlist_head *head = &fib_info_hash[i];
Expand All @@ -1302,21 +1308,20 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
}
fib_info_hash = new_info_hash;

fib_info_laddrhash = new_laddrhash;
for (i = 0; i < old_size; i++) {
struct hlist_head *lhead = &fib_info_laddrhash[i];
struct hlist_head *lhead = &old_laddrhash[i];
struct hlist_node *n;
struct fib_info *fi;

hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
struct hlist_head *ldest;
unsigned int new_hash;

new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
ldest = &new_laddrhash[new_hash];
ldest = fib_info_laddrhash_bucket(fi->fib_net,
fi->fib_prefsrc);
hlist_add_head(&fi->fib_lhash, ldest);
}
}
fib_info_laddrhash = new_laddrhash;

spin_unlock_bh(&fib_info_lock);

Expand Down Expand Up @@ -1601,20 +1606,18 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
if (fi->fib_prefsrc) {
struct hlist_head *head;

head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
head = fib_info_laddrhash_bucket(net, fi->fib_prefsrc);
hlist_add_head(&fi->fib_lhash, head);
}
if (fi->nh) {
list_add(&fi->nh_list, &nh->fi_list);
} else {
change_nexthops(fi) {
struct hlist_head *head;
unsigned int hash;

if (!nexthop_nh->fib_nh_dev)
continue;
hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
head = &fib_info_devhash[hash];
head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
hlist_add_head(&nexthop_nh->nh_hash, head);
} endfor_nexthops(fi)
}
Expand Down Expand Up @@ -1875,16 +1878,16 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
*/
int fib_sync_down_addr(struct net_device *dev, __be32 local)
{
int ret = 0;
unsigned int hash = fib_laddr_hashfn(local);
struct hlist_head *head = &fib_info_laddrhash[hash];
int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
struct net *net = dev_net(dev);
struct hlist_head *head;
struct fib_info *fi;
int ret = 0;

if (!fib_info_laddrhash || local == 0)
return 0;

head = fib_info_laddrhash_bucket(net, local);
hlist_for_each_entry(fi, head, fib_lhash) {
if (!net_eq(fi->fib_net, net) ||
fi->fib_tb_id != tb_id)
Expand Down Expand Up @@ -1966,8 +1969,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)

void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
unsigned int hash = fib_devindex_hashfn(dev->ifindex);
struct hlist_head *head = &fib_info_devhash[hash];
struct hlist_head *head = fib_info_devhash_bucket(dev);
struct fib_nh *nh;

hlist_for_each_entry(nh, head, nh_hash) {
Expand All @@ -1986,12 +1988,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
*/
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
int ret = 0;
int scope = RT_SCOPE_NOWHERE;
struct hlist_head *head = fib_info_devhash_bucket(dev);
struct fib_info *prev_fi = NULL;
unsigned int hash = fib_devindex_hashfn(dev->ifindex);
struct hlist_head *head = &fib_info_devhash[hash];
int scope = RT_SCOPE_NOWHERE;
struct fib_nh *nh;
int ret = 0;

if (force)
scope = -1;
Expand Down Expand Up @@ -2136,7 +2137,6 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{
struct fib_info *prev_fi;
unsigned int hash;
struct hlist_head *head;
struct fib_nh *nh;
int ret;
Expand All @@ -2152,8 +2152,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
}

prev_fi = NULL;
hash = fib_devindex_hashfn(dev->ifindex);
head = &fib_info_devhash[hash];
head = fib_info_devhash_bucket(dev);
ret = 0;

hlist_for_each_entry(nh, head, nh_hash) {
Expand Down

0 comments on commit ff9fc0a

Please sign in to comment.