Skip to content

Commit

Permalink
Use epoch(9) for rtentries to simplify control plane operations.
Browse files Browse the repository at this point in the history
Currently the only reason for refcounting rtentries is the need to report
 the rtable operation details immediately after execution.
Delaying rtentry reclamation makes it possible to stop refcounting and simplify the code.
Additionally, this change makes it possible to reimplement rib_lookup_info(), which
 is used by some of the customers to get the matching prefix along
 with nexthops, in a more efficient way.

The change keeps the per-vnet rtzone UMA zone. It adds an nh_vnet field to
 nhop_priv to be able to reliably set curvnet even during vnet teardown.
The rest of the reference counting code will be removed in D24867.

Differential Revision:	https://reviews.freebsd.org/D24866
  • Loading branch information
AlexanderChernikov committed May 23, 2020
1 parent 016fc6d commit 2bbab0a
Show file tree
Hide file tree
Showing 12 changed files with 95 additions and 92 deletions.
3 changes: 3 additions & 0 deletions sys/fs/nfsclient/nfs_clvfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -465,18 +465,21 @@ nfs_mountroot(struct mount *mp)
if (nd->mygateway.sin_len != 0 &&
nd->mygateway.sin_addr.s_addr != 0) {
struct sockaddr_in mask, sin;
struct epoch_tracker et;

bzero((caddr_t)&mask, sizeof(mask));
sin = mask;
sin.sin_family = AF_INET;
sin.sin_len = sizeof(sin);
/* XXX MRT use table 0 for this sort of thing */
NET_EPOCH_ENTER(et);
CURVNET_SET(TD_TO_VNET(td));
error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
(struct sockaddr *)&nd->mygateway,
(struct sockaddr *)&mask,
RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
CURVNET_RESTORE();
NET_EPOCH_EXIT(et);
if (error)
panic("nfs_mountroot: RTM_ADD: %d", error);
}
Expand Down
4 changes: 2 additions & 2 deletions sys/net/if.c
Original file line number Diff line number Diff line change
Expand Up @@ -1854,18 +1854,17 @@ ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,

ifp = ifa->ifa_ifp;

NET_EPOCH_ENTER(et);
bzero(&info, sizeof(info));
if (cmd != RTM_DELETE)
info.rti_ifp = V_loif;
if (cmd == RTM_ADD) {
/* explicitly specify (loopback) ifa */
if (info.rti_ifp != NULL) {
NET_EPOCH_ENTER(et);
rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp);
if (rti_ifa != NULL)
ifa_ref(rti_ifa);
info.rti_ifa = rti_ifa;
NET_EPOCH_EXIT(et);
}
}
info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
Expand All @@ -1874,6 +1873,7 @@ ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type);

error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib);
NET_EPOCH_EXIT(et);

if (rti_ifa != NULL)
ifa_free(rti_ifa);
Expand Down
123 changes: 54 additions & 69 deletions sys/net/route.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,6 @@ VNET_PCPUSTAT_SYSUNINIT(rtstat);
VNET_DEFINE(struct rib_head *, rt_tables);
#define V_rt_tables VNET(rt_tables)

VNET_DEFINE(int, rttrash); /* routes not in table but not freed */
#define V_rttrash VNET(rttrash)


/*
* Convert a 'struct radix_node *' to a 'struct rtentry *'.
Expand All @@ -148,6 +145,7 @@ static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *,
static struct rtentry *rt_unlinkrte(struct rib_head *rnh,
struct rt_addrinfo *info, int *perror);
static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info);
static void destroy_rtentry_epoch(epoch_context_t ctx);
#ifdef RADIX_MPATH
static struct radix_node *rt_mpath_unlink(struct rib_head *rnh,
struct rt_addrinfo *info, struct rtentry *rto, int *perror);
Expand Down Expand Up @@ -332,6 +330,16 @@ vnet_route_uninit(const void *unused __unused)
}
}

/*
* dom_rtdetach calls rt_table_destroy(), which
* schedules deletion for all rtentries, nexthops and control
* structures. Wait for the destruction callbacks to fire.
* Note that this should result in freeing all rtentries, but
* nexthops deletions will be scheduled for the next epoch run
* and will be completed after vnet teardown.
*/
epoch_drain_callbacks(net_epoch_preempt);

free(V_rt_tables, M_RTABLE);
uma_zdestroy(V_rtzone);
}
Expand Down Expand Up @@ -449,41 +457,54 @@ rtfree(struct rtentry *rt)
if ((rt->rt_flags & RTF_UP) == 0) {
if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
panic("rtfree 2");
/*
* the rtentry must have been removed from the routing table
* so it is represented in rttrash.. remove that now.
*/
V_rttrash--;
#ifdef DIAGNOSTIC
if (rt->rt_refcnt < 0) {
printf("rtfree: %p not freed (neg refs)\n", rt);
goto done;
}
#endif

/* Unreference nexthop */
nhop_free(rt->rt_nhop);
epoch_call(net_epoch_preempt, destroy_rtentry_epoch,
&rt->rt_epoch_ctx);

/*
* and the rtentry itself of course
* FALLTHROUGH to RT_UNLOCK() so the reporting functions
* have consistent behaviour of operating on unlocked entry.
*/
uma_zfree(V_rtzone, rt);
return;
}
done:
RT_UNLOCK(rt);
}

/*
 * Releases the resources held by an rtentry: drops the entry's
 * reference on its nexthop and returns the entry to V_rtzone.
 *
 * The vnet context is taken from the nexthop (nhop_get_vnet()) rather
 * than from the caller, and is restored before returning.
 */
static void
destroy_rtentry(struct rtentry *rt)
{

/*
* At this moment rnh, nh_control may be already freed.
* nhop interface may have been migrated to a different vnet.
* Use vnet stored in the nexthop to delete the entry.
*/
CURVNET_SET(nhop_get_vnet(rt->rt_nhop));

/* Unreference nexthop */
nhop_free(rt->rt_nhop);

/* V_rtzone is per-vnet, so the free must happen while curvnet is set */
uma_zfree(V_rtzone, rt);

CURVNET_RESTORE();
}

/*
* Temporary RTFREE() function wrapper.
* Intended to use in control plane code to
* avoid exposing internal layout of 'struct rtentry'.
* Epoch callback indicating rtentry is safe to destroy
*/
void
rtfree_func(struct rtentry *rt)
static void
destroy_rtentry_epoch(epoch_context_t ctx)
{
struct rtentry *rt;

rt = __containerof(ctx, struct rtentry, rt_epoch_ctx);

RTFREE(rt);
destroy_rtentry(rt);
}

/*
Expand Down Expand Up @@ -546,7 +567,7 @@ rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway,

RT_LOCK(rt);
flags = rt->rt_flags;
RTFREE_LOCKED(rt);
RT_UNLOCK(rt);

RTSTAT_INC(rts_dynamic);

Expand Down Expand Up @@ -1112,13 +1133,6 @@ rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info)
ifa = rt->rt_nhop->nh_ifa;
if (ifa != NULL && ifa->ifa_rtrequest != NULL)
ifa->ifa_rtrequest(RTM_DELETE, rt, rt->rt_nhop, info);

/*
* One more rtentry floating around that is not
* linked to the routing table. rttrash will be decremented
* when RTFREE(rt) is eventually called.
*/
V_rttrash++;
}


Expand Down Expand Up @@ -1386,6 +1400,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,

KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
KASSERT((info->rti_flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked"));
NET_EPOCH_ASSERT();

dst = info->rti_info[RTAX_DST];

Expand Down Expand Up @@ -1580,13 +1595,10 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
ifa->ifa_rtrequest(RTM_ADD, rt, rt->rt_nhop, info);

/*
* actually return a resultant rtentry and
* give the caller a single reference.
* actually return a resultant rtentry
*/
if (ret_nrt) {
if (ret_nrt)
*ret_nrt = rt;
RT_ADDREF(rt);
}
rnh->rnh_gen++; /* Routing table updated */
RT_UNLOCK(rt);

Expand Down Expand Up @@ -1622,15 +1634,13 @@ del_route(struct rib_head *rnh, struct rt_addrinfo *info,

/*
* If the caller wants it, then it can have it,
* but it's up to it to free the rtentry as we won't be
* doing it.
* the entry will be deleted after the end of the current epoch.
*/
if (ret_nrt) {
if (ret_nrt)
*ret_nrt = rt;
RT_UNLOCK(rt);
} else
RTFREE_LOCKED(rt);


RTFREE_LOCKED(rt);

return (0);
}

Expand Down Expand Up @@ -1736,10 +1746,8 @@ change_route_one(struct rib_head *rnh, struct rt_addrinfo *info,
if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest)
nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info);

if (ret_nrt != NULL) {
if (ret_nrt != NULL)
*ret_nrt = rt;
RT_ADDREF(rt);
}

RT_UNLOCK(rt);

Expand All @@ -1757,16 +1765,13 @@ static int
change_route(struct rib_head *rnh, struct rt_addrinfo *info,
struct rtentry **ret_nrt)
{
struct epoch_tracker et;
int error;

/* Check if updated gateway exists */
if ((info->rti_flags & RTF_GATEWAY) &&
(info->rti_info[RTAX_GATEWAY] == NULL))
return (EINVAL);

NET_EPOCH_ENTER(et);

/*
* route change is done in multiple steps, with dropping and
* reacquiring lock. In the situations with multiple processes
Expand All @@ -1779,7 +1784,6 @@ change_route(struct rib_head *rnh, struct rt_addrinfo *info,
if (error != EAGAIN)
break;
}
NET_EPOCH_EXIT(et);

return (error);
}
Expand Down Expand Up @@ -1825,6 +1829,7 @@ static inline int
rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
{
RIB_RLOCK_TRACKER;
struct epoch_tracker et;
struct sockaddr *dst;
struct sockaddr *netmask;
struct rtentry *rt = NULL;
Expand Down Expand Up @@ -1957,38 +1962,18 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
else
info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
info.rti_info[RTAX_NETMASK] = netmask;
NET_EPOCH_ENTER(et);
error = rtrequest1_fib(cmd, &info, &rt, fibnum);
if (error == 0 && rt != NULL) {
/*
* notify any listening routing agents of the change
*/
RT_LOCK(rt);

/* TODO: interface routes/aliases */
RT_ADDREF(rt);
RT_UNLOCK(rt);
rt_newaddrmsg_fib(cmd, ifa, rt, fibnum);
RT_LOCK(rt);
RT_REMREF(rt);
if (cmd == RTM_DELETE) {
/*
* If we are deleting, and we found an entry,
* then it's been removed from the tree..
* now throw it away.
*/
RTFREE_LOCKED(rt);
} else {
if (cmd == RTM_ADD) {
/*
* We just wanted to add it..
* we don't actually need a reference.
*/
RT_REMREF(rt);
}
RT_UNLOCK(rt);
}
didwork = 1;
}
NET_EPOCH_EXIT(et);
if (error)
a_failure = error;
}
Expand Down
2 changes: 0 additions & 2 deletions sys/net/route.h
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,6 @@ struct rt_addrinfo {
#define RT_LINK_IS_UP(ifp) (!((ifp)->if_capabilities & IFCAP_LINKSTATE) \
|| (ifp)->if_link_state == LINK_STATE_UP)

#define RTFREE_FUNC(_rt) rtfree_func(_rt)

#define RO_NHFREE(_ro) do { \
if ((_ro)->ro_nh) { \
NH_FREE((_ro)->ro_nh); \
Expand Down
1 change: 1 addition & 0 deletions sys/net/route/nhop.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ struct rib_head;
uint32_t nhop_get_idx(const struct nhop_object *nh);
enum nhop_type nhop_get_type(const struct nhop_object *nh);
int nhop_get_rtflags(const struct nhop_object *nh);
struct vnet *nhop_get_vnet(const struct nhop_object *nh);

#endif /* _KERNEL */

Expand Down
10 changes: 10 additions & 0 deletions sys/net/route/nhop_ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,9 @@ finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info,
return (ENOMEM);
}

/* Save vnet to ease destruction */
nh_priv->nh_vnet = curvnet;

/* Reference external objects and calculate (referenced) ifa */
if_ref(nh->nh_ifp);
ifa_ref(nh->nh_ifa);
Expand Down Expand Up @@ -698,6 +701,13 @@ nhop_set_rtflags(struct nhop_object *nh, int rt_flags)
nh->nh_priv->rt_flags = rt_flags;
}

/*
 * Returns the vnet pointer stored in the nexthop's private data
 * (nh_priv->nh_vnet).
 */
struct vnet *
nhop_get_vnet(const struct nhop_object *nh)
{

return (nh->nh_priv->nh_vnet);
}

void
nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu)
{
Expand Down
1 change: 1 addition & 0 deletions sys/net/route/nhop_var.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ struct nhop_priv {
struct nhop_object *nh; /* backreference to the dataplane nhop */
struct nh_control *nh_control; /* backreference to the rnh */
struct nhop_priv *nh_next; /* hash table membership */
struct vnet *nh_vnet; /* vnet nhop belongs to */
struct epoch_context nh_epoch_ctx; /* epoch data for nhop */
};

Expand Down
2 changes: 2 additions & 0 deletions sys/net/route/route_var.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#ifndef RNF_NORMAL
#include <net/radix.h>
#endif
#include <sys/epoch.h>
#include <netinet/in.h> /* struct sockaddr_in */
#include <sys/counter.h>

Expand Down Expand Up @@ -148,6 +149,7 @@ struct rtentry {
#define rt_endzero rt_mtx
struct mtx rt_mtx; /* mutex for routing entry */
struct rtentry *rt_chain; /* pointer to next rtentry to delete */
struct epoch_context rt_epoch_ctx; /* net epoch tracker */
};

#define RT_LOCK_INIT(_rt) \
Expand Down
Loading

0 comments on commit 2bbab0a

Please sign in to comment.