Commit 9a95d9c

Merge branch 'Remove-rtnl-lock-dependency-from-all-action-implementations'

Vlad Buslov says:

====================
Remove rtnl lock dependency from all action implementations

Currently, all netlink protocol handlers for updating rules, actions and
qdiscs are protected with a single global rtnl lock, which removes any
possibility of parallelism. This patch set is the second step toward
removing the rtnl lock dependency from the TC rules update path.

Recently, a new rtnl registration flag, RTNL_FLAG_DOIT_UNLOCKED, was
added. Handlers registered with this flag are called without RTNL
taken. The end goal is to register the rule update handlers
(RTM_NEWTFILTER, RTM_DELTFILTER, etc.) with the UNLOCKED flag to allow
parallel execution. However, there is no intention to completely remove
or split the rtnl lock itself. This patch set addresses specific
problems in the implementation of tc actions that prevent their control
path from being executed concurrently. Additional changes are required
to refactor the classifiers API and individual classifiers for parallel
execution. This patch set lays the groundwork to eventually register
rule update handlers as rtnl-unlocked.
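
For illustration, this is the registration mechanism the series works
toward (a sketch, not part of this series; tc_ctl_tfilter is the
existing filter doit handler, and converting it is left to a follow-up
series):

        /* Illustrative sketch only: register a message type so that its
         * doit handler runs without taking rtnl. The RTM_*TFILTER
         * handlers are NOT converted here; this only shows the mechanism.
         */
        rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL,
                      RTNL_FLAG_DOIT_UNLOCKED);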

The action API was already prepared for parallel execution by the
previous patch set, which means that action ops that use the action API
for their implementation (delete, search, etc.) do not require
additional modifications. The action API implements concurrency-safe
reference counting and guarantees that cleanup/delete is called only
once, after the last reference to the action is released.
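
A minimal sketch of that guarantee (simplified; the real logic lives in
net/sched/act_api.c, and tc_action_put is a placeholder name):

        /* Only the caller that drops the last reference runs cleanup, so
         * ops->cleanup() cannot run twice or race with another release.
         */
        static void tc_action_put(struct tc_action *a)
        {
                if (refcount_dec_and_test(&a->tcfa_refcnt)) {
                        if (a->ops->cleanup)
                                a->ops->cleanup(a);
                        kfree(a); /* real code also releases stats, idr slot, etc. */
                }
        }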

The goal of this change is to update the specific actions that access
their private state directly, so that they no longer depend on external
locking. The general approach is to re-use the existing tcf_lock
spinlock (already used by some action implementations to synchronize
the control path with the data path) to protect action private state
from concurrent modification. If an action has an rcu-protected
pointer, the tcf spinlock is used to protect the code that updates it,
instead of relying on the rtnl lock.
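
Concretely, the update path of each converted action follows roughly
this shape (a sketch modeled on the act_csum hunk below; 'p' is the
action instance and 'params' its rcu-protected parameters):

        /* Prepare new parameters outside the lock, swap them in under
         * tcf_lock, and free the old copy after an RCU grace period.
         */
        params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
        if (!params_new)
                return -ENOMEM;
        params_new->update_flags = parm->update_flags;

        spin_lock(&p->tcf_lock);
        p->tcf_action = parm->action;
        rcu_swap_protected(p->params, params_new,
                           lockdep_is_held(&p->tcf_lock));
        spin_unlock(&p->tcf_lock);

        if (params_new)         /* now holds the old parameters */
                kfree_rcu(params_new, rcu);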

Some actions need to know whether the rtnl mutex is held in order to
release it. For example, the ife action can load additional kernel
modules (meta ops) and must make sure that no locks are held during
module load. In such cases, the 'rtnl_held' argument is used to
conditionally release the rtnl mutex.
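
The resulting pattern, as applied in the act_ife hunk below:

        /* Drop rtnl around module load only when the caller holds it. */
        if (rtnl_held)
                rtnl_unlock();
        request_module("ife-meta-%s", ife_meta_id2name(metaid));
        if (rtnl_held)
                rtnl_lock();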

Changes from V1 to V2:
- Patch 12:
  - new patch
- Patch 14:
  - refactor gen_new_estimator() to reuse stats_lock when re-assigning
    rate estimator statistics pointer
- Remove the mirred and tunnel_key helper function changes. (to be
  submitted as a standalone patch)
====================

Signed-off-by: David S. Miller <[email protected]>
davem330 committed Aug 11, 2018
2 parents 2b14e1e + e329bc4 commit 9a95d9c
Showing 17 changed files with 214 additions and 122 deletions.
1 change: 1 addition & 0 deletions include/net/act_api.h
@@ -101,6 +101,7 @@ struct tc_action_ops {
         void (*stats_update)(struct tc_action *, u64, u32, u64);
         size_t (*get_fill_size)(const struct tc_action *act);
         struct net_device *(*get_dev)(const struct tc_action *a);
+        void (*put_dev)(struct net_device *dev);
         int (*delete)(struct net *net, u32 index);
 };

4 changes: 2 additions & 2 deletions include/net/gen_stats.h
@@ -59,13 +59,13 @@ int gnet_stats_finish_copy(struct gnet_dump *d);
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
                       struct gnet_stats_basic_cpu __percpu *cpu_bstats,
                       struct net_rate_estimator __rcu **rate_est,
-                      spinlock_t *stats_lock,
+                      spinlock_t *lock,
                       seqcount_t *running, struct nlattr *opt);
 void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
                           struct gnet_stats_basic_cpu __percpu *cpu_bstats,
                           struct net_rate_estimator __rcu **ptr,
-                          spinlock_t *stats_lock,
+                          spinlock_t *lock,
                           seqcount_t *running, struct nlattr *opt);
 bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
 bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,

21 changes: 13 additions & 8 deletions net/core/gen_estimator.c
@@ -112,7 +112,7 @@ static void est_timer(struct timer_list *t)
  * @bstats: basic statistics
  * @cpu_bstats: bstats per cpu
  * @rate_est: rate estimator statistics
- * @stats_lock: statistics lock
+ * @lock: lock for statistics and control path
  * @running: qdisc running seqcount
  * @opt: rate estimator configuration TLV
  *
@@ -128,7 +128,7 @@ static void est_timer(struct timer_list *t)
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
                       struct gnet_stats_basic_cpu __percpu *cpu_bstats,
                       struct net_rate_estimator __rcu **rate_est,
-                      spinlock_t *stats_lock,
+                      spinlock_t *lock,
                       seqcount_t *running,
                       struct nlattr *opt)
 {
@@ -154,19 +154,22 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
         seqcount_init(&est->seq);
         intvl_log = parm->interval + 2;
         est->bstats = bstats;
-        est->stats_lock = stats_lock;
+        est->stats_lock = lock;
         est->running = running;
         est->ewma_log = parm->ewma_log;
         est->intvl_log = intvl_log;
         est->cpu_bstats = cpu_bstats;
 
-        if (stats_lock)
+        if (lock)
                 local_bh_disable();
         est_fetch_counters(est, &b);
-        if (stats_lock)
+        if (lock)
                 local_bh_enable();
         est->last_bytes = b.bytes;
         est->last_packets = b.packets;
 
+        if (lock)
+                spin_lock_bh(lock);
         old = rcu_dereference_protected(*rate_est, 1);
         if (old) {
                 del_timer_sync(&old->timer);
@@ -179,6 +182,8 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
         mod_timer(&est->timer, est->next_jiffies);
 
         rcu_assign_pointer(*rate_est, est);
+        if (lock)
+                spin_unlock_bh(lock);
         if (old)
                 kfree_rcu(old, rcu);
         return 0;
@@ -209,7 +214,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
  * @bstats: basic statistics
  * @cpu_bstats: bstats per cpu
  * @rate_est: rate estimator statistics
- * @stats_lock: statistics lock
+ * @lock: lock for statistics and control path
  * @running: qdisc running seqcount (might be NULL)
  * @opt: rate estimator configuration TLV
  *
@@ -221,11 +226,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
                           struct gnet_stats_basic_cpu __percpu *cpu_bstats,
                           struct net_rate_estimator __rcu **rate_est,
-                          spinlock_t *stats_lock,
+                          spinlock_t *lock,
                           seqcount_t *running, struct nlattr *opt)
 {
         return gen_new_estimator(bstats, cpu_bstats, rate_est,
-                                 stats_lock, running, opt);
+                                 lock, running, opt);
 }
 EXPORT_SYMBOL(gen_replace_estimator);

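With the renamed argument an action can hand its own tcf_lock to the
estimator code, so the pointer swap above is serialized with the
action's control path. A usage sketch in the style of act_police (shown
for illustration only):

        /* The per-action tcf_lock doubles as the estimator 'lock'
         * argument.
         */
        err = gen_replace_estimator(&police->tcf_bstats,
                                    police->common.cpu_bstats,
                                    &police->tcf_rate_est,
                                    &police->tcf_lock, NULL, est);
        if (err)
                goto failure;
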
10 changes: 7 additions & 3 deletions net/sched/act_bpf.c
@@ -143,11 +143,12 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
                 .index   = prog->tcf_index,
                 .refcnt  = refcount_read(&prog->tcf_refcnt) - ref,
                 .bindcnt = atomic_read(&prog->tcf_bindcnt) - bind,
-                .action  = prog->tcf_action,
         };
         struct tcf_t tm;
         int ret;
 
+        spin_lock(&prog->tcf_lock);
+        opt.action = prog->tcf_action;
         if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt))
                 goto nla_put_failure;
 
@@ -163,9 +164,11 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
                               TCA_ACT_BPF_PAD))
                 goto nla_put_failure;
 
+        spin_unlock(&prog->tcf_lock);
         return skb->len;
 
 nla_put_failure:
+        spin_unlock(&prog->tcf_lock);
         nlmsg_trim(skb, tp);
         return -1;
 }
@@ -264,7 +267,7 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
 {
         cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
         /* updates to prog->filter are prevented, since it's called either
-         * with rtnl lock or during final cleanup in rcu callback
+         * with tcf lock or during final cleanup in rcu callback
          */
         cfg->filter = rcu_dereference_protected(prog->filter, 1);
 
@@ -336,8 +339,8 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
                 goto out;
 
         prog = to_bpf(*act);
-        ASSERT_RTNL();
 
+        spin_lock(&prog->tcf_lock);
         if (res != ACT_P_CREATED)
                 tcf_bpf_prog_fill_cfg(prog, &old);
 
@@ -349,6 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
         prog->tcf_action = parm->action;
         rcu_assign_pointer(prog->filter, cfg.filter);
+        spin_unlock(&prog->tcf_lock);
 
         if (res == ACT_P_CREATED) {
                 tcf_idr_insert(tn, *act);

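The dump-side conversions in this patch set (act_bpf above, act_csum,
act_gact and act_ife below) all share one shape: opt.action is read
under tcf_lock instead of in the struct initializer, and the lock is
released on both exit paths. Schematically ('act' and TCA_EXAMPLE_PARMS
are placeholders):

        spin_lock(&act->tcf_lock);
        opt.action = act->tcf_action;
        if (nla_put(skb, TCA_EXAMPLE_PARMS, sizeof(opt), &opt))
                goto nla_put_failure;
        spin_unlock(&act->tcf_lock);
        return skb->len;

nla_put_failure:
        spin_unlock(&act->tcf_lock);
        nlmsg_trim(skb, b);
        return -1;
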
24 changes: 15 additions & 9 deletions net/sched/act_csum.c
@@ -50,7 +50,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
                          struct netlink_ext_ack *extack)
 {
         struct tc_action_net *tn = net_generic(net, csum_net_id);
-        struct tcf_csum_params *params_old, *params_new;
+        struct tcf_csum_params *params_new;
         struct nlattr *tb[TCA_CSUM_MAX + 1];
         struct tc_csum *parm;
         struct tcf_csum *p;
@@ -88,20 +88,22 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
         }
 
         p = to_tcf_csum(*a);
-        ASSERT_RTNL();
 
         params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
         if (unlikely(!params_new)) {
                 tcf_idr_release(*a, bind);
                 return -ENOMEM;
         }
-        params_old = rtnl_dereference(p->params);
+        params_new->update_flags = parm->update_flags;
 
+        spin_lock(&p->tcf_lock);
         p->tcf_action = parm->action;
-        params_new->update_flags = parm->update_flags;
-        rcu_assign_pointer(p->params, params_new);
-        if (params_old)
-                kfree_rcu(params_old, rcu);
+        rcu_swap_protected(p->params, params_new,
+                           lockdep_is_held(&p->tcf_lock));
+        spin_unlock(&p->tcf_lock);
+
+        if (params_new)
+                kfree_rcu(params_new, rcu);
 
         if (ret == ACT_P_CREATED)
                 tcf_idr_insert(tn, *a);
@@ -599,11 +601,13 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
                 .index   = p->tcf_index,
                 .refcnt  = refcount_read(&p->tcf_refcnt) - ref,
                 .bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
-                .action  = p->tcf_action,
         };
         struct tcf_t t;
 
-        params = rtnl_dereference(p->params);
+        spin_lock(&p->tcf_lock);
+        params = rcu_dereference_protected(p->params,
+                                           lockdep_is_held(&p->tcf_lock));
+        opt.action = p->tcf_action;
         opt.update_flags = params->update_flags;
 
         if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
@@ -612,10 +616,12 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
         tcf_tm_dump(&t, &p->tcf_tm);
         if (nla_put_64bit(skb, TCA_CSUM_TM, sizeof(t), &t, TCA_CSUM_PAD))
                 goto nla_put_failure;
+        spin_unlock(&p->tcf_lock);
 
         return skb->len;
 
 nla_put_failure:
+        spin_unlock(&p->tcf_lock);
         nlmsg_trim(skb, b);
         return -1;
 }

10 changes: 8 additions & 2 deletions net/sched/act_gact.c
@@ -113,7 +113,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 
         gact = to_gact(*a);
 
-        ASSERT_RTNL();
+        spin_lock(&gact->tcf_lock);
         gact->tcf_action = parm->action;
 #ifdef CONFIG_GACT_PROB
         if (p_parm) {
@@ -126,6 +126,8 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
                 gact->tcfg_ptype = p_parm->ptype;
         }
 #endif
+        spin_unlock(&gact->tcf_lock);
+
         if (ret == ACT_P_CREATED)
                 tcf_idr_insert(tn, *a);
         return ret;
@@ -178,10 +180,11 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
                 .index   = gact->tcf_index,
                 .refcnt  = refcount_read(&gact->tcf_refcnt) - ref,
                 .bindcnt = atomic_read(&gact->tcf_bindcnt) - bind,
-                .action  = gact->tcf_action,
         };
         struct tcf_t t;
 
+        spin_lock(&gact->tcf_lock);
+        opt.action = gact->tcf_action;
         if (nla_put(skb, TCA_GACT_PARMS, sizeof(opt), &opt))
                 goto nla_put_failure;
 #ifdef CONFIG_GACT_PROB
@@ -199,9 +202,12 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
         tcf_tm_dump(&t, &gact->tcf_tm);
         if (nla_put_64bit(skb, TCA_GACT_TM, sizeof(t), &t, TCA_GACT_PAD))
                 goto nla_put_failure;
+        spin_unlock(&gact->tcf_lock);
+
         return skb->len;
 
 nla_put_failure:
+        spin_unlock(&gact->tcf_lock);
         nlmsg_trim(skb, b);
         return -1;
 }

40 changes: 25 additions & 15 deletions net/sched/act_ife.c
@@ -268,7 +268,8 @@ static const char *ife_meta_id2name(u32 metaid)
  * under ife->tcf_lock for existing action
  */
 static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
-                                void *val, int len, bool exists)
+                                void *val, int len, bool exists,
+                                bool rtnl_held)
 {
         struct tcf_meta_ops *ops = find_ife_oplist(metaid);
         int ret = 0;
@@ -278,9 +279,11 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
 #ifdef CONFIG_MODULES
                 if (exists)
                         spin_unlock_bh(&ife->tcf_lock);
-                rtnl_unlock();
+                if (rtnl_held)
+                        rtnl_unlock();
                 request_module("ife-meta-%s", ife_meta_id2name(metaid));
-                rtnl_lock();
+                if (rtnl_held)
+                        rtnl_lock();
                 if (exists)
                         spin_lock_bh(&ife->tcf_lock);
                 ops = find_ife_oplist(metaid);
@@ -421,7 +424,7 @@ static void tcf_ife_cleanup(struct tc_action *a)
 
 /* under ife->tcf_lock for existing action */
 static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
-                             bool exists)
+                             bool exists, bool rtnl_held)
 {
         int len = 0;
         int rc = 0;
@@ -433,7 +436,8 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
                         val = nla_data(tb[i]);
                         len = nla_len(tb[i]);
 
-                        rc = load_metaops_and_vet(ife, i, val, len, exists);
+                        rc = load_metaops_and_vet(ife, i, val, len, exists,
+                                                  rtnl_held);
                         if (rc != 0)
                                 return rc;
 
@@ -454,7 +458,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
         struct tc_action_net *tn = net_generic(net, ife_net_id);
         struct nlattr *tb[TCA_IFE_MAX + 1];
         struct nlattr *tb2[IFE_META_MAX + 1];
-        struct tcf_ife_params *p, *p_old;
+        struct tcf_ife_params *p;
         struct tcf_ife_info *ife;
         u16 ife_type = ETH_P_IFE;
         struct tc_ife *parm;
@@ -558,7 +562,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
                 return err;
         }
 
-        err = populate_metalist(ife, tb2, exists);
+        err = populate_metalist(ife, tb2, exists, rtnl_held);
         if (err)
                 goto metadata_parse_err;
 
@@ -581,13 +585,13 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
         }
 
         ife->tcf_action = parm->action;
+        /* protected by tcf_lock when modifying existing action */
+        rcu_swap_protected(ife->params, p, 1);
+
         if (exists)
                 spin_unlock_bh(&ife->tcf_lock);
-
-        p_old = rtnl_dereference(ife->params);
-        rcu_assign_pointer(ife->params, p);
-        if (p_old)
-                kfree_rcu(p_old, rcu);
+        if (p)
+                kfree_rcu(p, rcu);
 
         if (ret == ACT_P_CREATED)
                 tcf_idr_insert(tn, *a);
@@ -600,16 +604,20 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 {
         unsigned char *b = skb_tail_pointer(skb);
         struct tcf_ife_info *ife = to_ife(a);
-        struct tcf_ife_params *p = rtnl_dereference(ife->params);
+        struct tcf_ife_params *p;
         struct tc_ife opt = {
                 .index = ife->tcf_index,
                 .refcnt = refcount_read(&ife->tcf_refcnt) - ref,
                 .bindcnt = atomic_read(&ife->tcf_bindcnt) - bind,
-                .action = ife->tcf_action,
-                .flags = p->flags,
         };
         struct tcf_t t;
 
+        spin_lock_bh(&ife->tcf_lock);
+        opt.action = ife->tcf_action;
+        p = rcu_dereference_protected(ife->params,
+                                      lockdep_is_held(&ife->tcf_lock));
+        opt.flags = p->flags;
+
         if (nla_put(skb, TCA_IFE_PARMS, sizeof(opt), &opt))
                 goto nla_put_failure;
 
@@ -635,9 +643,11 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
                 pr_info("Failed to dump metalist\n");
         }
 
+        spin_unlock_bh(&ife->tcf_lock);
         return skb->len;
 
 nla_put_failure:
+        spin_unlock_bh(&ife->tcf_lock);
         nlmsg_trim(skb, b);
         return -1;
 }

(diffs for the remaining 10 changed files not shown)
