Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…l/git/netfilter/nf

Florisn Westphal says:

====================
These are netfilter fixes for the *net* tree.

First patch resolves a false-positive lockdep splat:
rcu_dereference is used outside of rcu read lock.  Let lockdep
validate that the transaction mutex is locked.

Second patch fixes a kdoc warning added in previous PR.

Third patch fixes a memory leak:
The catchall element isn't disabled correctly, this allows
userspace to deactivate the element again. This results in refcount
underflow which in turn prevents memory release. This was always
broken since the feature was added in 5.13.

Patch 4 fixes an incorrect change in the previous pull request:
Adding a duplicate key to a set should work if the duplicate key
has expired, restore this behaviour. All from myself.

Patch intel#5 resolves an old historic artifact in sctp conntrack:
a 300ms timeout for shutdown_ack. Increase this to 3s.  From Xin Long.

Patch intel#6 fixes a sysctl data race in ipvs, two threads can clobber the
sysctl value, from Sishuai Gong. This is a day-0 bug that predates git
history.

Patches 7, 8 and 9, from Pablo Neira Ayuso, are also followups
for the previous GC rework in nf_tables: The netlink notifier and the
netns exit path must both increment the gc worker seqcount, else worker
may encounter stale (free'd) pointers.
================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Aug 16, 2023
2 parents b35c968 + 23185c6 commit de4c5ef
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 31 deletions.
4 changes: 2 additions & 2 deletions Documentation/networking/nf_conntrack-sysctl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,10 @@ nf_conntrack_sctp_timeout_established - INTEGER (seconds)
Default is set to (hb_interval * path_max_retrans + rto_max)

nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds)
default 0.3
default 3

nf_conntrack_sctp_timeout_shutdown_recd - INTEGER (seconds)
default 0.3
default 3

nf_conntrack_sctp_timeout_shutdown_ack_sent - INTEGER (seconds)
default 3
Expand Down
1 change: 1 addition & 0 deletions include/net/netfilter/nf_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ struct nft_set_elem_expr {
* @expr: stateful expression
* @ops: set ops
* @flags: set flags
* @dead: set will be freed, never cleared
* @genmask: generation mask
* @klen: key length
* @dlen: data length
Expand Down
4 changes: 4 additions & 0 deletions net/netfilter/ipvs/ip_vs_ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1876,6 +1876,7 @@ static int
proc_do_sync_threshold(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct netns_ipvs *ipvs = table->extra2;
int *valp = table->data;
int val[2];
int rc;
Expand All @@ -1885,6 +1886,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
.mode = table->mode,
};

mutex_lock(&ipvs->sync_mutex);
memcpy(val, valp, sizeof(val));
rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
if (write) {
Expand All @@ -1894,6 +1896,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
else
memcpy(valp, val, sizeof(val));
}
mutex_unlock(&ipvs->sync_mutex);
return rc;
}

Expand Down Expand Up @@ -4321,6 +4324,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
tbl[idx].data = &ipvs->sysctl_sync_threshold;
tbl[idx].extra2 = ipvs;
tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
Expand Down
6 changes: 3 additions & 3 deletions net/netfilter/nf_conntrack_proto_sctp.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
[SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
[SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
[SCTP_CONNTRACK_ESTABLISHED] = 210 SECS,
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS,
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
[SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
};
Expand Down Expand Up @@ -105,7 +105,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW},
/* init */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW},
/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
Expand Down
44 changes: 39 additions & 5 deletions net/netfilter/nf_tables_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -7091,6 +7091,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
ret = __nft_set_catchall_flush(ctx, set, &elem);
if (ret < 0)
break;
nft_set_elem_change_active(ctx->net, set, ext);
}

return ret;
Expand Down Expand Up @@ -9480,9 +9481,14 @@ struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
if (!trans)
return NULL;

trans->net = maybe_get_net(net);
if (!trans->net) {
kfree(trans);
return NULL;
}

refcount_inc(&set->refs);
trans->set = set;
trans->net = get_net(net);
trans->seq = gc_seq;

return trans;
Expand Down Expand Up @@ -9738,6 +9744,22 @@ static void nft_set_commit_update(struct list_head *set_update_list)
}
}

static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net)
{
unsigned int gc_seq;

/* Bump gc counter, it becomes odd, this is the busy mark. */
gc_seq = READ_ONCE(nft_net->gc_seq);
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);

return gc_seq;
}

static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
{
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
}

static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = nft_pernet(net);
Expand Down Expand Up @@ -9823,9 +9845,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)

WRITE_ONCE(nft_net->base_seq, base_seq);

/* Bump gc counter, it becomes odd, this is the busy mark. */
gc_seq = READ_ONCE(nft_net->gc_seq);
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
gc_seq = nft_gc_seq_begin(nft_net);

/* step 3. Start new generation, rules_gen_X now in use. */
net->nft.gencursor = nft_gencursor_next(net);
Expand Down Expand Up @@ -10038,7 +10058,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
nf_tables_commit_audit_log(&adl, nft_net->base_seq);

WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
nft_gc_seq_end(nft_net, gc_seq);
nf_tables_commit_release(net);

return 0;
Expand Down Expand Up @@ -11039,13 +11059,17 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
struct net *net = n->net;
unsigned int deleted;
bool restart = false;
unsigned int gc_seq;

if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
return NOTIFY_DONE;

nft_net = nft_pernet(net);
deleted = 0;
mutex_lock(&nft_net->commit_mutex);

gc_seq = nft_gc_seq_begin(nft_net);

if (!list_empty(&nf_tables_destroy_list))
rcu_barrier();
again:
Expand All @@ -11068,6 +11092,8 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
if (restart)
goto again;
}
nft_gc_seq_end(nft_net, gc_seq);

mutex_unlock(&nft_net->commit_mutex);

return NOTIFY_DONE;
Expand Down Expand Up @@ -11105,12 +11131,20 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net)
static void __net_exit nf_tables_exit_net(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
unsigned int gc_seq;

mutex_lock(&nft_net->commit_mutex);

gc_seq = nft_gc_seq_begin(nft_net);

if (!list_empty(&nft_net->commit_list) ||
!list_empty(&nft_net->module_list))
__nf_tables_abort(net, NFNL_ABORT_NONE);

__nft_release_tables(net);

nft_gc_seq_end(nft_net, gc_seq);

mutex_unlock(&nft_net->commit_mutex);
WARN_ON_ONCE(!list_empty(&nft_net->tables));
WARN_ON_ONCE(!list_empty(&nft_net->module_list));
Expand Down
3 changes: 3 additions & 0 deletions net/netfilter/nft_dynset.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (IS_ERR(set))
return PTR_ERR(set);

if (set->flags & NFT_SET_OBJECT)
return -EOPNOTSUPP;

if (set->ops->update == NULL)
return -EOPNOTSUPP;

Expand Down
38 changes: 17 additions & 21 deletions net/netfilter/nft_set_pipapo.c
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
goto out;

if (last) {
if (nft_set_elem_expired(&f->mt[b].e->ext))
goto next_match;
if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
goto next_match;
Expand Down Expand Up @@ -600,17 +602,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
struct nft_pipapo_elem *ret;

ret = pipapo_get(net, set, (const u8 *)elem->key.val.data,
return pipapo_get(net, set, (const u8 *)elem->key.val.data,
nft_genmask_cur(net));
if (IS_ERR(ret))
return ret;

if (nft_set_elem_expired(&ret->ext))
return ERR_PTR(-ENOENT);

return ret;
}

/**
Expand Down Expand Up @@ -1549,7 +1542,7 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,

/**
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
* @set: nftables API set representation
* @_set: nftables API set representation
* @m: Matching data
*/
static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
Expand Down Expand Up @@ -1697,6 +1690,17 @@ static void nft_pipapo_commit(const struct nft_set *set)
priv->clone = new_clone;
}

static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
{
#ifdef CONFIG_PROVE_LOCKING
const struct net *net = read_pnet(&set->net);

return lockdep_is_held(&nft_pernet(net)->commit_mutex);
#else
return true;
#endif
}

static void nft_pipapo_abort(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
Expand All @@ -1705,7 +1709,7 @@ static void nft_pipapo_abort(const struct nft_set *set)
if (!priv->dirty)
return;

m = rcu_dereference(priv->match);
m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));

new_clone = pipapo_clone(m);
if (IS_ERR(new_clone))
Expand All @@ -1732,11 +1736,7 @@ static void nft_pipapo_activate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_pipapo_elem *e;

e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0);
if (IS_ERR(e))
return;
struct nft_pipapo_elem *e = elem->priv;

nft_set_elem_change_active(net, set, &e->ext);
}
Expand Down Expand Up @@ -1950,10 +1950,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,

data = (const u8 *)nft_set_ext_key(&e->ext);

e = pipapo_get(net, set, data, 0);
if (IS_ERR(e))
return;

while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const u8 *match_start, *match_end;
Expand Down

0 comments on commit de4c5ef

Please sign in to comment.