net: sched: Remove Qdisc::running sequence counter
The Qdisc::running sequence counter has two uses:

  1. Reliably reading qdisc's tc statistics while the qdisc is running
     (a seqcount read/retry loop at gnet_stats_add_basic()).

  2. As a flag, indicating whether the qdisc in question is running
     (without any retry loops).

For the first usage, the Qdisc::running sequence counter write section,
qdisc_run_begin() => qdisc_run_end(), covers a much wider area than is
actually needed: the raw qdisc's bstats update. A u64_stats sync point
was thus introduced (in previous commits) inside the bstats structure
itself, and a local u64_stats write section is now started and stopped
just for the bstats updates.

Use that u64_stats sync point mechanism for the bstats read/retry loop
at gnet_stats_add_basic().
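
As an illustration, here is a minimal sketch of the resulting read
side. It only uses what the previous commits added (the bytes, packets
and syncp members of struct gnet_stats_basic_sync); the helper name is
hypothetical and this is not the literal kernel code:

	/* Snapshot one bstats instance via its embedded u64_stats sync
	 * point. On 32-bit SMP the syncp seqcount makes the 64-bit reads
	 * tear-free; on 64-bit it compiles away.
	 */
	static void bstats_snapshot(const struct gnet_stats_basic_sync *b,
				    u64 *bytes, u64 *packets)
	{
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_irq(&b->syncp);
			*bytes = u64_stats_read(&b->bytes);
			*packets = u64_stats_read(&b->packets);
		} while (u64_stats_fetch_retry_irq(&b->syncp, start));
	}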

For the second qdisc->running usage, a __QDISC_STATE_RUNNING bit flag,
accessed with atomic bitops, is sufficient. Using a bit flag instead of
a sequence counter at qdisc_run_begin/end() and qdisc_is_running()
removes the SMP barriers implicitly provided by raw_read_seqcount() and
write_seqcount_begin/end(). All call sites have been surveyed, though,
and no ordering requirement that depends on those barriers was
identified.
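
As a sketch of the pattern (the sch_generic.h hunks below are the
authoritative version, and the helper names here are hypothetical), the
locked-qdisc running state reduces to an atomic test-and-set done under
the qdisc root lock:

	static bool locked_qdisc_try_begin(struct Qdisc *q)
	{
		/* An old bit value of 0 means we now own the running state. */
		return !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state);
	}

	static void locked_qdisc_end(struct Qdisc *q)
	{
		clear_bit(__QDISC_STATE_RUNNING, &q->state);
	}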

Now that the qdisc->running sequence counter is no longer used, remove
it.

Note that u64_stats provides no sequence counter protection on 64-bit
architectures. There, the qdisc tc statistics "packets" and "bytes"
values can, on rare occasions, be read out of sync with each other. The
individual values remain valid.
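
For example (a hypothetical interleaving, not taken from a real trace):
on 64-bit, the u64_stats fetch/retry calls compile away, so a
statistics reader can interleave with the datapath like this:

	reader (gnet_stats_add_basic)        updater (datapath)
	bytes = u64_stats_read(&b->bytes)
	                                     _bstats_update(b, skb->len, 1)
	packets = u64_stats_read(&b->packets)

The reported "bytes" predates the update while "packets" includes it;
each value is still a valid 64-bit load, only the pair is momentarily
inconsistent.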

Signed-off-by: Ahmed S. Darwish <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
a-darwish authored and davem330 committed Oct 18, 2021
1 parent 50dc9a8 commit 29cbcd8
Showing 21 changed files with 102 additions and 134 deletions.
4 changes: 0 additions & 4 deletions include/linux/netdevice.h
@@ -1916,7 +1916,6 @@ enum netdev_ml_priv_type {
* @sfp_bus: attached &struct sfp_bus structure.
*
* @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
* @qdisc_running_key: lockdep class annotating Qdisc->running seqcount
*
* @proto_down: protocol port state information can be sent to the
* switch driver and used to set the phys state of the
@@ -2250,7 +2249,6 @@ struct net_device {
struct phy_device *phydev;
struct sfp_bus *sfp_bus;
struct lock_class_key *qdisc_tx_busylock;
struct lock_class_key *qdisc_running_key;
bool proto_down;
unsigned wol_enabled:1;
unsigned threaded:1;
@@ -2360,13 +2358,11 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
#define netdev_lockdep_set_classes(dev) \
{ \
static struct lock_class_key qdisc_tx_busylock_key; \
static struct lock_class_key qdisc_running_key; \
static struct lock_class_key qdisc_xmit_lock_key; \
static struct lock_class_key dev_addr_list_lock_key; \
unsigned int i; \
\
(dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \
(dev)->qdisc_running_key = &qdisc_running_key; \
lockdep_set_class(&(dev)->addr_list_lock, \
&dev_addr_list_lock_key); \
for (i = 0; i < (dev)->num_tx_queues; i++) \
19 changes: 8 additions & 11 deletions include/net/gen_stats.h
@@ -46,18 +46,15 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
spinlock_t *lock, struct gnet_dump *d,
int padattr);

int gnet_stats_copy_basic(const seqcount_t *running,
struct gnet_dump *d,
int gnet_stats_copy_basic(struct gnet_dump *d,
struct gnet_stats_basic_sync __percpu *cpu,
struct gnet_stats_basic_sync *b);
void gnet_stats_add_basic(const seqcount_t *running,
struct gnet_stats_basic_sync *bstats,
struct gnet_stats_basic_sync *b, bool running);
void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
struct gnet_stats_basic_sync __percpu *cpu,
struct gnet_stats_basic_sync *b);
int gnet_stats_copy_basic_hw(const seqcount_t *running,
struct gnet_dump *d,
struct gnet_stats_basic_sync *b, bool running);
int gnet_stats_copy_basic_hw(struct gnet_dump *d,
struct gnet_stats_basic_sync __percpu *cpu,
struct gnet_stats_basic_sync *b);
struct gnet_stats_basic_sync *b, bool running);
int gnet_stats_copy_rate_est(struct gnet_dump *d,
struct net_rate_estimator __rcu **ptr);
int gnet_stats_copy_queue(struct gnet_dump *d,
@@ -74,13 +71,13 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
struct gnet_stats_basic_sync __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *lock,
seqcount_t *running, struct nlattr *opt);
bool running, struct nlattr *opt);
void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
struct gnet_stats_basic_sync __percpu *cpu_bstats,
struct net_rate_estimator __rcu **ptr,
spinlock_t *lock,
seqcount_t *running, struct nlattr *opt);
bool running, struct nlattr *opt);
bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
struct gnet_stats_rate_est64 *sample);
33 changes: 14 additions & 19 deletions include/net/sch_generic.h
@@ -38,6 +38,10 @@ enum qdisc_state_t {
__QDISC_STATE_DEACTIVATED,
__QDISC_STATE_MISSED,
__QDISC_STATE_DRAINING,
/* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
* Use qdisc_run_begin/end() or qdisc_is_running() instead.
*/
__QDISC_STATE_RUNNING,
};

#define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED)
@@ -108,7 +112,6 @@ struct Qdisc {
struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
struct qdisc_skb_head q;
struct gnet_stats_basic_sync bstats;
seqcount_t running;
struct gnet_stats_queue qstats;
unsigned long state;
struct Qdisc *next_sched;
@@ -143,11 +146,15 @@ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)
return NULL;
}

/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc
* root_lock section, or provide their own memory barriers -- ordering
* against qdisc_run_begin/end() atomic bit operations.
*/
static inline bool qdisc_is_running(struct Qdisc *qdisc)
{
if (qdisc->flags & TCQ_F_NOLOCK)
return spin_is_locked(&qdisc->seqlock);
return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
return test_bit(__QDISC_STATE_RUNNING, &qdisc->state);
}

static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
@@ -167,6 +174,9 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
return !READ_ONCE(qdisc->q.qlen);
}

/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with
* the qdisc root lock acquired.
*/
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
if (qdisc->flags & TCQ_F_NOLOCK) {
@@ -206,15 +216,8 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
* after it releases the lock at the end of qdisc_run_end().
*/
return spin_trylock(&qdisc->seqlock);
} else if (qdisc_is_running(qdisc)) {
return false;
}
/* Variant of write_seqcount_begin() telling lockdep a trylock
* was attempted.
*/
raw_write_seqcount_begin(&qdisc->running);
seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
return true;
return !test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state);
}

static inline void qdisc_run_end(struct Qdisc *qdisc)
@@ -226,7 +229,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
&qdisc->state)))
__netif_schedule(qdisc);
} else {
write_seqcount_end(&qdisc->running);
clear_bit(__QDISC_STATE_RUNNING, &qdisc->state);
}
}

@@ -592,14 +595,6 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
return qdisc_lock(root);
}

static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
{
struct Qdisc *root = qdisc_root_sleeping(qdisc);

ASSERT_RTNL();
return &root->running;
}

static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
{
return qdisc->dev_queue->dev;
16 changes: 10 additions & 6 deletions net/core/gen_estimator.c
@@ -42,7 +42,7 @@
struct net_rate_estimator {
struct gnet_stats_basic_sync *bstats;
spinlock_t *stats_lock;
seqcount_t *running;
bool running;
struct gnet_stats_basic_sync __percpu *cpu_bstats;
u8 ewma_log;
u8 intvl_log; /* period : (250ms << intvl_log) */
@@ -66,7 +66,7 @@ static void est_fetch_counters(struct net_rate_estimator *e,
if (e->stats_lock)
spin_lock(e->stats_lock);

gnet_stats_add_basic(e->running, b, e->cpu_bstats, e->bstats);
gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running);

if (e->stats_lock)
spin_unlock(e->stats_lock);
@@ -113,7 +113,9 @@ static void est_timer(struct timer_list *t)
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
* @lock: lock for statistics and control path
* @running: qdisc running seqcount
* @running: true if @bstats represents a running qdisc, thus @bstats'
* internal values might change during basic reads. Only used
* if @cpu_bstats is NULL
* @opt: rate estimator configuration TLV
*
* Creates a new rate estimator with &bstats as source and &rate_est
@@ -129,7 +131,7 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
struct gnet_stats_basic_sync __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *lock,
seqcount_t *running,
bool running,
struct nlattr *opt)
{
struct gnet_estimator *parm = nla_data(opt);
@@ -218,7 +220,9 @@ EXPORT_SYMBOL(gen_kill_estimator);
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
* @lock: lock for statistics and control path
* @running: qdisc running seqcount (might be NULL)
* @running: true if @bstats represents a running qdisc, thus @bstats'
* internal values might change during basic reads. Only used
* if @cpu_bstats is NULL
* @opt: rate estimator configuration TLV
*
* Replaces the configuration of a rate estimator by calling
@@ -230,7 +234,7 @@ int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
struct gnet_stats_basic_sync __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *lock,
seqcount_t *running, struct nlattr *opt)
bool running, struct nlattr *opt)
{
return gen_new_estimator(bstats, cpu_bstats, rate_est,
lock, running, opt);
50 changes: 28 additions & 22 deletions net/core/gen_stats.c
@@ -146,42 +146,42 @@ static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
_bstats_update(bstats, t_bytes, t_packets);
}

void gnet_stats_add_basic(const seqcount_t *running,
struct gnet_stats_basic_sync *bstats,
void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
struct gnet_stats_basic_sync __percpu *cpu,
struct gnet_stats_basic_sync *b)
struct gnet_stats_basic_sync *b, bool running)
{
unsigned int seq;
unsigned int start;
u64 bytes = 0;
u64 packets = 0;

WARN_ON_ONCE((cpu || running) && !in_task());

if (cpu) {
gnet_stats_add_basic_cpu(bstats, cpu);
return;
}
do {
if (running)
seq = read_seqcount_begin(running);
start = u64_stats_fetch_begin_irq(&b->syncp);
bytes = u64_stats_read(&b->bytes);
packets = u64_stats_read(&b->packets);
} while (running && read_seqcount_retry(running, seq));
} while (running && u64_stats_fetch_retry_irq(&b->syncp, start));

_bstats_update(bstats, bytes, packets);
}
EXPORT_SYMBOL(gnet_stats_add_basic);

static int
___gnet_stats_copy_basic(const seqcount_t *running,
struct gnet_dump *d,
___gnet_stats_copy_basic(struct gnet_dump *d,
struct gnet_stats_basic_sync __percpu *cpu,
struct gnet_stats_basic_sync *b,
int type)
int type, bool running)
{
struct gnet_stats_basic_sync bstats;
u64 bstats_bytes, bstats_packets;

gnet_stats_basic_sync_init(&bstats);
gnet_stats_add_basic(running, &bstats, cpu, b);
gnet_stats_add_basic(&bstats, cpu, b, running);

bstats_bytes = u64_stats_read(&bstats.bytes);
bstats_packets = u64_stats_read(&bstats.packets);
@@ -210,10 +210,14 @@ ___gnet_stats_copy_basic(const seqcount_t *running,

/**
* gnet_stats_copy_basic - copy basic statistics into statistic TLV
* @running: seqcount_t pointer
* @d: dumping handle
* @cpu: copy statistic per cpu
* @b: basic statistics
* @running: true if @b represents a running qdisc, thus @b's
* internal values might change during basic reads.
* Only used if @cpu is NULL
*
* Context: task; must not be run from IRQ or BH contexts
*
* Appends the basic statistics to the top level TLV created by
* gnet_stats_start_copy().
@@ -222,22 +226,25 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_copy_basic(const seqcount_t *running,
struct gnet_dump *d,
gnet_stats_copy_basic(struct gnet_dump *d,
struct gnet_stats_basic_sync __percpu *cpu,
struct gnet_stats_basic_sync *b)
struct gnet_stats_basic_sync *b,
bool running)
{
return ___gnet_stats_copy_basic(running, d, cpu, b,
TCA_STATS_BASIC);
return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running);
}
EXPORT_SYMBOL(gnet_stats_copy_basic);

/**
* gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV
* @running: seqcount_t pointer
* @d: dumping handle
* @cpu: copy statistic per cpu
* @b: basic statistics
* @running: true if @b represents a running qdisc, thus @b's
* internal values might change during basic reads.
* Only used if @cpu is NULL
*
* Context: task; must not be run from IRQ or BH contexts
*
* Appends the basic statistics to the top level TLV created by
* gnet_stats_start_copy().
@@ -246,13 +253,12 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_copy_basic_hw(const seqcount_t *running,
struct gnet_dump *d,
gnet_stats_copy_basic_hw(struct gnet_dump *d,
struct gnet_stats_basic_sync __percpu *cpu,
struct gnet_stats_basic_sync *b)
struct gnet_stats_basic_sync *b,
bool running)
{
return ___gnet_stats_copy_basic(running, d, cpu, b,
TCA_STATS_BASIC_HW);
return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running);
}
EXPORT_SYMBOL(gnet_stats_copy_basic_hw);

9 changes: 5 additions & 4 deletions net/sched/act_api.c
@@ -501,7 +501,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
if (est) {
err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
&p->tcfa_rate_est,
&p->tcfa_lock, NULL, est);
&p->tcfa_lock, false, est);
if (err)
goto err4;
}
@@ -1173,9 +1173,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
if (err < 0)
goto errout;

if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw,
&p->tcfa_bstats_hw) < 0 ||
if (gnet_stats_copy_basic(&d, p->cpu_bstats,
&p->tcfa_bstats, false) < 0 ||
gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw,
&p->tcfa_bstats_hw, false) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
gnet_stats_copy_queue(&d, p->cpu_qstats,
&p->tcfa_qstats,
2 changes: 1 addition & 1 deletion net/sched/act_police.c
@@ -125,7 +125,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
police->common.cpu_bstats,
&police->tcf_rate_est,
&police->tcf_lock,
NULL, est);
false, est);
if (err)
goto failure;
} else if (tb[TCA_POLICE_AVRATE] &&