Skip to content

Commit

Permalink
net_sched: drop packets after root qdisc lock is released
Browse files Browse the repository at this point in the history
Qdisc performance suffers when packets are dropped at enqueue()
time because drops (kfree_skb()) are done while qdisc lock is held,
delaying a dequeue() draining the queue.

Nominal throughput can be reduced by 50 % when this happens,
at a time we would like the dequeue() to proceed as fast as possible.

Even FQ is vulnerable to this problem, while one of FQ goals was
to provide some flow isolation.

This patch adds a 'struct sk_buff **to_free' parameter to all
qdisc->enqueue(), and in qdisc_drop() helper.

I measured a performance increase of up to 12 %, but this patch
is a prereq so that future batches in enqueue() can fly.

Signed-off-by: Eric Dumazet <[email protected]>
Acked-by: Jesper Dangaard Brouer <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
Eric Dumazet authored and davem330 committed Jun 25, 2016
1 parent 36195d8 commit 520ac30
Show file tree
Hide file tree
Showing 28 changed files with 170 additions and 114 deletions.
41 changes: 30 additions & 11 deletions include/net/sch_generic.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ struct qdisc_size_table {
};

struct Qdisc {
int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
struct sk_buff * (*dequeue)(struct Qdisc *dev);
int (*enqueue)(struct sk_buff *skb,
struct Qdisc *sch,
struct sk_buff **to_free);
struct sk_buff * (*dequeue)(struct Qdisc *sch);
unsigned int flags;
#define TCQ_F_BUILTIN 1
#define TCQ_F_INGRESS 2
Expand Down Expand Up @@ -160,7 +162,9 @@ struct Qdisc_ops {
char id[IFNAMSIZ];
int priv_size;

int (*enqueue)(struct sk_buff *, struct Qdisc *);
int (*enqueue)(struct sk_buff *skb,
struct Qdisc *sch,
struct sk_buff **to_free);
struct sk_buff * (*dequeue)(struct Qdisc *);
struct sk_buff * (*peek)(struct Qdisc *);

Expand Down Expand Up @@ -498,10 +502,11 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
#endif
}

static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
qdisc_calculate_pkt_len(skb, sch);
return sch->enqueue(skb, sch);
return sch->enqueue(skb, sch, to_free);
}

static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
Expand Down Expand Up @@ -626,24 +631,36 @@ static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
return __qdisc_dequeue_head(sch, &sch->q);
}

/* Instead of calling kfree_skb() while root qdisc lock is held,
* queue the skb for future freeing at end of __dev_xmit_skb()
*/
static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free)
{
skb->next = *to_free;
*to_free = skb;
}

static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
struct sk_buff_head *list)
struct sk_buff_head *list,
struct sk_buff **to_free)
{
struct sk_buff *skb = __skb_dequeue(list);

if (likely(skb != NULL)) {
unsigned int len = qdisc_pkt_len(skb);

qdisc_qstats_backlog_dec(sch, skb);
kfree_skb(skb);
__qdisc_drop(skb, to_free);
return len;
}

return 0;
}

static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch)
static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch,
struct sk_buff **to_free)
{
return __qdisc_queue_drop_head(sch, &sch->q);
return __qdisc_queue_drop_head(sch, &sch->q, to_free);
}

static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
Expand Down Expand Up @@ -724,9 +741,11 @@ static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
qdisc_qstats_drop(sch);
}

static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)

static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
kfree_skb(skb);
__qdisc_drop(skb, to_free);
qdisc_qstats_drop(sch);

return NET_XMIT_DROP;
Expand Down
7 changes: 5 additions & 2 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -3070,6 +3070,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct netdev_queue *txq)
{
spinlock_t *root_lock = qdisc_lock(q);
struct sk_buff *to_free = NULL;
bool contended;
int rc;

Expand All @@ -3086,7 +3087,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,

spin_lock(root_lock);
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
kfree_skb(skb);
__qdisc_drop(skb, &to_free);
rc = NET_XMIT_DROP;
} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
qdisc_run_begin(q)) {
Expand All @@ -3109,7 +3110,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,

rc = NET_XMIT_SUCCESS;
} else {
rc = q->enqueue(skb, q) & NET_XMIT_MASK;
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
Expand All @@ -3119,6 +3120,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
}
}
spin_unlock(root_lock);
if (unlikely(to_free))
kfree_skb_list(to_free);
if (unlikely(contended))
spin_unlock(&q->busylock);
return rc;
Expand Down
9 changes: 5 additions & 4 deletions net/sched/sch_atm.c
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,8 @@ static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch,

/* --------------------------- Qdisc operations ---------------------------- */

static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow;
Expand Down Expand Up @@ -398,10 +399,10 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
kfree_skb(skb);
__qdisc_drop(skb, to_free);
return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
kfree_skb(skb);
__qdisc_drop(skb, to_free);
goto drop;
case TC_ACT_RECLASSIFY:
if (flow->excess)
Expand All @@ -413,7 +414,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
#endif
}

ret = qdisc_enqueue(skb, flow->q);
ret = qdisc_enqueue(skb, flow->q, to_free);
if (ret != NET_XMIT_SUCCESS) {
drop: __maybe_unused
if (net_xmit_drop_count(ret)) {
Expand Down
5 changes: 3 additions & 2 deletions net/sched/sch_blackhole.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
#include <linux/skbuff.h>
#include <net/pkt_sched.h>

static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
qdisc_drop(skb, sch);
qdisc_drop(skb, sch, to_free);
return NET_XMIT_SUCCESS;
}

Expand Down
7 changes: 4 additions & 3 deletions net/sched/sch_cbq.c
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,8 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct cbq_sched_data *q = qdisc_priv(sch);
int uninitialized_var(ret);
Expand All @@ -370,11 +371,11 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (cl == NULL) {
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
kfree_skb(skb);
__qdisc_drop(skb, to_free);
return ret;
}

ret = qdisc_enqueue(skb, cl->q);
ret = qdisc_enqueue(skb, cl->q, to_free);
if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
cbq_mark_toplevel(q, cl);
Expand Down
16 changes: 9 additions & 7 deletions net/sched/sch_choke.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ static void choke_zap_tail_holes(struct choke_sched_data *q)
}

/* Drop packet from queue array by creating a "hole" */
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
struct sk_buff **to_free)
{
struct choke_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb = q->tab[idx];
Expand All @@ -129,7 +130,7 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)

qdisc_qstats_backlog_dec(sch, skb);
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
qdisc_drop(skb, sch);
qdisc_drop(skb, sch, to_free);
--sch->q.qlen;
}

Expand Down Expand Up @@ -261,7 +262,8 @@ static bool choke_match_random(const struct choke_sched_data *q,
return choke_match_flow(oskb, nskb);
}

static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
struct choke_sched_data *q = qdisc_priv(sch);
Expand All @@ -288,7 +290,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* Draw a packet at random from queue and compare flow */
if (choke_match_random(q, skb, &idx)) {
q->stats.matched++;
choke_drop_by_idx(sch, idx);
choke_drop_by_idx(sch, idx, to_free);
goto congestion_drop;
}

Expand Down Expand Up @@ -331,16 +333,16 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}

q->stats.pdrop++;
return qdisc_drop(skb, sch);
return qdisc_drop(skb, sch, to_free);

congestion_drop:
qdisc_drop(skb, sch);
qdisc_drop(skb, sch, to_free);
return NET_XMIT_CN;

other_drop:
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
kfree_skb(skb);
__qdisc_drop(skb, to_free);
return ret;
}

Expand Down
8 changes: 5 additions & 3 deletions net/sched/sch_codel.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ static void drop_func(struct sk_buff *skb, void *ctx)
{
struct Qdisc *sch = ctx;

qdisc_drop(skb, sch);
kfree_skb(skb);
qdisc_qstats_drop(sch);
}

static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
Expand All @@ -107,7 +108,8 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
return skb;
}

static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct codel_sched_data *q;

Expand All @@ -117,7 +119,7 @@ static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
q = qdisc_priv(sch);
q->drop_overlimit++;
return qdisc_drop(skb, sch);
return qdisc_drop(skb, sch, to_free);
}

static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
Expand Down
7 changes: 4 additions & 3 deletions net/sched/sch_drr.c
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,8 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
return NULL;
}

static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl;
Expand All @@ -360,11 +361,11 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (cl == NULL) {
if (err & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
kfree_skb(skb);
__qdisc_drop(skb, to_free);
return err;
}

err = qdisc_enqueue(skb, cl->qdisc);
err = qdisc_enqueue(skb, cl->qdisc, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;
Expand Down
9 changes: 5 additions & 4 deletions net/sched/sch_dsmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ static inline struct tcf_proto __rcu **dsmark_find_tcf(struct Qdisc *sch,

/* --------------------------- Qdisc operations ---------------------------- */

static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
int err;
Expand Down Expand Up @@ -234,7 +235,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
#ifdef CONFIG_NET_CLS_ACT
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
kfree_skb(skb);
__qdisc_drop(skb, to_free);
return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;

case TC_ACT_SHOT:
Expand All @@ -251,7 +252,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
}

err = qdisc_enqueue(skb, p->q);
err = qdisc_enqueue(skb, p->q, to_free);
if (err != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(err))
qdisc_qstats_drop(sch);
Expand All @@ -264,7 +265,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;

drop:
qdisc_drop(skb, sch);
qdisc_drop(skb, sch, to_free);
return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
}

Expand Down
15 changes: 9 additions & 6 deletions net/sched/sch_fifo.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,29 +19,32 @@

/* 1 band FIFO pseudo-"scheduler" */

static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
return qdisc_enqueue_tail(skb, sch);

return qdisc_drop(skb, sch);
return qdisc_drop(skb, sch, to_free);
}

static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
if (likely(skb_queue_len(&sch->q) < sch->limit))
return qdisc_enqueue_tail(skb, sch);

return qdisc_drop(skb, sch);
return qdisc_drop(skb, sch, to_free);
}

static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
if (likely(skb_queue_len(&sch->q) < sch->limit))
return qdisc_enqueue_tail(skb, sch);

/* queue full, remove one skb to fulfill the limit */
__qdisc_queue_drop_head(sch, &sch->q);
__qdisc_queue_drop_head(sch, &sch->q, to_free);
qdisc_qstats_drop(sch);
qdisc_enqueue_tail(skb, sch);

Expand Down
Loading

0 comments on commit 520ac30

Please sign in to comment.