Skip to content

Commit

Permalink
[CORE] Stack changes to add multiqueue hardware support API
Browse files Browse the repository at this point in the history
Add the multiqueue hardware device support API to the core network
stack.  Allow drivers to allocate multiple queues and manage them at
the netdev level if they choose to do so.

Added a new field to sk_buff, namely queue_mapping, for drivers to
know which tx_ring to select based on OS classification of the flow.

Signed-off-by: Peter P Waskiewicz Jr <[email protected]>
Signed-off-by: Patrick McHardy <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
ppwaskie authored and David S. Miller committed Jul 11, 2007
1 parent a093bf0 commit f25f4e4
Show file tree
Hide file tree
Showing 10 changed files with 158 additions and 30 deletions.
8 changes: 8 additions & 0 deletions drivers/net/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ menuconfig NETDEVICES
# that for each of the symbols.
if NETDEVICES

config NETDEVICES_MULTIQUEUE
bool "Netdevice multiple hardware queue support"
---help---
Say Y here if you want to allow the network stack to use multiple
hardware TX queues on an ethernet device.

Most people will say N here.

config IFB
tristate "Intermediate Functional Block support"
depends on NET_CLS_ACT
Expand Down
3 changes: 2 additions & 1 deletion include/linux/etherdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ extern void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev
extern int eth_header_cache(struct neighbour *neigh,
struct hh_cache *hh);

extern struct net_device *alloc_etherdev(int sizeof_priv);
extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count);
#define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)

/**
* is_zero_ether_addr - Determine if given Ethernet address is all zeros.
Expand Down
80 changes: 75 additions & 5 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,14 @@ struct wireless_dev;
#define MAX_HEADER (LL_MAX_HEADER + 48)
#endif

/*
 * Per-queue control block for a device's TX subqueues.
 */
struct net_device_subqueue {
	/*
	 * Control state for this queue.  Per-queue locks may be added to
	 * this struct in the future.
	 */
	unsigned long state;
};

/*
* Network device statistics. Akin to the 2.0 ether stats but
* with byte counters.
Expand Down Expand Up @@ -331,6 +339,7 @@ struct net_device
#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
#define NETIF_F_GSO 2048 /* Enable software GSO. */
#define NETIF_F_LLTX 4096 /* LockLess TX */
#define NETIF_F_MULTI_QUEUE 16384 /* Has multiple TX/RX queues */

/* Segmentation offload features */
#define NETIF_F_GSO_SHIFT 16
Expand Down Expand Up @@ -557,6 +566,10 @@ struct net_device

/* rtnetlink link ops */
const struct rtnl_link_ops *rtnl_link_ops;

/* The TX queue control structures */
unsigned int egress_subqueue_count;
struct net_device_subqueue egress_subqueue[0];
};
#define to_net_dev(d) container_of(d, struct net_device, dev)

Expand All @@ -565,9 +578,7 @@ struct net_device

/*
 * netdev_priv - return a pointer to the driver-private area of @dev.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers.  The
 * three-line pointer-arithmetic return appears to be the removed version
 * (private area at the aligned end of struct net_device) and
 * "return dev->priv;" its replacement -- confirm against the real header.
 * Read literally, the second return is unreachable.
 */
static inline void *netdev_priv(const struct net_device *dev)
{
return (char *)dev + ((sizeof(struct net_device)
+ NETDEV_ALIGN_CONST)
& ~NETDEV_ALIGN_CONST);
return dev->priv;
}

#define SET_MODULE_OWNER(dev) do { } while (0)
Expand Down Expand Up @@ -719,6 +730,62 @@ static inline int netif_running(const struct net_device *dev)
return test_bit(__LINK_STATE_START, &dev->state);
}

/*
* Routines to manage the subqueues on a device. We only need start,
* stop, and a check if it's stopped. All other device management is
* done at the overall netdevice level.
* Also provide a test for whether the device is multiqueue.
*/
/*
 * Clear the XOFF bit in the state word of TX subqueue @queue_index of @dev.
 * The body is compiled out unless CONFIG_NETDEVICES_MULTIQUEUE is set.
 */
static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index)
{
#ifdef CONFIG_NETDEVICES_MULTIQUEUE
	struct net_device_subqueue *sq = &dev->egress_subqueue[queue_index];

	clear_bit(__LINK_STATE_XOFF, &sq->state);
#endif
}

/*
 * Set the XOFF bit in the state word of TX subqueue @queue_index of @dev.
 * The body is compiled out unless CONFIG_NETDEVICES_MULTIQUEUE is set.
 */
static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
{
#ifdef CONFIG_NETDEVICES_MULTIQUEUE
	struct net_device_subqueue *sq = &dev->egress_subqueue[queue_index];

#ifdef CONFIG_NETPOLL_TRAP
	/* Leave the queue state untouched while netpoll_trap() is true. */
	if (netpoll_trap())
		return;
#endif
	set_bit(__LINK_STATE_XOFF, &sq->state);
#endif
}

/*
 * Report whether the XOFF bit is set for TX subqueue @queue_index of @dev.
 * Always returns 0 when CONFIG_NETDEVICES_MULTIQUEUE is not set.
 */
static inline int netif_subqueue_stopped(const struct net_device *dev,
					 u16 queue_index)
{
#ifndef CONFIG_NETDEVICES_MULTIQUEUE
	return 0;
#else
	const struct net_device_subqueue *sq = &dev->egress_subqueue[queue_index];

	return test_bit(__LINK_STATE_XOFF, &sq->state);
#endif
}

/*
 * Clear the XOFF bit of TX subqueue @queue_index of @dev and, if the bit was
 * previously set, call __netif_schedule() on the device.
 * The body is compiled out unless CONFIG_NETDEVICES_MULTIQUEUE is set.
 */
static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
{
#ifdef CONFIG_NETDEVICES_MULTIQUEUE
	struct net_device_subqueue *sq = &dev->egress_subqueue[queue_index];

#ifdef CONFIG_NETPOLL_TRAP
	/* Leave the queue state untouched while netpoll_trap() is true. */
	if (netpoll_trap())
		return;
#endif
	/* Nothing to schedule unless this call is the one that cleared XOFF. */
	if (!test_and_clear_bit(__LINK_STATE_XOFF, &sq->state))
		return;

	__netif_schedule(dev);
#endif
}

/*
 * Return 1 if @dev has NETIF_F_MULTI_QUEUE set in its features, 0 otherwise.
 * Always returns 0 when CONFIG_NETDEVICES_MULTIQUEUE is not set.
 */
static inline int netif_is_multiqueue(const struct net_device *dev)
{
#ifndef CONFIG_NETDEVICES_MULTIQUEUE
	return 0;
#else
	return (dev->features & NETIF_F_MULTI_QUEUE) != 0;
#endif
}

/* Use this variant when it is known for sure that it
* is executing from interrupt context.
Expand Down Expand Up @@ -1009,8 +1076,11 @@ static inline void netif_tx_disable(struct net_device *dev)
extern void ether_setup(struct net_device *dev);

/* Support for loadable net-drivers */
extern struct net_device *alloc_netdev(int sizeof_priv, const char *name,
void (*setup)(struct net_device *));
extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *),
unsigned int queue_count);
#define alloc_netdev(sizeof_priv, name, setup) \
alloc_netdev_mq(sizeof_priv, name, setup, 1)
extern int register_netdev(struct net_device *dev);
extern void unregister_netdev(struct net_device *dev);
/* Functions used for secondary unicast and multicast support */
Expand Down
25 changes: 22 additions & 3 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,6 @@ typedef unsigned char *sk_buff_data_t;
* @sk: Socket we are owned by
* @tstamp: Time we arrived
* @dev: Device we arrived on/are leaving by
* @iif: ifindex of device we arrived on
* @transport_header: Transport layer header
* @network_header: Network layer header
* @mac_header: Link layer header
Expand Down Expand Up @@ -231,6 +230,8 @@ typedef unsigned char *sk_buff_data_t;
* @nfctinfo: Relationship of this skb to the connection
* @nfct_reasm: netfilter conntrack re-assembly pointer
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
* @iif: ifindex of device we arrived on
* @queue_mapping: Queue mapping for multiqueue devices
* @tc_index: Traffic control index
* @tc_verd: traffic control verdict
* @dma_cookie: a cookie to one of several possible DMA operations
Expand All @@ -246,8 +247,6 @@ struct sk_buff {
struct sock *sk;
ktime_t tstamp;
struct net_device *dev;
int iif;
/* 4 byte hole on 64 bit*/

struct dst_entry *dst;
struct sec_path *sp;
Expand Down Expand Up @@ -290,12 +289,18 @@ struct sk_buff {
#ifdef CONFIG_BRIDGE_NETFILTER
struct nf_bridge_info *nf_bridge;
#endif

int iif;
__u16 queue_mapping;

#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
__u16 tc_verd; /* traffic control verdict */
#endif
#endif
/* 2 byte hole */

#ifdef CONFIG_NET_DMA
dma_cookie_t dma_cookie;
#endif
Expand Down Expand Up @@ -1725,6 +1730,20 @@ static inline void skb_init_secmark(struct sk_buff *skb)
{ }
#endif

/*
 * Store @queue_mapping in @skb->queue_mapping.
 * The store is compiled out unless CONFIG_NETDEVICES_MULTIQUEUE is set.
 */
static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping)
{
#if defined(CONFIG_NETDEVICES_MULTIQUEUE)
	skb->queue_mapping = queue_mapping;
#endif
}

/*
 * Copy the queue_mapping field from @from into @to.
 * The copy is compiled out unless CONFIG_NETDEVICES_MULTIQUEUE is set.
 */
static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from)
{
#if defined(CONFIG_NETDEVICES_MULTIQUEUE)
	const u16 mapping = from->queue_mapping;

	to->queue_mapping = mapping;
#endif
}

static inline int skb_is_gso(const struct sk_buff *skb)
{
return skb_shinfo(skb)->gso_size;
Expand Down
36 changes: 26 additions & 10 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1429,7 +1429,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
skb->next = nskb;
return rc;
}
if (unlikely(netif_queue_stopped(dev) && skb->next))
if (unlikely((netif_queue_stopped(dev) ||
netif_subqueue_stopped(dev, skb->queue_mapping)) &&
skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);

Expand Down Expand Up @@ -1547,6 +1549,8 @@ int dev_queue_xmit(struct sk_buff *skb)
spin_lock(&dev->queue_lock);
q = dev->qdisc;
if (q->enqueue) {
/* reset queue_mapping to zero */
skb->queue_mapping = 0;
rc = q->enqueue(skb, q);
qdisc_run(dev);
spin_unlock(&dev->queue_lock);
Expand Down Expand Up @@ -1576,7 +1580,8 @@ int dev_queue_xmit(struct sk_buff *skb)

HARD_TX_LOCK(dev, cpu);

if (!netif_queue_stopped(dev)) {
if (!netif_queue_stopped(dev) &&
!netif_subqueue_stopped(dev, skb->queue_mapping)) {
rc = 0;
if (!dev_hard_start_xmit(skb, dev)) {
HARD_TX_UNLOCK(dev);
Expand Down Expand Up @@ -3539,16 +3544,18 @@ static struct net_device_stats *internal_stats(struct net_device *dev)
}

/**
* alloc_netdev - allocate network device
* alloc_netdev_mq - allocate network device
* @sizeof_priv: size of private data to allocate space for
* @name: device name format string
* @setup: callback to initialize device
* @queue_count: the number of subqueues to allocate
*
* Allocates a struct net_device with private data area for driver use
* and performs basic initialization.
* and performs basic initialization. Also allocates subqueue structs
* for each queue on the device at the end of the netdevice.
*/
struct net_device *alloc_netdev(int sizeof_priv, const char *name,
void (*setup)(struct net_device *))
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *), unsigned int queue_count)
{
void *p;
struct net_device *dev;
Expand All @@ -3557,7 +3564,9 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
BUG_ON(strlen(name) >= sizeof(dev->name));

/* ensure 32-byte alignment of both the device and private area */
alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
(sizeof(struct net_device_subqueue) * queue_count)) &
~NETDEV_ALIGN_CONST;
alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;

p = kzalloc(alloc_size, GFP_KERNEL);
Expand All @@ -3570,15 +3579,22 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
dev->padded = (char *)dev - (char *)p;

if (sizeof_priv)
dev->priv = netdev_priv(dev);
if (sizeof_priv) {
dev->priv = ((char *)dev +
((sizeof(struct net_device) +
(sizeof(struct net_device_subqueue) *
queue_count) + NETDEV_ALIGN_CONST)
& ~NETDEV_ALIGN_CONST));
}

dev->egress_subqueue_count = queue_count;

dev->get_stats = internal_stats;
setup(dev);
strcpy(dev->name, name);
return dev;
}
EXPORT_SYMBOL(alloc_netdev);
EXPORT_SYMBOL(alloc_netdev_mq);

/**
* free_netdev - free network device
Expand Down
8 changes: 5 additions & 3 deletions net/core/netpoll.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,9 @@ static void queue_process(struct work_struct *work)

local_irq_save(flags);
netif_tx_lock(dev);
if (netif_queue_stopped(dev) ||
dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
if ((netif_queue_stopped(dev) ||
netif_subqueue_stopped(dev, skb->queue_mapping)) ||
dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
skb_queue_head(&npinfo->txq, skb);
netif_tx_unlock(dev);
local_irq_restore(flags);
Expand Down Expand Up @@ -254,7 +255,8 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
tries > 0; --tries) {
if (netif_tx_trylock(dev)) {
if (!netif_queue_stopped(dev))
if (!netif_queue_stopped(dev) &&
!netif_subqueue_stopped(dev, skb->queue_mapping))
status = dev->hard_start_xmit(skb, dev);
netif_tx_unlock(dev);

Expand Down
10 changes: 7 additions & 3 deletions net/core/pktgen.c
Original file line number Diff line number Diff line change
Expand Up @@ -3139,7 +3139,9 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
}
}

if (netif_queue_stopped(odev) || need_resched()) {
if ((netif_queue_stopped(odev) ||
netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) ||
need_resched()) {
idle_start = getCurUs();

if (!netif_running(odev)) {
Expand All @@ -3154,7 +3156,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)

pkt_dev->idle_acc += getCurUs() - idle_start;

if (netif_queue_stopped(odev)) {
if (netif_queue_stopped(odev) ||
netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) {
pkt_dev->next_tx_us = getCurUs(); /* TODO */
pkt_dev->next_tx_ns = 0;
goto out; /* Try the next interface */
Expand All @@ -3181,7 +3184,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
}

netif_tx_lock_bh(odev);
if (!netif_queue_stopped(odev)) {
if (!netif_queue_stopped(odev) &&
!netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) {

atomic_inc(&(pkt_dev->skb->users));
retry_now:
Expand Down
3 changes: 3 additions & 0 deletions net/core/skbuff.c
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
n->nohdr = 0;
C(pkt_type);
C(ip_summed);
skb_copy_queue_mapping(n, skb);
C(priority);
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
C(ipvs_property);
Expand Down Expand Up @@ -460,6 +461,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#endif
new->sk = NULL;
new->dev = old->dev;
skb_copy_queue_mapping(new, old);
new->priority = old->priority;
new->protocol = old->protocol;
new->dst = dst_clone(old->dst);
Expand Down Expand Up @@ -1932,6 +1934,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
tail = nskb;

nskb->dev = skb->dev;
skb_copy_queue_mapping(nskb, skb);
nskb->priority = skb->priority;
nskb->protocol = skb->protocol;
nskb->dst = dst_clone(skb->dst);
Expand Down
Loading

0 comments on commit f25f4e4

Please sign in to comment.