Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue

Tony Nguyen says:

====================
ice: xsk: ZC changes

Maciej Fijalkowski says:

This set consists of two fixes to issues that were either pointed out
indirectly on the mailing list (John was reviewing the AF_XDP selftests
that exercise ice's ZC support) or reported directly by customers.

The first patch lets user space see a done descriptor in the CQ even after
only a single frame has been transmitted (see the user-space sketch below),
and the second patch removes the requirement that HW rings be sized to a
power-of-2 number of descriptors when used with AF_XDP.

I should also mention that, due to the current Tx cleaning algorithm, the
4k HW ring was broken; these two patches bring it back to life, so we kill
two birds with one stone.

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  ice: xsk: drop power of 2 ring size restriction for AF_XDP
  ice: xsk: change batched Tx descriptor cleaning
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
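
To make the first fix concrete, here is a minimal, hypothetical user-space
sketch of reaping the completion for a single transmitted frame. It assumes
libxdp's xsk.h ring helpers; the helper name reap_one_completion is made up
for illustration and is not part of this commit. Before the fix, ice only set
the RS bit once a quarter of the ring had been produced, so this loop could
spin for a long time after a lone send; with the RS bit now set on the last
produced descriptor, the completion surfaces promptly.

/* Hypothetical user-space snippet, assuming libxdp's <xdp/xsk.h> helpers. */
#include <xdp/xsk.h>

static void reap_one_completion(struct xsk_ring_cons *cq)
{
	__u32 idx;

	/* Busy-poll until the lone frame shows up in the completion queue. */
	while (xsk_ring_cons__peek(cq, 1, &idx) == 0)
		;

	/* addr identifies the UMEM frame that can now be reused for Tx. */
	__u64 addr = *xsk_ring_cons__comp_addr(cq, idx);
	(void)addr;

	xsk_ring_cons__release(cq, 1);
}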
kuba-moo committed Sep 29, 2022
2 parents c9da02b + b3056ae commit 3e1308a
Showing 3 changed files with 71 additions and 101 deletions.
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/ice/ice_txrx.c

@@ -1467,7 +1467,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
 		bool wd;
 
 		if (tx_ring->xsk_pool)
-			wd = ice_xmit_zc(tx_ring, ICE_DESC_UNUSED(tx_ring), budget);
+			wd = ice_xmit_zc(tx_ring);
 		else if (ice_ring_is_xdp(tx_ring))
 			wd = true;
 		else
163 changes: 68 additions & 95 deletions drivers/net/ethernet/intel/ice/ice_xsk.c

@@ -392,13 +392,6 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
 		goto failure;
 	}
 
-	if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
-	    !is_power_of_2(vsi->tx_rings[qid]->count)) {
-		netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
-		pool_failure = -EINVAL;
-		goto failure;
-	}
-
 	if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
 
 	if (if_running) {
@@ -534,11 +527,10 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
 bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
 {
 	u16 rx_thresh = ICE_RING_QUARTER(rx_ring);
-	u16 batched, leftover, i, tail_bumps;
+	u16 leftover, i, tail_bumps;
 
-	batched = ALIGN_DOWN(count, rx_thresh);
-	tail_bumps = batched / rx_thresh;
-	leftover = count & (rx_thresh - 1);
+	tail_bumps = count / rx_thresh;
+	leftover = count - (tail_bumps * rx_thresh);
 
 	for (i = 0; i < tail_bumps; i++)
 		if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh))
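
Why the mask had to go: "count & (rx_thresh - 1)" equals "count % rx_thresh"
only when rx_thresh is a power of 2, and with arbitrary ring sizes
ICE_RING_QUARTER() can now yield any value. A standalone illustration (not
driver code, just the arithmetic):

/* The bitmask shortcut for a remainder only works for power-of-2 divisors. */
#include <stdio.h>

int main(void)
{
	unsigned int count = 7, thresh = 3;	/* thresh not a power of 2 */

	printf("mask: %u\n", count & (thresh - 1));		  /* prints 2 (wrong) */
	printf("div:  %u\n", count - (count / thresh) * thresh); /* prints 1 */
	return 0;
}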
@@ -788,69 +780,57 @@ ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
 }
 
 /**
- * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring
- * @xdp_ring: XDP ring to clean
- * @napi_budget: amount of descriptors that NAPI allows us to clean
- *
- * Returns count of cleaned descriptors
+ * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ
+ * @xdp_ring: XDP Tx ring
  */
-static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget)
+static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
 {
-	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
-	int budget = napi_budget / tx_thresh;
-	u16 next_dd = xdp_ring->next_dd;
-	u16 ntc, cleared_dds = 0;
-
-	do {
-		struct ice_tx_desc *next_dd_desc;
-		u16 desc_cnt = xdp_ring->count;
-		struct ice_tx_buf *tx_buf;
-		u32 xsk_frames;
-		u16 i;
-
-		next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd);
-		if (!(next_dd_desc->cmd_type_offset_bsz &
-		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
-			break;
+	u16 ntc = xdp_ring->next_to_clean;
+	struct ice_tx_desc *tx_desc;
+	u16 cnt = xdp_ring->count;
+	struct ice_tx_buf *tx_buf;
+	u16 xsk_frames = 0;
+	u16 last_rs;
+	int i;
 
-		cleared_dds++;
-		xsk_frames = 0;
-		if (likely(!xdp_ring->xdp_tx_active)) {
-			xsk_frames = tx_thresh;
-			goto skip;
-		}
+	last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1;
+	tx_desc = ICE_TX_DESC(xdp_ring, last_rs);
+	if ((tx_desc->cmd_type_offset_bsz &
+	    cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) {
+		if (last_rs >= ntc)
+			xsk_frames = last_rs - ntc + 1;
+		else
+			xsk_frames = last_rs + cnt - ntc + 1;
+	}
 
-		ntc = xdp_ring->next_to_clean;
+	if (!xsk_frames)
+		return;
 
-		for (i = 0; i < tx_thresh; i++) {
-			tx_buf = &xdp_ring->tx_buf[ntc];
+	if (likely(!xdp_ring->xdp_tx_active))
+		goto skip;
 
-			if (tx_buf->raw_buf) {
-				ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
-				tx_buf->raw_buf = NULL;
-			} else {
-				xsk_frames++;
-			}
+	ntc = xdp_ring->next_to_clean;
+	for (i = 0; i < xsk_frames; i++) {
+		tx_buf = &xdp_ring->tx_buf[ntc];
 
-			ntc++;
-			if (ntc >= xdp_ring->count)
-				ntc = 0;
-		}
+		if (tx_buf->raw_buf) {
+			ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+			tx_buf->raw_buf = NULL;
+		} else {
+			xsk_frames++;
+		}
+
+		ntc++;
+		if (ntc >= xdp_ring->count)
+			ntc = 0;
+	}
 skip:
-		xdp_ring->next_to_clean += tx_thresh;
-		if (xdp_ring->next_to_clean >= desc_cnt)
-			xdp_ring->next_to_clean -= desc_cnt;
-		if (xsk_frames)
-			xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
-		next_dd_desc->cmd_type_offset_bsz = 0;
-		next_dd = next_dd + tx_thresh;
-		if (next_dd >= desc_cnt)
-			next_dd = tx_thresh - 1;
-	} while (--budget);
-
-	xdp_ring->next_dd = next_dd;
-
-	return cleared_dds * tx_thresh;
+	tx_desc->cmd_type_offset_bsz = 0;
+	xdp_ring->next_to_clean += xsk_frames;
+	if (xdp_ring->next_to_clean >= cnt)
+		xdp_ring->next_to_clean -= cnt;
+	if (xsk_frames)
+		xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
 }
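
The heart of the new scheme is the wrap-aware count of completed descriptors
between next_to_clean and the last RS-marked slot. A standalone sketch of
just that arithmetic (the names mirror the driver's locals, but this is not
driver code):

/* Descriptors from ntc up to and including last_rs are done once the DD bit
 * is observed on last_rs.
 */
static unsigned int completed_frames(unsigned int last_rs, unsigned int ntc,
				     unsigned int cnt)
{
	if (last_rs >= ntc)
		return last_rs - ntc + 1;
	/* The producer wrapped past the end of the ring. */
	return last_rs + cnt - ntc + 1;
}

For example, with cnt = 4096, ntc = 4090 and last_rs = 5, this yields 12
completed frames.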
@@ -885,7 +865,6 @@ static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
 static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
 			       unsigned int *total_bytes)
 {
-	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
 	u16 ntu = xdp_ring->next_to_use;
 	struct ice_tx_desc *tx_desc;
 	u32 i;
@@ -905,13 +884,6 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
 	}
 
 	xdp_ring->next_to_use = ntu;
-
-	if (xdp_ring->next_to_use > xdp_ring->next_rs) {
-		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
-		tx_desc->cmd_type_offset_bsz |=
-			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
-		xdp_ring->next_rs += tx_thresh;
-	}
 }
@@ -924,7 +896,6 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
 static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
 				u32 nb_pkts, unsigned int *total_bytes)
 {
-	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
 	u32 batched, leftover, i;
 
 	batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);
@@ -933,54 +904,54 @@ static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
 		ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
 	for (; i < batched + leftover; i++)
 		ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);
+}
 
-	if (xdp_ring->next_to_use > xdp_ring->next_rs) {
-		struct ice_tx_desc *tx_desc;
+/**
+ * ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU)
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ */
+static void ice_set_rs_bit(struct ice_tx_ring *xdp_ring)
+{
+	u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
+	struct ice_tx_desc *tx_desc;
 
-		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
-		tx_desc->cmd_type_offset_bsz |=
-			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
-		xdp_ring->next_rs += tx_thresh;
-	}
+	tx_desc = ICE_TX_DESC(xdp_ring, ntu);
+	tx_desc->cmd_type_offset_bsz |=
+		cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
 }
 
 /**
  * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
  * @xdp_ring: XDP ring to produce the HW Tx descriptors on
- * @budget: number of free descriptors on HW Tx ring that can be used
- * @napi_budget: amount of descriptors that NAPI allows us to clean
  *
  * Returns true if there is no more work that needs to be done, false otherwise
  */
-bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget)
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring)
 {
 	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
-	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
 	u32 nb_pkts, nb_processed = 0;
 	unsigned int total_bytes = 0;
+	int budget;
+
+	ice_clean_xdp_irq_zc(xdp_ring);
 
-	if (budget < tx_thresh)
-		budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget);
+	budget = ICE_DESC_UNUSED(xdp_ring);
+	budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring));
 
 	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
 	if (!nb_pkts)
 		return true;
 
 	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
-		struct ice_tx_desc *tx_desc;
-
 		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
 		ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
-		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
-		tx_desc->cmd_type_offset_bsz |=
-			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
-		xdp_ring->next_rs = tx_thresh - 1;
 		xdp_ring->next_to_use = 0;
 	}
 
 	ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
 			    &total_bytes);
 
+	ice_set_rs_bit(xdp_ring);
 	ice_xdp_ring_update_tail(xdp_ring);
 	ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes);

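ice_xmit_zc() now derives its own Tx budget instead of taking one from the
caller: the number of unused descriptors, clamped to a quarter of the ring.
A standalone sketch of that computation, assuming the usual ring free-count
idiom behind ICE_DESC_UNUSED() (this is not the driver macro verbatim):

/* Free slots between consumer (ntc) and producer (ntu), keeping one slot in
 * reserve, then clamped so one NAPI poll never consumes more than a quarter
 * of the ring.
 */
static unsigned int zc_tx_budget(unsigned int ntc, unsigned int ntu,
				 unsigned int cnt)
{
	unsigned int unused = (ntc > ntu ? 0 : cnt) + ntc - ntu - 1;
	unsigned int quarter = cnt / 4;

	return unused < quarter ? unused : quarter;
}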
@@ -1058,14 +1029,16 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
  */
 void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring)
 {
-	u16 count_mask = rx_ring->count - 1;
 	u16 ntc = rx_ring->next_to_clean;
 	u16 ntu = rx_ring->next_to_use;
 
-	for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) {
+	while (ntc != ntu) {
 		struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc);
 
 		xsk_buff_free(xdp);
+		ntc++;
+		if (ntc >= rx_ring->count)
+			ntc = 0;
 	}
 }
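
The Rx-side cleanup shows the same theme: "(ntc + 1) & count_mask" only wraps
correctly when the ring size is a power of 2, so the masked advance is
replaced by an explicit compare. A generic sketch (hypothetical helper, not
in the driver):

/* Advance a ring index with an explicit wrap check; works for any ring size,
 * unlike "(i + 1) & (cnt - 1)", which assumes cnt is a power of 2.
 */
static inline unsigned int ring_idx_next(unsigned int i, unsigned int cnt)
{
	return (i + 1 >= cnt) ? 0 : i + 1;
}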
7 changes: 2 additions & 5 deletions drivers/net/ethernet/intel/ice/ice_xsk.h

@@ -26,13 +26,10 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count);
 bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
 void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring);
 void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring);
-bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget);
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring);
 int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc);
 #else
-static inline bool
-ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring,
-	    u32 __always_unused budget,
-	    int __always_unused napi_budget)
+static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring)
 {
 	return false;
 }
