Skip to content

Commit

Permalink
i40e: Fix DMA mappings leak
Browse files Browse the repository at this point in the history
During reallocation of RX buffers, new DMA mappings are created for
those buffers.

steps for reproduction:
while :
do
for ((i=0; i<=8160; i=i+32))
do
ethtool -G enp130s0f0 rx $i tx $i
sleep 0.5
ethtool -g enp130s0f0
done
done

This resulted in crash:
i40e 0000:01:00.1: Unable to allocate memory for the Rx descriptor ring, size=65536
Driver BUG
WARNING: CPU: 0 PID: 4300 at net/core/xdp.c:141 xdp_rxq_info_unreg+0x43/0x50
Call Trace:
i40e_free_rx_resources+0x70/0x80 [i40e]
i40e_set_ringparam+0x27c/0x800 [i40e]
ethnl_set_rings+0x1b2/0x290
genl_family_rcv_msg_doit.isra.15+0x10f/0x150
genl_family_rcv_msg+0xb3/0x160
? rings_fill_reply+0x1a0/0x1a0
genl_rcv_msg+0x47/0x90
? genl_family_rcv_msg+0x160/0x160
netlink_rcv_skb+0x4c/0x120
genl_rcv+0x24/0x40
netlink_unicast+0x196/0x230
netlink_sendmsg+0x204/0x3d0
sock_sendmsg+0x4c/0x50
__sys_sendto+0xee/0x160
? handle_mm_fault+0xbe/0x1e0
? syscall_trace_enter+0x1d3/0x2c0
__x64_sys_sendto+0x24/0x30
do_syscall_64+0x5b/0x1a0
entry_SYSCALL_64_after_hwframe+0x65/0xca
RIP: 0033:0x7f5eac8b035b
Missing register, driver bug
WARNING: CPU: 0 PID: 4300 at net/core/xdp.c:119 xdp_rxq_info_unreg_mem_model+0x69/0x140
Call Trace:
xdp_rxq_info_unreg+0x1e/0x50
i40e_free_rx_resources+0x70/0x80 [i40e]
i40e_set_ringparam+0x27c/0x800 [i40e]
ethnl_set_rings+0x1b2/0x290
genl_family_rcv_msg_doit.isra.15+0x10f/0x150
genl_family_rcv_msg+0xb3/0x160
? rings_fill_reply+0x1a0/0x1a0
genl_rcv_msg+0x47/0x90
? genl_family_rcv_msg+0x160/0x160
netlink_rcv_skb+0x4c/0x120
genl_rcv+0x24/0x40
netlink_unicast+0x196/0x230
netlink_sendmsg+0x204/0x3d0
sock_sendmsg+0x4c/0x50
__sys_sendto+0xee/0x160
? handle_mm_fault+0xbe/0x1e0
? syscall_trace_enter+0x1d3/0x2c0
__x64_sys_sendto+0x24/0x30
do_syscall_64+0x5b/0x1a0
entry_SYSCALL_64_after_hwframe+0x65/0xca
RIP: 0033:0x7f5eac8b035b

This was caused because of new buffers with different RX ring count should
substitute older ones, but those buffers were freed in
i40e_configure_rx_ring and reallocated again with i40e_alloc_rx_bi,
thus kfree on rx_bi caused leak of already mapped DMA.

Fix this by reallocating ZC with rx_bi_zc struct when BPF program loads. Additionally
reallocate back to rx_bi when BPF program unloads.

If BPF program is loaded/unloaded and XSK pools are created, reallocate
RX queues accordingly in XSP_SETUP_XSK_POOL handler.

Fixes: be1222b ("i40e: Separate kernel allocated rx_bi rings from AF_XDP rings")
Signed-off-by: Jan Sokolowski <[email protected]>
Signed-off-by: Mateusz Palczewski <[email protected]>
Signed-off-by: Jacob Keller <[email protected]>
Tested-by: Chandan <[email protected]> (A Contingent Worker at Intel)
Tested-by: Gurucharan <[email protected]> (A Contingent worker at Intel)
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
JanJSokolowski authored and davem330 committed Oct 14, 2022
1 parent 0d4636f commit aae425e
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 28 deletions.
3 changes: 0 additions & 3 deletions drivers/net/ethernet/intel/i40e/i40e_ethtool.c
Original file line number Diff line number Diff line change
Expand Up @@ -2181,9 +2181,6 @@ static int i40e_set_ringparam(struct net_device *netdev,
*/
rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS;
err = i40e_setup_rx_descriptors(&rx_rings[i]);
if (err)
goto rx_unwind;
err = i40e_alloc_rx_bi(&rx_rings[i]);
if (err)
goto rx_unwind;

Expand Down
16 changes: 9 additions & 7 deletions drivers/net/ethernet/intel/i40e/i40e_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3566,12 +3566,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
if (ring->vsi->type == I40E_VSI_MAIN)
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);

kfree(ring->rx_bi);
ring->xsk_pool = i40e_xsk_pool(ring);
if (ring->xsk_pool) {
ret = i40e_alloc_rx_bi_zc(ring);
if (ret)
return ret;
ring->rx_buf_len =
xsk_pool_get_rx_frame_size(ring->xsk_pool);
/* For AF_XDP ZC, we disallow packets to span on
Expand All @@ -3589,9 +3585,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
ring->queue_index);

} else {
ret = i40e_alloc_rx_bi(ring);
if (ret)
return ret;
ring->rx_buf_len = vsi->rx_buf_len;
if (ring->vsi->type == I40E_VSI_MAIN) {
ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
Expand Down Expand Up @@ -13296,6 +13289,14 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
i40e_reset_and_rebuild(pf, true, true);
}

if (!i40e_enabled_xdp_vsi(vsi) && prog) {
if (i40e_realloc_rx_bi_zc(vsi, true))
return -ENOMEM;
} else if (i40e_enabled_xdp_vsi(vsi) && !prog) {
if (i40e_realloc_rx_bi_zc(vsi, false))
return -ENOMEM;
}

for (i = 0; i < vsi->num_queue_pairs; i++)
WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);

Expand Down Expand Up @@ -13528,6 +13529,7 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)

i40e_queue_pair_disable_irq(vsi, queue_pair);
err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);
i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
i40e_queue_pair_clean_rings(vsi, queue_pair);
i40e_queue_pair_reset_stats(vsi, queue_pair);
Expand Down
13 changes: 5 additions & 8 deletions drivers/net/ethernet/intel/i40e/i40e_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1457,14 +1457,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
return -ENOMEM;
}

int i40e_alloc_rx_bi(struct i40e_ring *rx_ring)
{
unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count;

rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL);
return rx_ring->rx_bi ? 0 : -ENOMEM;
}

static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
{
memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count);
Expand Down Expand Up @@ -1593,6 +1585,11 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)

rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;

rx_ring->rx_bi =
kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL);
if (!rx_ring->rx_bi)
return -ENOMEM;

return 0;
}

Expand Down
1 change: 0 additions & 1 deletion drivers/net/ethernet/intel/i40e/i40e_txrx.h
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,6 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
int i40e_alloc_rx_bi(struct i40e_ring *rx_ring);

/**
* i40e_get_head - Retrieve head from head writeback
Expand Down
67 changes: 59 additions & 8 deletions drivers/net/ethernet/intel/i40e/i40e_xsk.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,6 @@
#include "i40e_txrx_common.h"
#include "i40e_xsk.h"

int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring)
{
unsigned long sz = sizeof(*rx_ring->rx_bi_zc) * rx_ring->count;

rx_ring->rx_bi_zc = kzalloc(sz, GFP_KERNEL);
return rx_ring->rx_bi_zc ? 0 : -ENOMEM;
}

void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring)
{
memset(rx_ring->rx_bi_zc, 0,
Expand All @@ -29,6 +21,58 @@ static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
return &rx_ring->rx_bi_zc[idx];
}

/**
* i40e_realloc_rx_xdp_bi - reallocate SW ring for either XSK or normal buffer
* @rx_ring: Current rx ring
* @pool_present: is pool for XSK present
*
* Try allocating memory and return ENOMEM, if failed to allocate.
* If allocation was successful, substitute buffer with allocated one.
* Returns 0 on success, negative on failure
*/
static int i40e_realloc_rx_xdp_bi(struct i40e_ring *rx_ring, bool pool_present)
{
size_t elem_size = pool_present ? sizeof(*rx_ring->rx_bi_zc) :
sizeof(*rx_ring->rx_bi);
void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);

if (!sw_ring)
return -ENOMEM;

if (pool_present) {
kfree(rx_ring->rx_bi);
rx_ring->rx_bi = NULL;
rx_ring->rx_bi_zc = sw_ring;
} else {
kfree(rx_ring->rx_bi_zc);
rx_ring->rx_bi_zc = NULL;
rx_ring->rx_bi = sw_ring;
}
return 0;
}

/**
* i40e_realloc_rx_bi_zc - reallocate rx SW rings
* @vsi: Current VSI
* @zc: is zero copy set
*
* Reallocate buffer for rx_rings that might be used by XSK.
* XDP requires more memory, than rx_buf provides.
* Returns 0 on success, negative on failure
*/
int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc)
{
struct i40e_ring *rx_ring;
unsigned long q;

for_each_set_bit(q, vsi->af_xdp_zc_qps, vsi->alloc_queue_pairs) {
rx_ring = vsi->rx_rings[q];
if (i40e_realloc_rx_xdp_bi(rx_ring, zc))
return -ENOMEM;
}
return 0;
}

/**
* i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a
* certain ring/qid
Expand Down Expand Up @@ -69,6 +113,10 @@ static int i40e_xsk_pool_enable(struct i40e_vsi *vsi,
if (err)
return err;

err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], true);
if (err)
return err;

err = i40e_queue_pair_enable(vsi, qid);
if (err)
return err;
Expand Down Expand Up @@ -113,6 +161,9 @@ static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid)
xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR);

if (if_running) {
err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], false);
if (err)
return err;
err = i40e_queue_pair_enable(vsi, qid);
if (err)
return err;
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/i40e/i40e_xsk.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);

bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring);
int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);

#endif /* _I40E_XSK_H_ */

0 comments on commit aae425e

Please sign in to comment.