Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Browse files Browse the repository at this point in the history
Daniel Borkmann says:

====================
The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Add the ability to use unaligned chunks in the AF_XDP umem. By
   relaxing where the chunks can be placed, it allows to use an
   arbitrary buffer size and place whenever there is a free
   address in the umem. Helps more seamless DPDK AF_XDP driver
   integration. Support for i40e, ixgbe and mlx5e, from Kevin and
   Maxim.

2) Addition of a wakeup flag for AF_XDP tx and fill rings so the
   application can wake up the kernel for rx/tx processing which
   avoids busy-spinning of the latter, useful when app and driver
   is located on the same core. Support for i40e, ixgbe and mlx5e,
   from Magnus and Maxim.

3) bpftool fixes for printf()-like functions so compiler can actually
   enforce checks, bpftool build system improvements for custom output
   directories, and addition of 'bpftool map freeze' command, from Quentin.

4) Support attaching/detaching XDP programs from 'bpftool net' command,
   from Daniel.

5) Automatic xskmap cleanup when AF_XDP socket is released, and several
   barrier/{read,write}_once fixes in AF_XDP code, from Björn.

6) Relicense of bpf_helpers.h/bpf_endian.h for future libbpf
   inclusion as well as libbpf versioning improvements, from Andrii.

7) Several new BPF kselftests for verifier precision tracking, from Alexei.

8) Several BPF kselftest fixes wrt endianess to run on s390x, from Ilya.

9) And more BPF kselftest improvements all over the place, from Stanislav.

10) Add simple BPF map op cache for nfp driver to batch dumps, from Jakub.

11) AF_XDP socket umem mapping improvements for 32bit archs, from Ivan.

12) Add BPF-to-BPF call and BTF line info support for s390x JIT, from Yauheni.

13) Small optimization in arm64 JIT to spare 1 insns for BPF_MOD, from Jerin.

14) Fix an error check in bpf_tcp_gen_syncookie() helper, from Petar.

15) Various minor fixes and cleanups, from Nathan, Masahiro, Masanari,
    Peter, Wei, Yue.
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Sep 6, 2019
2 parents f9bcfe2 + 593f191 commit 1e46c09
Show file tree
Hide file tree
Showing 124 changed files with 3,489 additions and 698 deletions.
10 changes: 6 additions & 4 deletions Documentation/networking/af_xdp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,12 @@ an example, if the UMEM is 64k and each chunk is 4k, then the UMEM has

Frames passed to the kernel are used for the ingress path (RX rings).

The user application produces UMEM addrs to this ring. Note that the
kernel will mask the incoming addr. E.g. for a chunk size of 2k, the
log2(2048) LSB of the addr will be masked off, meaning that 2048, 2050
and 3000 refers to the same chunk.
The user application produces UMEM addrs to this ring. Note that, if
running the application with aligned chunk mode, the kernel will mask
the incoming addr. E.g. for a chunk size of 2k, the log2(2048) LSB of
the addr will be masked off, meaning that 2048, 2050 and 3000 refers
to the same chunk. If the user application is run in the unaligned
chunks mode, then the incoming addr will be left untouched.


UMEM Completion Ring
Expand Down
3 changes: 3 additions & 0 deletions arch/arm64/net/bpf_jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,9 @@
/* Rd = Ra + Rn * Rm */
#define A64_MADD(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \
A64_VARIANT(sf), AARCH64_INSN_DATA3_MADD)
/* Rd = Ra - Rn * Rm */
#define A64_MSUB(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \
A64_VARIANT(sf), AARCH64_INSN_DATA3_MSUB)
/* Rd = Rn * Rm */
#define A64_MUL(sf, Rd, Rn, Rm) A64_MADD(sf, Rd, A64_ZR, Rn, Rm)

Expand Down
6 changes: 2 additions & 4 deletions arch/arm64/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -409,8 +409,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_MOD:
emit(A64_UDIV(is64, tmp, dst, src), ctx);
emit(A64_MUL(is64, tmp, tmp, src), ctx);
emit(A64_SUB(is64, dst, dst, tmp), ctx);
emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
break;
}
break;
Expand Down Expand Up @@ -516,8 +515,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
case BPF_ALU64 | BPF_MOD | BPF_K:
emit_a64_mov_i(is64, tmp2, imm, ctx);
emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
emit(A64_MUL(is64, tmp, tmp, tmp2), ctx);
emit(A64_SUB(is64, dst, dst, tmp), ctx);
emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx);
break;
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU64 | BPF_LSH | BPF_K:
Expand Down
67 changes: 56 additions & 11 deletions arch/s390/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,8 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
* NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
* stack space for the large switch statement.
*/
static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
int i, bool extra_pass)
{
struct bpf_insn *insn = &fp->insnsi[i];
int jmp_off, last, insn_count = 1;
Expand Down Expand Up @@ -1011,10 +1012,14 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
*/
case BPF_JMP | BPF_CALL:
{
/*
* b0 = (__bpf_call_base + imm)(b1, b2, b3, b4, b5)
*/
const u64 func = (u64)__bpf_call_base + imm;
u64 func;
bool func_addr_fixed;
int ret;

ret = bpf_jit_get_func_addr(fp, insn, extra_pass,
&func, &func_addr_fixed);
if (ret < 0)
return -1;

REG_SET_SEEN(BPF_REG_5);
jit->seen |= SEEN_FUNC;
Expand Down Expand Up @@ -1283,7 +1288,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
/*
* Compile eBPF program into s390x code
*/
static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
bool extra_pass)
{
int i, insn_count;

Expand All @@ -1292,7 +1298,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)

bpf_jit_prologue(jit, fp->aux->stack_depth);
for (i = 0; i < fp->len; i += insn_count) {
insn_count = bpf_jit_insn(jit, fp, i);
insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
if (insn_count < 0)
return -1;
/* Next instruction address */
Expand All @@ -1311,14 +1317,22 @@ bool bpf_jit_needs_zext(void)
return true;
}

struct s390_jit_data {
struct bpf_binary_header *header;
struct bpf_jit ctx;
int pass;
};

/*
* Compile eBPF program "fp"
*/
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
struct bpf_prog *tmp, *orig_fp = fp;
struct bpf_binary_header *header;
struct s390_jit_data *jit_data;
bool tmp_blinded = false;
bool extra_pass = false;
struct bpf_jit jit;
int pass;

Expand All @@ -1337,6 +1351,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp = tmp;
}

jit_data = fp->aux->jit_data;
if (!jit_data) {
jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
if (!jit_data) {
fp = orig_fp;
goto out;
}
fp->aux->jit_data = jit_data;
}
if (jit_data->ctx.addrs) {
jit = jit_data->ctx;
header = jit_data->header;
extra_pass = true;
pass = jit_data->pass + 1;
goto skip_init_ctx;
}

memset(&jit, 0, sizeof(jit));
jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
if (jit.addrs == NULL) {
Expand All @@ -1349,7 +1380,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
* - 3: Calculate program size and addrs arrray
*/
for (pass = 1; pass <= 3; pass++) {
if (bpf_jit_prog(&jit, fp)) {
if (bpf_jit_prog(&jit, fp, extra_pass)) {
fp = orig_fp;
goto free_addrs;
}
Expand All @@ -1361,12 +1392,14 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp = orig_fp;
goto free_addrs;
}

header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
if (!header) {
fp = orig_fp;
goto free_addrs;
}
if (bpf_jit_prog(&jit, fp)) {
skip_init_ctx:
if (bpf_jit_prog(&jit, fp, extra_pass)) {
bpf_jit_binary_free(header);
fp = orig_fp;
goto free_addrs;
Expand All @@ -1375,12 +1408,24 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
print_fn_code(jit.prg_buf, jit.size_prg);
}
bpf_jit_binary_lock_ro(header);
if (!fp->is_func || extra_pass) {
bpf_jit_binary_lock_ro(header);
} else {
jit_data->header = header;
jit_data->ctx = jit;
jit_data->pass = pass;
}
fp->bpf_func = (void *) jit.prg_buf;
fp->jited = 1;
fp->jited_len = jit.size;

if (!fp->is_func || extra_pass) {
bpf_prog_fill_jited_linfo(fp, jit.addrs + 1);
free_addrs:
kfree(jit.addrs);
kfree(jit.addrs);
kfree(jit_data);
fp->aux->jit_data = NULL;
}
out:
if (tmp_blinded)
bpf_jit_prog_release_other(fp, fp == orig_fp ?
Expand Down
5 changes: 3 additions & 2 deletions drivers/net/ethernet/intel/i40e/i40e_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -12530,7 +12530,8 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
if (need_reset && prog)
for (i = 0; i < vsi->num_queue_pairs; i++)
if (vsi->xdp_rings[i]->xsk_umem)
(void)i40e_xsk_async_xmit(vsi->netdev, i);
(void)i40e_xsk_wakeup(vsi->netdev, i,
XDP_WAKEUP_RX);

return 0;
}
Expand Down Expand Up @@ -12852,7 +12853,7 @@ static const struct net_device_ops i40e_netdev_ops = {
.ndo_bridge_setlink = i40e_ndo_bridge_setlink,
.ndo_bpf = i40e_xdp,
.ndo_xdp_xmit = i40e_xdp_xmit,
.ndo_xsk_async_xmit = i40e_xsk_async_xmit,
.ndo_xsk_wakeup = i40e_xsk_wakeup,
.ndo_dfwd_add_station = i40e_fwd_add,
.ndo_dfwd_del_station = i40e_fwd_del,
};
Expand Down
52 changes: 35 additions & 17 deletions drivers/net/ethernet/intel/i40e/i40e_xsk.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
return err;

/* Kick start the NAPI context so that receiving will start */
err = i40e_xsk_async_xmit(vsi->netdev, qid);
err = i40e_xsk_wakeup(vsi->netdev, qid, XDP_WAKEUP_RX);
if (err)
return err;
}
Expand Down Expand Up @@ -190,7 +190,9 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
**/
static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
{
struct xdp_umem *umem = rx_ring->xsk_umem;
int err, result = I40E_XDP_PASS;
u64 offset = umem->headroom;
struct i40e_ring *xdp_ring;
struct bpf_prog *xdp_prog;
u32 act;
Expand All @@ -201,7 +203,10 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
*/
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
act = bpf_prog_run_xdp(xdp_prog, xdp);
xdp->handle += xdp->data - xdp->data_hard_start;
offset += xdp->data - xdp->data_hard_start;

xdp->handle = xsk_umem_adjust_offset(umem, xdp->handle, offset);

switch (act) {
case XDP_PASS:
break;
Expand Down Expand Up @@ -262,7 +267,7 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
bi->addr = xdp_umem_get_data(umem, handle);
bi->addr += hr;

bi->handle = handle + umem->headroom;
bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);

xsk_umem_discard_addr(umem);
return true;
Expand Down Expand Up @@ -299,7 +304,7 @@ static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
bi->addr = xdp_umem_get_data(umem, handle);
bi->addr += hr;

bi->handle = handle + umem->headroom;
bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);

xsk_umem_discard_addr_rq(umem);
return true;
Expand Down Expand Up @@ -420,23 +425,16 @@ static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *old_bi)
{
struct i40e_rx_buffer *new_bi = &rx_ring->rx_bi[rx_ring->next_to_alloc];
unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
u16 nta = rx_ring->next_to_alloc;

/* update, and store next to alloc */
nta++;
rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;

/* transfer page from old buffer to new buffer */
new_bi->dma = old_bi->dma & mask;
new_bi->dma += hr;

new_bi->addr = (void *)((unsigned long)old_bi->addr & mask);
new_bi->addr += hr;

new_bi->handle = old_bi->handle & mask;
new_bi->handle += rx_ring->xsk_umem->headroom;
new_bi->dma = old_bi->dma;
new_bi->addr = old_bi->addr;
new_bi->handle = old_bi->handle;

old_bi->addr = NULL;
}
Expand Down Expand Up @@ -471,7 +469,8 @@ void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
bi->addr += hr;

bi->handle = (u64)handle + rx_ring->xsk_umem->headroom;
bi->handle = xsk_umem_adjust_offset(rx_ring->xsk_umem, (u64)handle,
rx_ring->xsk_umem->headroom);
}

/**
Expand Down Expand Up @@ -626,6 +625,15 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)

i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);

if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) {
if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
xsk_set_rx_need_wakeup(rx_ring->xsk_umem);
else
xsk_clear_rx_need_wakeup(rx_ring->xsk_umem);

return (int)total_rx_packets;
}
return failure ? budget : (int)total_rx_packets;
}

Expand Down Expand Up @@ -681,6 +689,8 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
i40e_xdp_ring_update_tail(xdp_ring);

xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem))
xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem);
}

return !!budget && work_done;
Expand Down Expand Up @@ -759,19 +769,27 @@ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
i40e_update_tx_stats(tx_ring, completed_frames, total_bytes);

out_xmit:
if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) {
if (tx_ring->next_to_clean == tx_ring->next_to_use)
xsk_set_tx_need_wakeup(tx_ring->xsk_umem);
else
xsk_clear_tx_need_wakeup(tx_ring->xsk_umem);
}

xmit_done = i40e_xmit_zc(tx_ring, budget);

return work_done && xmit_done;
}

/**
* i40e_xsk_async_xmit - Implements the ndo_xsk_async_xmit
* i40e_xsk_wakeup - Implements the ndo_xsk_wakeup
* @dev: the netdevice
* @queue_id: queue id to wake up
* @flags: ignored in our case since we have Rx and Tx in the same NAPI.
*
* Returns <0 for errors, 0 otherwise.
**/
int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id)
int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi;
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/i40e/i40e_xsk.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);

bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
struct i40e_ring *tx_ring, int napi_budget);
int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id);
int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);

#endif /* _I40E_XSK_H_ */
5 changes: 3 additions & 2 deletions drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -10260,7 +10260,8 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
if (need_reset && prog)
for (i = 0; i < adapter->num_rx_queues; i++)
if (adapter->xdp_ring[i]->xsk_umem)
(void)ixgbe_xsk_async_xmit(adapter->netdev, i);
(void)ixgbe_xsk_wakeup(adapter->netdev, i,
XDP_WAKEUP_RX);

return 0;
}
Expand Down Expand Up @@ -10379,7 +10380,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
.ndo_features_check = ixgbe_features_check,
.ndo_bpf = ixgbe_xdp,
.ndo_xdp_xmit = ixgbe_xdp_xmit,
.ndo_xsk_async_xmit = ixgbe_xsk_async_xmit,
.ndo_xsk_wakeup = ixgbe_xsk_wakeup,
};

static void ixgbe_disable_txr_hw(struct ixgbe_adapter *adapter,
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring);
bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
struct ixgbe_ring *tx_ring, int napi_budget);
int ixgbe_xsk_async_xmit(struct net_device *dev, u32 queue_id);
int ixgbe_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring);

#endif /* #define _IXGBE_TXRX_COMMON_H_ */
Loading

0 comments on commit 1e46c09

Please sign in to comment.