Skip to content

Commit

Permalink
Merge branch 'xdp-head-adjustment'
Browse files Browse the repository at this point in the history
Martin KaFai Lau says:

====================
Allow head adjustment in XDP prog

This series adds a helper to allow head adjusting in XDP prog.  mlx4
driver has been modified to support this feature.  An example is written
to encapsulate a packet with an IPv4/v6 header and then XDP_TX it
out.

v4:
1. Remove XDP_QUERY_FEATURES command.  Instead, check
   the prog->xdp_adjust_head bit inside the driver itself
   during XDP_SETUP_PROG in patch 1of4.
   Thanks for everybody's ideas.
2. Nit changes on sample code per Jesper

v3:
1. Check if the driver supports head adjustment before
   setting the xdp_prog fd to the device in patch 1of4.
2. Remove the page alignment assumption on the data_hard_start.
   Instead, add data_hard_start to the struct xdp_buff and the
   driver has to fill it if it supports head adjustment.
3. Keep the wire MTU as before in mlx4
4. Set map0_byte_count to PAGE_SIZE in patch 3of4

v2:
1. Make a variable name change in bpf_xdp_adjust_head() in patch 1
2. Ensure no less than ETH_HLEN data in bpf_xdp_adjust_head() in patch 1
3. Some clarifications in commit log messages of patch 2 and 3
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Dec 8, 2016
2 parents 8a03cf2 + 12d8bb6 commit 293bfa9
Show file tree
Hide file tree
Showing 24 changed files with 762 additions and 145 deletions.
4 changes: 2 additions & 2 deletions arch/powerpc/net/bpf_jit_comp64.c
Original file line number Diff line number Diff line change
Expand Up @@ -766,7 +766,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
func = (u8 *) __bpf_call_base + imm;

/* Save skb pointer if we need to re-cache skb data */
if (bpf_helper_changes_skb_data(func))
if (bpf_helper_changes_pkt_data(func))
PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));

bpf_jit_emit_func_call(image, ctx, (u64)func);
Expand All @@ -775,7 +775,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
PPC_MR(b2p[BPF_REG_0], 3);

/* refresh skb cache */
if (bpf_helper_changes_skb_data(func)) {
if (bpf_helper_changes_pkt_data(func)) {
/* reload skb pointer to r3 */
PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_skb_loads(image, ctx);
Expand Down
2 changes: 1 addition & 1 deletion arch/s390/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -981,7 +981,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
EMIT2(0x0d00, REG_14, REG_W1);
/* lgr %b0,%r2: load return value into %b0 */
EMIT4(0xb9040000, BPF_REG_0, REG_2);
if (bpf_helper_changes_skb_data((void *)func)) {
if (bpf_helper_changes_pkt_data((void *)func)) {
jit->seen |= SEEN_SKB_CHANGE;
/* lg %b1,ST_OFF_SKBP(%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -853,7 +853,7 @@ xadd: if (is_imm8(insn->off))
func = (u8 *) __bpf_call_base + imm32;
jmp_offset = func - (image + addrs[i]);
if (seen_ld_abs) {
reload_skb_data = bpf_helper_changes_skb_data(func);
reload_skb_data = bpf_helper_changes_pkt_data(func);
if (reload_skb_data) {
EMIT1(0x57); /* push %rdi */
jmp_offset += 22; /* pop, mov, sub, mov */
Expand Down
29 changes: 21 additions & 8 deletions drivers/net/ethernet/mellanox/mlx4/en_netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@
#include "mlx4_en.h"
#include "en_port.h"

#define MLX4_EN_MAX_XDP_MTU ((int)(PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) - \
XDP_PACKET_HEADROOM))

int mlx4_en_setup_tc(struct net_device *dev, u8 up)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
Expand Down Expand Up @@ -2249,6 +2252,19 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
free_netdev(dev);
}

static bool mlx4_en_check_xdp_mtu(struct net_device *dev, int mtu)
{
struct mlx4_en_priv *priv = netdev_priv(dev);

if (mtu > MLX4_EN_MAX_XDP_MTU) {
en_err(priv, "mtu:%d > max:%d when XDP prog is attached\n",
mtu, MLX4_EN_MAX_XDP_MTU);
return false;
}

return true;
}

static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
Expand All @@ -2258,11 +2274,10 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
en_dbg(DRV, priv, "Change MTU called - current:%d new:%d\n",
dev->mtu, new_mtu);

if (priv->tx_ring_num[TX_XDP] && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) {
en_err(priv, "MTU size:%d requires frags but XDP running\n",
new_mtu);
return -EOPNOTSUPP;
}
if (priv->tx_ring_num[TX_XDP] &&
!mlx4_en_check_xdp_mtu(dev, new_mtu))
return -ENOTSUPP;

dev->mtu = new_mtu;

if (netif_running(dev)) {
Expand Down Expand Up @@ -2710,10 +2725,8 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
return 0;
}

if (priv->num_frags > 1) {
en_err(priv, "Cannot set XDP if MTU requires multiple frags\n");
if (!mlx4_en_check_xdp_mtu(dev, dev->mtu))
return -EOPNOTSUPP;
}

tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
if (!tmp)
Expand Down
70 changes: 42 additions & 28 deletions drivers/net/ethernet/mellanox/mlx4/en_rx.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
const struct mlx4_en_frag_info *frag_info;
struct page *page;
dma_addr_t dma;
int i;

for (i = 0; i < priv->num_frags; i++) {
Expand All @@ -115,9 +114,10 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,

for (i = 0; i < priv->num_frags; i++) {
frags[i] = ring_alloc[i];
dma = ring_alloc[i].dma + ring_alloc[i].page_offset;
frags[i].page_offset += priv->frag_info[i].rx_headroom;
rx_desc->data[i].addr = cpu_to_be64(frags[i].dma +
frags[i].page_offset);
ring_alloc[i] = page_alloc[i];
rx_desc->data[i].addr = cpu_to_be64(dma);
}

return 0;
Expand Down Expand Up @@ -250,7 +250,8 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,

if (ring->page_cache.index > 0) {
frags[0] = ring->page_cache.buf[--ring->page_cache.index];
rx_desc->data[0].addr = cpu_to_be64(frags[0].dma);
rx_desc->data[0].addr = cpu_to_be64(frags[0].dma +
frags[0].page_offset);
return 0;
}

Expand Down Expand Up @@ -889,18 +890,27 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
if (xdp_prog) {
struct xdp_buff xdp;
dma_addr_t dma;
void *orig_data;
u32 act;

dma = be64_to_cpu(rx_desc->data[0].addr);
dma_sync_single_for_cpu(priv->ddev, dma,
priv->frag_info[0].frag_size,
DMA_FROM_DEVICE);

xdp.data = page_address(frags[0].page) +
frags[0].page_offset;
xdp.data_hard_start = page_address(frags[0].page);
xdp.data = xdp.data_hard_start + frags[0].page_offset;
xdp.data_end = xdp.data + length;
orig_data = xdp.data;

act = bpf_prog_run_xdp(xdp_prog, &xdp);

if (xdp.data != orig_data) {
length = xdp.data_end - xdp.data;
frags[0].page_offset = xdp.data -
xdp.data_hard_start;
}

switch (act) {
case XDP_PASS:
break;
Expand Down Expand Up @@ -1164,37 +1174,41 @@ static const int frag_sizes[] = {

void mlx4_en_calc_rx_buf(struct net_device *dev)
{
enum dma_data_direction dma_dir = PCI_DMA_FROMDEVICE;
struct mlx4_en_priv *priv = netdev_priv(dev);
int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu);
int order = MLX4_EN_ALLOC_PREFER_ORDER;
u32 align = SMP_CACHE_BYTES;
int buf_size = 0;
int i = 0;

/* bpf requires buffers to be set up as 1 packet per page.
* This only works when num_frags == 1.
*/
if (priv->tx_ring_num[TX_XDP]) {
dma_dir = PCI_DMA_BIDIRECTIONAL;
/* This will gain efficient xdp frame recycling at the expense
* of more costly truesize accounting
priv->frag_info[0].order = 0;
priv->frag_info[0].frag_size = eff_mtu;
priv->frag_info[0].frag_prefix_size = 0;
/* This will gain efficient xdp frame recycling at the
* expense of more costly truesize accounting
*/
align = PAGE_SIZE;
order = 0;
}

while (buf_size < eff_mtu) {
priv->frag_info[i].order = order;
priv->frag_info[i].frag_size =
(eff_mtu > buf_size + frag_sizes[i]) ?
frag_sizes[i] : eff_mtu - buf_size;
priv->frag_info[i].frag_prefix_size = buf_size;
priv->frag_info[i].frag_stride =
ALIGN(priv->frag_info[i].frag_size, align);
priv->frag_info[i].dma_dir = dma_dir;
buf_size += priv->frag_info[i].frag_size;
i++;
priv->frag_info[0].frag_stride = PAGE_SIZE;
priv->frag_info[0].dma_dir = PCI_DMA_BIDIRECTIONAL;
priv->frag_info[0].rx_headroom = XDP_PACKET_HEADROOM;
i = 1;
} else {
int buf_size = 0;

while (buf_size < eff_mtu) {
priv->frag_info[i].order = MLX4_EN_ALLOC_PREFER_ORDER;
priv->frag_info[i].frag_size =
(eff_mtu > buf_size + frag_sizes[i]) ?
frag_sizes[i] : eff_mtu - buf_size;
priv->frag_info[i].frag_prefix_size = buf_size;
priv->frag_info[i].frag_stride =
ALIGN(priv->frag_info[i].frag_size,
SMP_CACHE_BYTES);
priv->frag_info[i].dma_dir = PCI_DMA_FROMDEVICE;
priv->frag_info[i].rx_headroom = 0;
buf_size += priv->frag_info[i].frag_size;
i++;
}
}

priv->num_frags = i;
Expand Down
9 changes: 5 additions & 4 deletions drivers/net/ethernet/mellanox/mlx4/en_tx.c
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
struct mlx4_en_rx_alloc frame = {
.page = tx_info->page,
.dma = tx_info->map0_dma,
.page_offset = 0,
.page_offset = XDP_PACKET_HEADROOM,
.page_size = PAGE_SIZE,
};

Expand Down Expand Up @@ -1132,7 +1132,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
tx_info->page = frame->page;
frame->page = NULL;
tx_info->map0_dma = dma;
tx_info->map0_byte_count = length;
tx_info->map0_byte_count = PAGE_SIZE;
tx_info->nr_txbb = nr_txbb;
tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
tx_info->data_offset = (void *)data - (void *)tx_desc;
Expand All @@ -1141,9 +1141,10 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
tx_info->linear = 1;
tx_info->inl = 0;

dma_sync_single_for_device(priv->ddev, dma, length, PCI_DMA_TODEVICE);
dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
length, PCI_DMA_TODEVICE);

data->addr = cpu_to_be64(dma);
data->addr = cpu_to_be64(dma + frame->page_offset);
data->lkey = ring->mr_key;
dma_wmb();
data->byte_count = cpu_to_be32(length);
Expand Down
3 changes: 2 additions & 1 deletion drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,8 @@ struct mlx4_en_frag_info {
u16 frag_prefix_size;
u32 frag_stride;
enum dma_data_direction dma_dir;
int order;
u16 order;
u16 rx_headroom;
};

#ifdef CONFIG_MLX4_EN_DCB
Expand Down
5 changes: 5 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/en_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3183,6 +3183,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
bool reset, was_opened;
int i;

if (prog && prog->xdp_adjust_head) {
netdev_err(netdev, "Does not support bpf_xdp_adjust_head()\n");
return -EOPNOTSUPP;
}

mutex_lock(&priv->state_lock);

if ((netdev->features & NETIF_F_LRO) && prog) {
Expand Down
4 changes: 4 additions & 0 deletions drivers/net/ethernet/netronome/nfp/nfp_net_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -2946,6 +2946,10 @@ static int nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog)
};
int err;

if (prog && prog->xdp_adjust_head) {
nn_err(nn, "Does not support bpf_xdp_adjust_head()\n");
return -EOPNOTSUPP;
}
if (!prog && !nn->xdp_prog)
return 0;
if (prog && nn->xdp_prog) {
Expand Down
5 changes: 5 additions & 0 deletions drivers/net/ethernet/qlogic/qede/qede_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2507,6 +2507,11 @@ static int qede_xdp_set(struct qede_dev *edev, struct bpf_prog *prog)
{
struct qede_reload_args args;

if (prog && prog->xdp_adjust_head) {
DP_ERR(edev, "Does not support bpf_xdp_adjust_head()\n");
return -EOPNOTSUPP;
}

/* If we're called, there was already a bpf reference increment */
args.func = &qede_xdp_reload_func;
args.u.new_prog = prog;
Expand Down
6 changes: 4 additions & 2 deletions include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,8 @@ struct bpf_prog {
u16 jited:1, /* Is our filter JIT'ed? */
gpl_compatible:1, /* Is filter GPL compatible? */
cb_access:1, /* Is control block accessed? */
dst_needed:1; /* Do we need dst entry? */
dst_needed:1, /* Do we need dst entry? */
xdp_adjust_head:1; /* Adjusting pkt head? */
kmemcheck_bitfield_end(meta);
enum bpf_prog_type type; /* Type of BPF program */
u32 len; /* Number of filter blocks */
Expand Down Expand Up @@ -440,6 +441,7 @@ struct bpf_skb_data_end {
struct xdp_buff {
void *data;
void *data_end;
void *data_hard_start;
};

/* compute the linear packet data range [data, data_end) which
Expand Down Expand Up @@ -595,7 +597,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
bool bpf_helper_changes_skb_data(void *func);
bool bpf_helper_changes_pkt_data(void *func);

struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
const struct bpf_insn *patch, u32 len);
Expand Down
11 changes: 10 additions & 1 deletion include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,12 @@ union bpf_attr {
* @len: length of header to be pushed in front
* @flags: Flags (unused for now)
* Return: 0 on success or negative error
*
* int bpf_xdp_adjust_head(xdp_md, delta)
* Adjust the xdp_md.data by delta
* @xdp_md: pointer to xdp_md
* @delta: An positive/negative integer to be added to xdp_md.data
* Return: 0 on success or negative on error
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
Expand Down Expand Up @@ -469,7 +475,8 @@ union bpf_attr {
FN(csum_update), \
FN(set_hash_invalid), \
FN(get_numa_node_id), \
FN(skb_change_head),
FN(skb_change_head), \
FN(xdp_adjust_head),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
Expand Down Expand Up @@ -576,6 +583,8 @@ struct bpf_sock {
__u32 protocol;
};

#define XDP_PACKET_HEADROOM 256

/* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other
* return codes are reserved for future use. Unknown return codes will result
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1143,7 +1143,7 @@ struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
return prog;
}

bool __weak bpf_helper_changes_skb_data(void *func)
bool __weak bpf_helper_changes_pkt_data(void *func)
{
return false;
}
Expand Down
2 changes: 2 additions & 0 deletions kernel/bpf/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,8 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
prog->dst_needed = 1;
if (insn->imm == BPF_FUNC_get_prandom_u32)
bpf_user_rnd_init_once();
if (insn->imm == BPF_FUNC_xdp_adjust_head)
prog->xdp_adjust_head = 1;
if (insn->imm == BPF_FUNC_tail_call) {
/* mark bpf_tail_call as different opcode
* to avoid conditional branch in
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -1216,7 +1216,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
return -EINVAL;
}

changes_data = bpf_helper_changes_skb_data(fn->func);
changes_data = bpf_helper_changes_pkt_data(fn->func);

memset(&meta, 0, sizeof(meta));
meta.pkt_access = fn->pkt_access;
Expand Down
Loading

0 comments on commit 293bfa9

Please sign in to comment.