Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Browse files Browse the repository at this point in the history
Alexei Starovoitov says:

====================
pull-request: bpf-next 2019-05-31

The following pull-request contains BPF updates for your *net-next* tree.

Lots of exciting new features in the first PR of this developement cycle!
The main changes are:

1) misc verifier improvements, from Alexei.

2) bpftool can now convert btf to valid C, from Andrii.

3) verifier can insert explicit ZEXT insn when requested by 32-bit JITs.
   This feature greatly improves BPF speed on 32-bit architectures. From Jiong.

4) cgroups will now auto-detach bpf programs. This fixes issue of thousands
   bpf programs got stuck in dying cgroups. From Roman.

5) new bpf_send_signal() helper, from Yonghong.

6) cgroup inet skb programs can signal CN to the stack, from Lawrence.

7) miscellaneous cleanups, from many developers.
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Jun 1, 2019
2 parents 33aae28 + cd53850 commit 0462eaa
Show file tree
Hide file tree
Showing 122 changed files with 6,430 additions and 1,013 deletions.
30 changes: 25 additions & 5 deletions Documentation/bpf/bpf_design_QA.rst
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,31 @@ registers which makes BPF inefficient virtual machine for 32-bit
CPU architectures and 32-bit HW accelerators. Can true 32-bit registers
be added to BPF in the future?

A: NO. The first thing to improve performance on 32-bit archs is to teach
LLVM to generate code that uses 32-bit subregisters. Then second step
is to teach verifier to mark operations where zero-ing upper bits
is unnecessary. Then JITs can take advantage of those markings and
drastically reduce size of generated code and improve performance.
A: NO.

But some optimizations on zero-ing the upper 32 bits for BPF registers are
available, and can be leveraged to improve the performance of JITed BPF
programs for 32-bit architectures.

Starting with version 7, LLVM is able to generate instructions that operate
on 32-bit subregisters, provided the option -mattr=+alu32 is passed for
compiling a program. Furthermore, the verifier can now mark the
instructions for which zero-ing the upper bits of the destination register
is required, and insert an explicit zero-extension (zext) instruction
(a mov32 variant). This means that for architectures without zext hardware
support, the JIT back-ends do not need to clear the upper bits for
subregisters written by alu32 instructions or narrow loads. Instead, the
back-ends simply need to support code generation for that mov32 variant,
and to overwrite bpf_jit_needs_zext() to make it return "true" (in order to
enable zext insertion in the verifier).

Note that it is possible for a JIT back-end to have partial hardware
support for zext. In that case, if verifier zext insertion is enabled,
it could lead to the insertion of unnecessary zext instructions. Such
instructions could be removed by creating a simple peephole inside the JIT
back-end: if one instruction has hardware support for zext and if the next
instruction is an explicit zext, then the latter can be skipped when doing
the code generation.

Q: Does BPF have a stable ABI?
------------------------------
Expand Down
42 changes: 31 additions & 11 deletions arch/arm/net/bpf_jit_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -736,7 +736,8 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],

/* ALU operation */
emit_alu_r(rd[1], rs, true, false, op, ctx);
emit_a32_mov_i(rd[0], 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(rd[0], 0, ctx);
}

arm_bpf_put_reg64(dst, rd, ctx);
Expand All @@ -758,8 +759,9 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
struct jit_ctx *ctx) {
if (!is64) {
emit_a32_mov_r(dst_lo, src_lo, ctx);
/* Zero out high 4 bytes */
emit_a32_mov_i(dst_hi, 0, ctx);
if (!ctx->prog->aux->verifier_zext)
/* Zero out high 4 bytes */
emit_a32_mov_i(dst_hi, 0, ctx);
} else if (__LINUX_ARM_ARCH__ < 6 &&
ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
/* complete 8 byte move */
Expand Down Expand Up @@ -1060,17 +1062,20 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
case BPF_B:
/* Load a Byte */
emit(ARM_LDRB_I(rd[1], rm, off), ctx);
emit_a32_mov_i(rd[0], 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(rd[0], 0, ctx);
break;
case BPF_H:
/* Load a HalfWord */
emit(ARM_LDRH_I(rd[1], rm, off), ctx);
emit_a32_mov_i(rd[0], 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(rd[0], 0, ctx);
break;
case BPF_W:
/* Load a Word */
emit(ARM_LDR_I(rd[1], rm, off), ctx);
emit_a32_mov_i(rd[0], 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(rd[0], 0, ctx);
break;
case BPF_DW:
/* Load a Double Word */
Expand Down Expand Up @@ -1359,6 +1364,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU64 | BPF_MOV | BPF_X:
switch (BPF_SRC(code)) {
case BPF_X:
if (imm == 1) {
/* Special mov32 for zext */
emit_a32_mov_i(dst_hi, 0, ctx);
break;
}
emit_a32_mov_r64(is64, dst, src, ctx);
break;
case BPF_K:
Expand Down Expand Up @@ -1438,7 +1448,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
}
emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
emit_a32_mov_i(dst_hi, 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(dst_hi, 0, ctx);
break;
case BPF_ALU64 | BPF_DIV | BPF_K:
case BPF_ALU64 | BPF_DIV | BPF_X:
Expand All @@ -1453,7 +1464,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
return -EINVAL;
if (imm)
emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code));
emit_a32_mov_i(dst_hi, 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(dst_hi, 0, ctx);
break;
/* dst = dst << imm */
case BPF_ALU64 | BPF_LSH | BPF_K:
Expand Down Expand Up @@ -1488,7 +1500,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
/* dst = ~dst */
case BPF_ALU | BPF_NEG:
emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code));
emit_a32_mov_i(dst_hi, 0, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(dst_hi, 0, ctx);
break;
/* dst = ~dst (64 bit) */
case BPF_ALU64 | BPF_NEG:
Expand Down Expand Up @@ -1544,11 +1557,13 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
#else /* ARMv6+ */
emit(ARM_UXTH(rd[1], rd[1]), ctx);
#endif
emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
if (!ctx->prog->aux->verifier_zext)
emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
break;
case 32:
/* zero-extend 32 bits into 64 bits */
emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
if (!ctx->prog->aux->verifier_zext)
emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
break;
case 64:
/* nop */
Expand Down Expand Up @@ -1838,6 +1853,11 @@ void bpf_jit_compile(struct bpf_prog *prog)
/* Nothing to do here. We support Internal BPF. */
}

bool bpf_jit_needs_zext(void)
{
return true;
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
struct bpf_prog *tmp, *orig_prog = prog;
Expand Down
36 changes: 33 additions & 3 deletions arch/powerpc/net/bpf_jit_comp64.c
Original file line number Diff line number Diff line change
Expand Up @@ -504,26 +504,35 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
/* slw clears top 32 bits */
PPC_SLW(dst_reg, dst_reg, src_reg);
/* skip zero extension move, but set address map. */
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
PPC_SLD(dst_reg, dst_reg, src_reg);
break;
case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */
/* with imm 0, we still need to clear top 32 bits */
PPC_SLWI(dst_reg, dst_reg, imm);
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */
if (imm != 0)
PPC_SLDI(dst_reg, dst_reg, imm);
break;
case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
PPC_SRW(dst_reg, dst_reg, src_reg);
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
PPC_SRD(dst_reg, dst_reg, src_reg);
break;
case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
PPC_SRWI(dst_reg, dst_reg, imm);
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
if (imm != 0)
Expand All @@ -548,18 +557,25 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
*/
case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
if (imm == 1) {
/* special mov32 for zext */
PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
break;
}
PPC_MR(dst_reg, src_reg);
goto bpf_alu32_trunc;
case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
PPC_LI32(dst_reg, imm);
if (imm < 0)
goto bpf_alu32_trunc;
else if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;

bpf_alu32_trunc:
/* Truncate to 32-bits */
if (BPF_CLASS(code) == BPF_ALU)
if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
break;

Expand Down Expand Up @@ -618,10 +634,13 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
case 16:
/* zero-extend 16 bits into 64 bits */
PPC_RLDICL(dst_reg, dst_reg, 0, 48);
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
case 32:
/* zero-extend 32 bits into 64 bits */
PPC_RLDICL(dst_reg, dst_reg, 0, 32);
if (!fp->aux->verifier_zext)
/* zero-extend 32 bits into 64 bits */
PPC_RLDICL(dst_reg, dst_reg, 0, 32);
break;
case 64:
/* nop */
Expand Down Expand Up @@ -698,14 +717,20 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
/* dst = *(u8 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_B:
PPC_LBZ(dst_reg, src_reg, off);
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
/* dst = *(u16 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_H:
PPC_LHZ(dst_reg, src_reg, off);
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
/* dst = *(u32 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_W:
PPC_LWZ(dst_reg, src_reg, off);
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
/* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_DW:
Expand Down Expand Up @@ -1046,6 +1071,11 @@ struct powerpc64_jit_data {
struct codegen_context ctx;
};

bool bpf_jit_needs_zext(void)
{
return true;
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
u32 proglen;
Expand Down
Loading

0 comments on commit 0462eaa

Please sign in to comment.