Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Browse files Browse the repository at this point in the history
Daniel Borkmann says:

====================
pull-request: bpf-next 2019-07-09

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Lots of libbpf improvements: i) addition of new APIs to attach BPF
   programs to tracing entities such as {k,u}probes or tracepoints,
   ii) improved specification of BTF-defined maps by eliminating the
   need for data initialization for some of the members, iii) addition
   of a high-level API for setting up and polling perf buffers for
   BPF event output helpers, all from Andrii.

2) Add "prog run" subcommand to bpftool in order to test-run programs
   through the kernel testing infrastructure of BPF, from Quentin.

3) Improve verifier for BPF sockaddr programs to support 8-byte stores
   for user_ip6 and msg_src_ip6 members given clang tends to generate
   such stores, from Stanislav.

4) Enable the new BPF JIT zero-extension optimization for further
   riscv64 ALU ops, from Luke.

5) Fix a bpftool json JIT dump crash on powerpc, from Jiri.

6) Fix an AF_XDP race in generic XDP's receive path, from Ilya.

7) Various smaller fixes from Ilya, Yue and Arnd.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
davem330 committed Jul 9, 2019
2 parents 7650b1a + bf0bdd1 commit 17ccf9e
Show file tree
Hide file tree
Showing 67 changed files with 2,490 additions and 1,062 deletions.
16 changes: 8 additions & 8 deletions arch/riscv/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -757,31 +757,31 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU | BPF_ADD | BPF_X:
case BPF_ALU64 | BPF_ADD | BPF_X:
emit(is64 ? rv_add(rd, rd, rs) : rv_addw(rd, rd, rs), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_SUB | BPF_X:
case BPF_ALU64 | BPF_SUB | BPF_X:
emit(is64 ? rv_sub(rd, rd, rs) : rv_subw(rd, rd, rs), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_AND | BPF_X:
case BPF_ALU64 | BPF_AND | BPF_X:
emit(rv_and(rd, rd, rs), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_OR | BPF_X:
case BPF_ALU64 | BPF_OR | BPF_X:
emit(rv_or(rd, rd, rs), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_XOR | BPF_X:
case BPF_ALU64 | BPF_XOR | BPF_X:
emit(rv_xor(rd, rd, rs), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_MUL | BPF_X:
Expand Down Expand Up @@ -811,13 +811,13 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU | BPF_RSH | BPF_X:
case BPF_ALU64 | BPF_RSH | BPF_X:
emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_ARSH | BPF_X:
case BPF_ALU64 | BPF_ARSH | BPF_X:
emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;

Expand All @@ -826,7 +826,7 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU64 | BPF_NEG:
emit(is64 ? rv_sub(rd, RV_REG_ZERO, rd) :
rv_subw(rd, RV_REG_ZERO, rd), ctx);
if (!is64)
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;

Expand Down
6 changes: 6 additions & 0 deletions include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,12 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
return size <= size_default && (size & (size - 1)) == 0;
}

/*
 * bpf_ctx_wide_store_ok() - test whether a context access is a valid
 * 8-byte ("wide") store into @field of @type.
 *
 * Evaluates to true only when all of the following hold:
 *   - @size equals sizeof(__u64);
 *   - the 8 bytes starting at @off lie entirely inside @field;
 *   - @off is a multiple of sizeof(__u64).
 *
 * @off and @size are parenthesized so that expression arguments
 * (e.g. "base + 8") are treated as a single operand by every comparison;
 * without this, "off % sizeof(__u64)" would bind to only the last term.
 * Note that @off is still evaluated more than once, so it must not have
 * side effects.
 */
#define bpf_ctx_wide_store_ok(off, size, type, field)			\
	((size) == sizeof(__u64) &&					\
	 (off) >= offsetof(type, field) &&				\
	 (off) + sizeof(__u64) <= offsetofend(type, field) &&		\
	 (off) % sizeof(__u64) == 0)

#define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))

static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
Expand Down
4 changes: 1 addition & 3 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -2223,9 +2223,7 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)

static inline void tcp_bpf_rtt(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);

if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTT_CB_FLAG))
if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_RTT_CB_FLAG))
tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL);
}

Expand Down
2 changes: 2 additions & 0 deletions include/net/xdp_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ struct xdp_sock {
* in the SKB destructor callback.
*/
spinlock_t tx_completion_lock;
/* Protects generic receive. */
spinlock_t rx_lock;
u64 rx_dropped;
};

Expand Down
6 changes: 3 additions & 3 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -3247,7 +3247,7 @@ struct bpf_sock_addr {
__u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
* Stored in network byte order.
*/
__u32 user_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
__u32 user_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write.
* Stored in network byte order.
*/
__u32 user_port; /* Allows 4-byte read and write.
Expand All @@ -3256,10 +3256,10 @@ struct bpf_sock_addr {
__u32 family; /* Allows 4-byte read, but no write */
__u32 type; /* Allows 4-byte read, but no write */
__u32 protocol; /* Allows 4-byte read, but no write */
__u32 msg_src_ip4; /* Allows 1,2,4-byte read an 4-byte write.
__u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write.
* Stored in network byte order.
*/
__u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
__u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write.
* Stored in network byte order.
*/
__bpf_md_ptr(struct bpf_sock *, sk);
Expand Down
4 changes: 4 additions & 0 deletions kernel/bpf/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);

#ifdef CONFIG_NET
static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
enum bpf_attach_type attach_type)
{
Expand Down Expand Up @@ -1120,6 +1121,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_getsockopt);
#endif

static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
size_t *lenp)
Expand Down Expand Up @@ -1386,10 +1388,12 @@ static const struct bpf_func_proto *
cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
#ifdef CONFIG_NET
case BPF_FUNC_sk_storage_get:
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
#endif
#ifdef CONFIG_INET
case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto;
Expand Down
22 changes: 14 additions & 8 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -6890,6 +6890,16 @@ static bool sock_addr_is_valid_access(int off, int size,
if (!bpf_ctx_narrow_access_ok(off, size, size_default))
return false;
} else {
if (bpf_ctx_wide_store_ok(off, size,
struct bpf_sock_addr,
user_ip6))
return true;

if (bpf_ctx_wide_store_ok(off, size,
struct bpf_sock_addr,
msg_src_ip6))
return true;

if (size != size_default)
return false;
}
Expand Down Expand Up @@ -7730,17 +7740,14 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantics similar to
* SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
*
* It doesn't support SIZE argument though since narrow stores are not
* supported for now.
*
* In addition it uses Temporary Field TF (member of struct S) as the 3rd
* "register" since two registers available in convert_ctx_access are not
* enough: we can override neither SRC, since it contains value to store, nor
* DST since it contains pointer to context that may be used by later
* instructions. But we need a temporary place to save pointer to nested
* structure whose field we want to store to.
*/
#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \
#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \
do { \
int tmp_reg = BPF_REG_9; \
if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
Expand All @@ -7751,8 +7758,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
offsetof(S, TF)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
si->dst_reg, offsetof(S, F)); \
*insn++ = BPF_STX_MEM( \
BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \
*insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \
bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
target_size) \
+ OFF); \
Expand All @@ -7764,8 +7770,8 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
TF) \
do { \
if (type == BPF_WRITE) { \
SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \
TF); \
SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \
OFF, TF); \
} else { \
SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
S, NS, F, NF, SIZE, OFF); \
Expand Down
31 changes: 22 additions & 9 deletions net/xdp/xsk.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,17 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
u64 addr;
int err;

if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
spin_lock_bh(&xs->rx_lock);

if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) {
err = -EINVAL;
goto out_unlock;
}

if (!xskq_peek_addr(xs->umem->fq, &addr) ||
len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
xs->rx_dropped++;
return -ENOSPC;
err = -ENOSPC;
goto out_drop;
}

addr += xs->umem->headroom;
Expand All @@ -144,13 +148,21 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
memcpy(buffer, xdp->data_meta, len + metalen);
addr += metalen;
err = xskq_produce_batch_desc(xs->rx, addr, len);
if (!err) {
xskq_discard_addr(xs->umem->fq);
xsk_flush(xs);
return 0;
}
if (err)
goto out_drop;

xskq_discard_addr(xs->umem->fq);
xskq_produce_flush_desc(xs->rx);

spin_unlock_bh(&xs->rx_lock);

xs->sk.sk_data_ready(&xs->sk);
return 0;

out_drop:
xs->rx_dropped++;
out_unlock:
spin_unlock_bh(&xs->rx_lock);
return err;
}

Expand Down Expand Up @@ -787,6 +799,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,

xs = xdp_sk(sk);
mutex_init(&xs->mutex);
spin_lock_init(&xs->rx_lock);
spin_lock_init(&xs->tx_completion_lock);

mutex_lock(&net->xdp.lock);
Expand Down
34 changes: 34 additions & 0 deletions tools/bpf/bpftool/Documentation/bpftool-prog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ PROG COMMANDS
| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog tracelog**
| **bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*]
| **bpftool** **prog help**
|
| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
Expand Down Expand Up @@ -146,6 +147,39 @@ DESCRIPTION
streaming data from BPF programs to user space, one can use
perf events (see also **bpftool-map**\ (8)).

**bpftool prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*]
Run BPF program *PROG* in the kernel testing infrastructure
for BPF, meaning that the program works on the data and
context provided by the user, and not on actual packets or
monitored functions etc. Return value and duration for the
test run are printed out to the console.

Input data is read from the *FILE* passed with **data_in**.
If this *FILE* is "**-**", input data is read from standard
input. Input context, if any, is read from *FILE* passed with
**ctx_in**. Again, "**-**" can be used to read from standard
input, but only if standard input is not already in use for
input data. If a *FILE* is passed with **data_out**, output
data is written to that file. Similarly, output context is
written to the *FILE* passed with **ctx_out**. For both
output flows, "**-**" can be used to print to the standard
output (as plain text, or JSON if relevant option was
passed). If output keywords are omitted, output data and
context are discarded. Keywords **data_size_out** and
**ctx_size_out** are used to pass the size (in bytes) for the
output buffers to the kernel, although the default of 32 kB
should be more than enough for most cases.

Keyword **repeat** is used to indicate the number of
consecutive runs to perform. Note that output data and
context printed to files correspond to the last of those
runs. The duration printed out at the end of the runs is an
average over all runs performed by the command.

Not all program types support test run. Among those which do,
not all of them can take the **ctx_in**/**ctx_out**
arguments. bpftool does not perform checks on program types.

**bpftool prog help**
Print short help message.

Expand Down
35 changes: 33 additions & 2 deletions tools/bpf/bpftool/bash-completion/bpftool
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,13 @@ _bpftool()
load|loadall)
local obj

# Propose "load/loadall" to complete "bpftool prog load",
# or bash tries to complete "load" as a filename below.
if [[ ${#words[@]} -eq 3 ]]; then
COMPREPLY=( $( compgen -W "load loadall" -- "$cur" ) )
return 0
fi

if [[ ${#words[@]} -lt 6 ]]; then
_filedir
return 0
Expand Down Expand Up @@ -408,10 +415,34 @@ _bpftool()
tracelog)
return 0
;;
run)
if [[ ${#words[@]} -lt 5 ]]; then
_filedir
return 0
fi
case $prev in
id)
_bpftool_get_prog_ids
return 0
;;
data_in|data_out|ctx_in|ctx_out)
_filedir
return 0
;;
repeat|data_size_out|ctx_size_out)
return 0
;;
*)
_bpftool_once_attr 'data_in data_out data_size_out \
ctx_in ctx_out ctx_size_out repeat'
return 0
;;
esac
;;
*)
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'dump help pin attach detach load \
show list tracelog' -- "$cur" ) )
COMPREPLY=( $( compgen -W 'dump help pin attach detach \
load loadall show list tracelog run' -- "$cur" ) )
;;
esac
;;
Expand Down
11 changes: 7 additions & 4 deletions tools/bpf/bpftool/jit_disasm.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
* Licensed under the GNU General Public License, version 2.0 (GPLv2)
*/

#define _GNU_SOURCE
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
Expand Down Expand Up @@ -44,11 +46,13 @@ static int fprintf_json(void *out, const char *fmt, ...)
char *s;

va_start(ap, fmt);
if (vasprintf(&s, fmt, ap) < 0)
return -1;
va_end(ap);

if (!oper_count) {
int i;

s = va_arg(ap, char *);

/* Strip trailing spaces */
i = strlen(s) - 1;
while (s[i] == ' ')
Expand All @@ -61,11 +65,10 @@ static int fprintf_json(void *out, const char *fmt, ...)
} else if (!strcmp(fmt, ",")) {
/* Skip */
} else {
s = va_arg(ap, char *);
jsonw_string(json_wtr, s);
oper_count++;
}
va_end(ap);
free(s);
return 0;
}

Expand Down
Loading

0 comments on commit 17ccf9e

Please sign in to comment.