Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:

====================
pull-request: bpf-next 2018-10-21

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Implement two new kinds of BPF maps, that is, queue and stack
   maps, along with new peek, push and pop operations (a usage
   sketch follows the commit message), from Mauricio.

2) Add support for the MSG_PEEK flag when redirecting into an ingress
   psock sk_msg queue, and add a new helper bpf_msg_push_data() for
   inserting data into the message, from John.

3) Allow BPF programs of type BPF_PROG_TYPE_CGROUP_SKB to use
   direct packet access for __sk_buff, from Song.

4) Use more lightweight barriers for walking the perf ring buffer, in
   libbpf and in the perf tool as well. Also, various fixes and
   improvements on the verifier side, from Daniel.

5) Add per-symbol visibility for the libbpf DSO and hide global
   symbols such as the netlink related functions by default, from
   Andrey.

6) Two improvements to nfp's BPF offload: check vNIC capabilities in
   case a prog is shared with multiple vNICs, and protect against
   mis-initialized atomic counters, from Jakub.

7) Fix bpftool to use 4-context mode for the nfp disassembler, also
   from Jakub.

8) Fix a return value comparison in test_libbpf.sh and add several
   bpftool improvements: bash completion, documentation of bpf fs
   restrictions, and batch mode summary printing, from Quentin.

9) Fix a file resource leak in BPF selftest's load_kallsyms()
   helper, from Peng.

10) Fix an unused variable warning in map_lookup_and_delete_elem(),
    from Alexei.

11) Fix bpf_skb_adjust_room() signature in BPF UAPI helper doc,
    from Nicolas.

12) Add missing executables to .gitignore in BPF selftests, from Anders.
====================

Signed-off-by: David S. Miller <[email protected]>
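
For item 1), here is a minimal sketch of how the new queue map could be
used from a BPF program. This is an illustration, not code from the
series: the map and function names are made up, and it assumes a
bpf_helpers.h that declares the new bpf_map_push_elem(),
bpf_map_pop_elem() and bpf_map_peek_elem() helpers.

#include <linux/bpf.h>
#include "bpf_helpers.h"

/* Hypothetical FIFO of packet lengths. Queue and stack maps take no
 * key, so key_size must be 0.
 */
struct bpf_map_def SEC("maps") len_queue = {
	.type		= BPF_MAP_TYPE_QUEUE,
	.key_size	= 0,
	.value_size	= sizeof(__u32),
	.max_entries	= 64,
};

SEC("socket")
int record_len(struct __sk_buff *skb)
{
	__u32 len = skb->len;

	/* BPF_EXIST instead of BPF_ANY would evict the oldest entry
	 * when the queue is full.
	 */
	bpf_map_push_elem(&len_queue, &len, BPF_ANY);
	return 0;
}

User space can then drain the queue with the new
BPF_MAP_LOOKUP_AND_DELETE_ELEM command, which pops the oldest element.
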
davem330 committed Oct 22, 2018
2 parents 92303c8 + fe8eccc commit a19c59c
Showing 62 changed files with 2,315 additions and 394 deletions.
10 changes: 9 additions & 1 deletion drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -189,6 +189,11 @@ enum nfp_bpf_map_use {
NFP_MAP_USE_ATOMIC_CNT,
};

struct nfp_bpf_map_word {
unsigned char type :4;
unsigned char non_zero_update :1;
};

/**
* struct nfp_bpf_map - private per-map data attached to BPF maps for offload
* @offmap: pointer to the offloaded BPF map
@@ -202,7 +207,7 @@ struct nfp_bpf_map {
struct nfp_app_bpf *bpf;
u32 tid;
struct list_head l;
-	enum nfp_bpf_map_use use_map[];
+	struct nfp_bpf_map_word use_map[];
};

struct nfp_bpf_neutral_map {
@@ -436,6 +441,7 @@ struct nfp_bpf_subprog_info {
* @prog: machine code
* @prog_len: number of valid instructions in @prog array
* @__prog_alloc_len: alloc size of @prog array
* @stack_size: total amount of stack used
* @verifier_meta: temporary storage for verifier's insn meta
* @type: BPF program type
* @last_bpf_off: address of the last instruction translated from BPF
@@ -460,6 +466,8 @@ struct nfp_prog {
unsigned int prog_len;
unsigned int __prog_alloc_len;

unsigned int stack_size;

struct nfp_insn_meta *verifier_meta;

enum bpf_prog_type type;
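
A note on the new struct nfp_bpf_map_word above: the driver now tracks
every 32-bit word of a map value with a 4-bit use type plus a one-bit
"was ever updated to non-zero" flag, replacing the plain enum array. A
stand-alone model of the bookkeeping cost (the value size is a made-up
example; this is not nfp code):

#include <stdio.h>

struct nfp_bpf_map_word {
	unsigned char type :4;		/* enum nfp_bpf_map_use */
	unsigned char non_zero_update :1;
};

int main(void)
{
	unsigned int value_size = 24;			/* example bytes */
	unsigned int n_words = (value_size + 3) / 4;	/* DIV_ROUND_UP */

	/* one tracking byte per 4-byte word of map value */
	printf("%u words, %zu bytes of metadata\n",
	       n_words, n_words * sizeof(struct nfp_bpf_map_word));
	return 0;
}
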
32 changes: 30 additions & 2 deletions drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -262,10 +262,25 @@ static void nfp_map_bpf_byte_swap(struct nfp_bpf_map *nfp_map, void *value)
unsigned int i;

for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++)
-		if (nfp_map->use_map[i] == NFP_MAP_USE_ATOMIC_CNT)
+		if (nfp_map->use_map[i].type == NFP_MAP_USE_ATOMIC_CNT)
word[i] = (__force u32)cpu_to_be32(word[i]);
}

/* Mark value as unsafely initialized in case it becomes an atomic
 * counter later and we didn't byte swap something that is not byte
 * swap neutral.
 */
static void
nfp_map_bpf_byte_swap_record(struct nfp_bpf_map *nfp_map, void *value)
{
u32 *word = value;
unsigned int i;

for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++)
if (nfp_map->use_map[i].type == NFP_MAP_UNUSED &&
word[i] != (__force u32)cpu_to_be32(word[i]))
nfp_map->use_map[i].non_zero_update = 1;
}

static int
nfp_bpf_map_lookup_entry(struct bpf_offloaded_map *offmap,
void *key, void *value)
@@ -285,6 +300,7 @@ nfp_bpf_map_update_entry(struct bpf_offloaded_map *offmap,
void *key, void *value, u64 flags)
{
nfp_map_bpf_byte_swap(offmap->dev_priv, value);
nfp_map_bpf_byte_swap_record(offmap->dev_priv, value);
return nfp_bpf_ctrl_update_entry(offmap, key, value, flags);
}

@@ -473,7 +489,7 @@ nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
-	unsigned int max_mtu;
+	unsigned int max_mtu, max_stack, max_prog_len;
dma_addr_t dma_addr;
void *img;
int err;
@@ -484,6 +500,18 @@ nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog,
return -EOPNOTSUPP;
}

max_stack = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64;
if (nfp_prog->stack_size > max_stack) {
NL_SET_ERR_MSG_MOD(extack, "stack too large");
return -EOPNOTSUPP;
}

max_prog_len = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN);
if (nfp_prog->prog_len > max_prog_len) {
NL_SET_ERR_MSG_MOD(extack, "program too long");
return -EOPNOTSUPP;
}

img = nfp_bpf_relo_for_vnic(nfp_prog, nn->app_priv);
if (IS_ERR(img))
return PTR_ERR(img);
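
The recording above hinges on the test word[i] != cpu_to_be32(word[i]):
a value is only safe to reinterpret later as a big-endian atomic counter
if byte swapping leaves it unchanged, i.e. zero or any byte palindrome.
A user-space illustration, with htonl() standing in for cpu_to_be32()
(the commented results assume a little-endian host):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

static int swap_neutral(uint32_t w)
{
	return w == htonl(w);	/* models word == cpu_to_be32(word) */
}

int main(void)
{
	printf("%d\n", swap_neutral(0x00000000));	/* 1: zero is safe */
	printf("%d\n", swap_neutral(0x00000001));	/* 0: swaps to 0x01000000 */
	printf("%d\n", swap_neutral(0x01000001));	/* 1: byte palindrome */
	return 0;
}
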
69 changes: 59 additions & 10 deletions drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -80,6 +80,46 @@ nfp_record_adjust_head(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
nfp_prog->adjust_head_location = location;
}

static bool nfp_bpf_map_update_value_ok(struct bpf_verifier_env *env)
{
const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1;
const struct bpf_reg_state *reg3 = cur_regs(env) + BPF_REG_3;
struct bpf_offloaded_map *offmap;
struct bpf_func_state *state;
struct nfp_bpf_map *nfp_map;
int off, i;

state = env->cur_state->frame[reg3->frameno];

/* We need to record each time an update happens with non-zero words,
 * in case such a word is later used in atomic operations.
 * This implicitly depends on nfp_bpf_stack_arg_ok(reg3) being run
 * before.
 */

offmap = map_to_offmap(reg1->map_ptr);
nfp_map = offmap->dev_priv;
off = reg3->off + reg3->var_off.value;

for (i = 0; i < offmap->map.value_size; i++) {
struct bpf_stack_state *stack_entry;
unsigned int soff;

soff = -(off + i) - 1;
stack_entry = &state->stack[soff / BPF_REG_SIZE];
if (stack_entry->slot_type[soff % BPF_REG_SIZE] == STACK_ZERO)
continue;

if (nfp_map->use_map[i / 4].type == NFP_MAP_USE_ATOMIC_CNT) {
pr_vlog(env, "value at offset %d/%d may be non-zero, bpf_map_update_elem() is required to initialize atomic counters to zero to avoid offload endian issues\n",
i, soff);
return false;
}
nfp_map->use_map[i / 4].non_zero_update = 1;
}

return true;
}
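
To make the stack-slot arithmetic above concrete, here is a tiny
stand-alone reproduction of the soff computation (the offset is a
made-up example; BPF_REG_SIZE is 8):

#include <stdio.h>

#define BPF_REG_SIZE 8

int main(void)
{
	int off = -16;	/* example reg3->off + reg3->var_off.value */
	int i = 0;	/* first byte of the map value */
	int soff = -(off + i) - 1;

	/* prints "soff=15 slot=1 byte=7", i.e. the byte lives in the
	 * second 8-byte stack slot below the frame pointer
	 */
	printf("soff=%d slot=%d byte=%d\n",
	       soff, soff / BPF_REG_SIZE, soff % BPF_REG_SIZE);
	return 0;
}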

static int
nfp_bpf_stack_arg_ok(const char *fname, struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
@@ -171,7 +211,8 @@ nfp_bpf_check_helper_call(struct nfp_prog *nfp_prog,
bpf->helpers.map_update, reg1) ||
!nfp_bpf_stack_arg_ok("map_update", env, reg2,
meta->func_id ? &meta->arg2 : NULL) ||
!nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL))
!nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL) ||
!nfp_bpf_map_update_value_ok(env))
return -EOPNOTSUPP;
break;

@@ -352,15 +393,22 @@ nfp_bpf_map_mark_used_one(struct bpf_verifier_env *env,
struct nfp_bpf_map *nfp_map,
unsigned int off, enum nfp_bpf_map_use use)
{
-	if (nfp_map->use_map[off / 4] != NFP_MAP_UNUSED &&
-	    nfp_map->use_map[off / 4] != use) {
+	if (nfp_map->use_map[off / 4].type != NFP_MAP_UNUSED &&
+	    nfp_map->use_map[off / 4].type != use) {
pr_vlog(env, "map value use type conflict %s vs %s off: %u\n",
-			nfp_bpf_map_use_name(nfp_map->use_map[off / 4]),
+			nfp_bpf_map_use_name(nfp_map->use_map[off / 4].type),
nfp_bpf_map_use_name(use), off);
return -EOPNOTSUPP;
}

-	nfp_map->use_map[off / 4] = use;
+	if (nfp_map->use_map[off / 4].non_zero_update &&
+	    use == NFP_MAP_USE_ATOMIC_CNT) {
+		pr_vlog(env, "atomic counter in map value may already be initialized to non-zero value off: %u\n",
+			off);
+		return -EOPNOTSUPP;
+	}
+
+	nfp_map->use_map[off / 4].type = use;

return 0;
}
@@ -699,9 +747,9 @@ nfp_bpf_get_stack_usage(struct nfp_prog *nfp_prog, unsigned int cnt)

static int nfp_bpf_finalize(struct bpf_verifier_env *env)
{
-	unsigned int stack_size, stack_needed;
struct bpf_subprog_info *info;
struct nfp_prog *nfp_prog;
+	unsigned int max_stack;
struct nfp_net *nn;
int i;

@@ -729,11 +777,12 @@ static int nfp_bpf_finalize(struct bpf_verifier_env *env)
}

nn = netdev_priv(env->prog->aux->offload->netdev);
-	stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64;
-	stack_needed = nfp_bpf_get_stack_usage(nfp_prog, env->prog->len);
-	if (stack_needed > stack_size) {
+	max_stack = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64;
+	nfp_prog->stack_size = nfp_bpf_get_stack_usage(nfp_prog,
+						       env->prog->len);
+	if (nfp_prog->stack_size > max_stack) {
		pr_vlog(env, "stack too large: program %dB > FW stack %dB\n",
-			stack_needed, stack_size);
+			nfp_prog->stack_size, max_stack);
return -EOPNOTSUPP;
}

7 changes: 7 additions & 0 deletions include/linux/bpf.h
@@ -39,6 +39,9 @@ struct bpf_map_ops {
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
int (*map_pop_elem)(struct bpf_map *map, void *value);
int (*map_peek_elem)(struct bpf_map *map, void *value);

/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -138,6 +141,7 @@ enum bpf_arg_type {
ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */
ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */
ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */

/* the following constraints used to prototype bpf_memcmp() and other
* functions that access data on eBPF program stack
@@ -810,6 +814,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
extern const struct bpf_func_proto bpf_map_delete_elem_proto;
extern const struct bpf_func_proto bpf_map_push_elem_proto;
extern const struct bpf_func_proto bpf_map_pop_elem_proto;
extern const struct bpf_func_proto bpf_map_peek_elem_proto;

extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
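
The three new ops give map types queue/stack semantics behind the
existing syscall plumbing. As a rough user-space model of the queue
behaviour described in the summary (FIFO; a BPF_EXIST-style push evicts
the oldest entry when full). This mirrors the semantics only, not the
in-kernel implementation:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define QCAP 4	/* stand-in for max_entries */

struct queue {
	uint32_t buf[QCAP];
	unsigned int head, size;	/* pop at head, push at tail */
};

static bool push(struct queue *q, uint32_t v, bool overwrite)
{
	if (q->size == QCAP) {
		if (!overwrite)
			return false;		/* full, no room */
		q->head = (q->head + 1) % QCAP;	/* drop the oldest */
		q->size--;
	}
	q->buf[(q->head + q->size) % QCAP] = v;
	q->size++;
	return true;
}

static bool peek(const struct queue *q, uint32_t *v)
{
	if (!q->size)
		return false;			/* empty */
	*v = q->buf[q->head];
	return true;
}

static bool pop(struct queue *q, uint32_t *v)
{
	if (!peek(q, v))
		return false;
	q->head = (q->head + 1) % QCAP;
	q->size--;
	return true;
}

int main(void)
{
	struct queue q = { .head = 0, .size = 0 };
	uint32_t v;

	for (v = 1; v <= 5; v++)
		push(&q, v, true);	/* pushing 5 evicts 1 */
	while (pop(&q, &v))
		printf("%u ", v);	/* prints: 2 3 4 5 */
	printf("\n");
	return 0;
}
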
4 changes: 3 additions & 1 deletion include/linux/bpf_types.h
@@ -51,7 +51,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops)
#ifdef CONFIG_PERF_EVENTS
-BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
@@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
#endif
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
21 changes: 21 additions & 0 deletions include/linux/filter.h
@@ -548,6 +548,27 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb)
cb->data_end = skb->data + skb_headlen(skb);
}

/* Similar to bpf_compute_data_pointers(), except that it saves the
 * original data_end in @saved_data_end so that it can be restored
 * later via bpf_restore_data_end().
 */
static inline void bpf_compute_and_save_data_end(
struct sk_buff *skb, void **saved_data_end)
{
struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;

*saved_data_end = cb->data_end;
cb->data_end = skb->data + skb_headlen(skb);
}

/* Restore the data_end saved by bpf_compute_and_save_data_end(). */
static inline void bpf_restore_data_end(
struct sk_buff *skb, void *saved_data_end)
{
struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;

cb->data_end = saved_data_end;
}

static inline u8 *bpf_skb_cb(struct sk_buff *skb)
{
/* eBPF programs may read/write skb->cb[] area to transfer meta
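
The helper pair above exists so that a BPF_PROG_TYPE_CGROUP_SKB program
(item 3 in the summary) can get direct packet access without leaking a
modified data_end to later users of the skb. A stand-alone model of the
save/run/restore pattern; struct fake_skb is made up, and the real
callers sit on the cgroup BPF run path:

#include <stdio.h>

struct fake_skb {
	char data[64];
	unsigned int headlen;
	void *data_end;	/* models bpf_skb_data_end::data_end in skb->cb */
};

static void compute_and_save_data_end(struct fake_skb *skb, void **saved)
{
	*saved = skb->data_end;
	skb->data_end = skb->data + skb->headlen;	/* what the prog sees */
}

static void restore_data_end(struct fake_skb *skb, void *saved)
{
	skb->data_end = saved;
}

int main(void)
{
	struct fake_skb skb = { .headlen = 40, .data_end = NULL };
	void *saved;

	compute_and_save_data_end(&skb, &saved);
	printf("prog sees a %ld byte linear area\n",
	       (long)((char *)skb.data_end - skb.data));
	/* ... the cgroup skb program would run here ... */
	restore_data_end(&skb, saved);
	printf("data_end restored: %s\n", !skb.data_end ? "yes" : "no");
	return 0;
}
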
42 changes: 33 additions & 9 deletions include/linux/skmsg.h
@@ -176,6 +176,7 @@ static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src,
{
dst->sg.data[which] = src->sg.data[which];
dst->sg.data[which].length = size;
dst->sg.size += size;
src->sg.data[which].length -= size;
src->sg.data[which].offset += size;
}
@@ -186,21 +187,29 @@ static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src)
sk_msg_init(src);
}

static inline bool sk_msg_full(const struct sk_msg *msg)
{
return (msg->sg.end == msg->sg.start) && msg->sg.size;
}

static inline u32 sk_msg_elem_used(const struct sk_msg *msg)
{
if (sk_msg_full(msg))
return MAX_MSG_FRAGS;

return msg->sg.end >= msg->sg.start ?
msg->sg.end - msg->sg.start :
msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start);
}

-static inline bool sk_msg_full(const struct sk_msg *msg)
+static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
{
-	return (msg->sg.end == msg->sg.start) && msg->sg.size;
+	return &msg->sg.data[which];
}

-static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
+static inline struct scatterlist sk_msg_elem_cpy(struct sk_msg *msg, int which)
{
-	return &msg->sg.data[which];
+	return msg->sg.data[which];
}

static inline struct page *sk_msg_page(struct sk_msg *msg, int which)
@@ -266,11 +275,6 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
return rcu_dereference_sk_user_data(sk);
}

-static inline bool sk_has_psock(struct sock *sk)
-{
-	return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg;
-}

static inline void sk_psock_queue_msg(struct sk_psock *psock,
struct sk_msg *msg)
{
@@ -370,6 +374,26 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock,
return test_bit(bit, &psock->state);
}

static inline struct sk_psock *sk_psock_get_checked(struct sock *sk)
{
struct sk_psock *psock;

rcu_read_lock();
psock = sk_psock(sk);
if (psock) {
if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) {
psock = ERR_PTR(-EBUSY);
goto out;
}

if (!refcount_inc_not_zero(&psock->refcnt))
psock = ERR_PTR(-EBUSY);
}
out:
rcu_read_unlock();
return psock;
}

static inline struct sk_psock *sk_psock_get(struct sock *sk)
{
struct sk_psock *psock;
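
One detail in the reshuffling above: sk_msg_full() moves in front of
sk_msg_elem_used() because the latter now calls it. The scatter ring
wraps at MAX_MSG_FRAGS, and a full ring has end == start, which the
plain subtraction would report as zero elements. A stand-alone model
(the MAX_MSG_FRAGS value here is illustrative; in the kernel it is
MAX_SKB_FRAGS):

#include <stdio.h>

#define MAX_MSG_FRAGS 18	/* illustrative */

static unsigned int elem_used(unsigned int start, unsigned int end,
			      unsigned int size)
{
	if (end == start && size)	/* sk_msg_full() */
		return MAX_MSG_FRAGS;
	return end >= start ? end - start
			    : end + (MAX_MSG_FRAGS - start);
}

int main(void)
{
	printf("%u\n", elem_used(2, 7, 1));	/* 5: no wrap */
	printf("%u\n", elem_used(15, 3, 1));	/* 6: wrapped */
	printf("%u\n", elem_used(4, 4, 1));	/* 18: ring full */
	printf("%u\n", elem_used(4, 4, 0));	/* 0: ring empty */
	return 0;
}
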
9 changes: 1 addition & 8 deletions include/net/tcp.h
@@ -2051,11 +2051,6 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer);
#define TCP_ULP_MAX 128
#define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX)

-enum {
-	TCP_ULP_TLS,
-	TCP_ULP_BPF,
-};

struct tcp_ulp_ops {
struct list_head list;

@@ -2064,9 +2059,7 @@ struct tcp_ulp_ops {
/* cleanup ulp */
void (*release)(struct sock *sk);

-	int uid;
char name[TCP_ULP_NAME_MAX];
-	bool user_visible;
struct module *owner;
};
int tcp_register_ulp(struct tcp_ulp_ops *type);
@@ -2089,7 +2082,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len);
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
-			      struct msghdr *msg, int len);
+			      struct msghdr *msg, int len, int flags);

/* Call BPF_SOCK_OPS program that returns an int. If the return value
* is < 0, then the BPF op failed (for example if the loaded BPF
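
The extra flags argument to __tcp_bpf_recvmsg() is what lets the psock
ingress queue honour MSG_PEEK (item 2 in the summary): a peek copies
data out without consuming it. The same semantics on a plain socket,
for reference:

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int sv[2];
	char buf[16];
	ssize_t n;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
		return 1;
	write(sv[0], "hello", 5);

	n = recv(sv[1], buf, sizeof(buf), MSG_PEEK);	/* stays queued */
	printf("peeked %zd bytes: %.*s\n", n, (int)n, buf);
	n = recv(sv[1], buf, sizeof(buf), 0);		/* now consumed */
	printf("read   %zd bytes: %.*s\n", n, (int)n, buf);

	close(sv[0]);
	close(sv[1]);
	return 0;
}
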