Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Browse files Browse the repository at this point in the history
Daniel Borkmann says:

====================
pull-request: bpf-next 2018-08-13

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Add driver XDP support for veth. This can be used in conjunction with
   redirect of another XDP program e.g. sitting on NIC so the xdp_frame
   can be forwarded to the peer veth directly without modification,
   from Toshiaki.

2) Add a new BPF map type REUSEPORT_SOCKARRAY and prog type SK_REUSEPORT
   in order to provide more control and visibility on where a SO_REUSEPORT
   sk should be located, and the latter enables to directly select a sk
   from the bpf map. This also enables map-in-map for application migration
   use cases, from Martin.

3) Add a new BPF helper bpf_skb_ancestor_cgroup_id() that returns the id
   of cgroup v2 that is the ancestor of the cgroup associated with the
   skb at the ancestor_level, from Andrey.

4) Implement BPF fs map pretty-print support based on BTF data for regular
   hash table and LRU map, from Yonghong.

5) Decouple the ability to attach BTF for a map from the key and value
   pretty-printer in BPF fs, and enable further support of BTF for maps for
   percpu and LPM trie, from Daniel.

6) Implement a better BPF sample of using XDP's CPU redirect feature for
   load balancing SKB processing to remote CPU. The sample implements the
   same XDP load balancing as Suricata does which is symmetric hash based
   on IP and L4 protocol, from Jesper.

7) Revert adding NULL pointer check with WARN_ON_ONCE() in __xdp_return()'s
   critical path as it is ensured that the allocator is present, from Björn.
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Aug 13, 2018
2 parents 961d973 + 2ce3206 commit c1617fb
Show file tree
Hide file tree
Showing 56 changed files with 3,707 additions and 176 deletions.
750 changes: 741 additions & 9 deletions drivers/net/veth.c

Large diffs are not rendered by default.

41 changes: 37 additions & 4 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ struct bpf_prog;
struct bpf_map;
struct sock;
struct seq_file;
struct btf;
struct btf_type;

/* map is generic key/value storage optionally accesible by eBPF programs */
struct bpf_map_ops {
Expand All @@ -48,8 +48,9 @@ struct bpf_map_ops {
u32 (*map_fd_sys_lookup_elem)(void *ptr);
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
struct seq_file *m);
int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf,
u32 key_type_id, u32 value_type_id);
int (*map_check_btf)(const struct bpf_map *map,
const struct btf_type *key_type,
const struct btf_type *value_type);
};

struct bpf_map {
Expand Down Expand Up @@ -118,9 +119,13 @@ static inline bool bpf_map_offload_neutral(const struct bpf_map *map)

static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
{
return map->ops->map_seq_show_elem && map->ops->map_check_btf;
return map->btf && map->ops->map_seq_show_elem;
}

int map_check_no_btf(const struct bpf_map *map,
const struct btf_type *key_type,
const struct btf_type *value_type);

extern const struct bpf_map_ops bpf_map_offload_ops;

/* function argument constraints */
Expand Down Expand Up @@ -524,6 +529,7 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
}

struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
int array_map_alloc_check(union bpf_attr *attr);

#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
Expand Down Expand Up @@ -769,6 +775,33 @@ static inline void __xsk_map_flush(struct bpf_map *map)
}
#endif

#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
void *value);
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags);
#else
static inline void bpf_sk_reuseport_detach(struct sock *sk)
{
}

#ifdef CONFIG_BPF_SYSCALL
static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
void *key, void *value)
{
return -EOPNOTSUPP;
}

static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
void *key, void *value,
u64 map_flags)
{
return -EOPNOTSUPP;
}
#endif /* CONFIG_BPF_SYSCALL */
#endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */

/* verifier prototypes for helper functions called from eBPF programs */
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
Expand Down
6 changes: 6 additions & 0 deletions include/linux/bpf_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
#ifdef CONFIG_BPF_LIRC_MODE2
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
#endif
#ifdef CONFIG_INET
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
#endif

BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
Expand Down Expand Up @@ -60,4 +63,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#if defined(CONFIG_XDP_SOCKETS)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
#endif
#ifdef CONFIG_INET
BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
#endif
#endif
30 changes: 30 additions & 0 deletions include/linux/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,36 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
}

/**
* cgroup_ancestor - find ancestor of cgroup
* @cgrp: cgroup to find ancestor of
* @ancestor_level: level of ancestor to find starting from root
*
* Find ancestor of cgroup at specified level starting from root if it exists
* and return pointer to it. Return NULL if @cgrp doesn't have ancestor at
* @ancestor_level.
*
* This function is safe to call as long as @cgrp is accessible.
*/
static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp,
int ancestor_level)
{
struct cgroup *ptr;

if (cgrp->level < ancestor_level)
return NULL;

for (ptr = cgrp;
ptr && ptr->level > ancestor_level;
ptr = cgroup_parent(ptr))
;

if (ptr && ptr->level == ancestor_level)
return ptr;

return NULL;
}

/**
* task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
* @task: the task to be tested
Expand Down
51 changes: 51 additions & 0 deletions include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ struct seccomp_data;
struct bpf_prog_aux;
struct xdp_rxq_info;
struct xdp_buff;
struct sock_reuseport;

/* ArgX, context and stack frame pointer register positions. Note,
* Arg1, Arg2, Arg3, etc are used as argument mappings of function
Expand Down Expand Up @@ -537,6 +538,20 @@ struct sk_msg_buff {
struct list_head list;
};

struct bpf_redirect_info {
u32 ifindex;
u32 flags;
struct bpf_map *map;
struct bpf_map *map_to_flush;
unsigned long map_owner;
u32 kern_flags;
};

DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);

/* flags for bpf_redirect_info kern_flags */
#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */

/* Compute the linear packet data range [data, data_end) which
* will be accessed by various program types (cls_bpf, act_bpf,
* lwt, ...). Subsystems allowing direct data access must (!)
Expand Down Expand Up @@ -738,6 +753,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
int sk_attach_bpf(u32 ufd, struct sock *sk);
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
void sk_reuseport_prog_free(struct bpf_prog *prog);
int sk_detach_filter(struct sock *sk);
int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
unsigned int len);
Expand Down Expand Up @@ -765,6 +781,27 @@ static inline bool bpf_dump_raw_ok(void)
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
const struct bpf_insn *patch, u32 len);

static inline bool xdp_return_frame_no_direct(void)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);

return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT;
}

static inline void xdp_set_return_frame_no_direct(void)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);

ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT;
}

static inline void xdp_clear_return_frame_no_direct(void)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);

ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT;
}

static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
unsigned int pktlen)
{
Expand Down Expand Up @@ -798,6 +835,20 @@ void bpf_warn_invalid_xdp_action(u32 act);
struct sock *do_sk_redirect_map(struct sk_buff *skb);
struct sock *do_msg_redirect_map(struct sk_msg_buff *md);

#ifdef CONFIG_INET
struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
struct bpf_prog *prog, struct sk_buff *skb,
u32 hash);
#else
static inline struct sock *
bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
struct bpf_prog *prog, struct sk_buff *skb,
u32 hash)
{
return NULL;
}
#endif

#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
extern int bpf_jit_harden;
Expand Down
1 change: 1 addition & 0 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -1038,6 +1038,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
}

struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
void skb_headers_offset_update(struct sk_buff *skb, int off);
int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
void skb_copy_header(struct sk_buff *new, const struct sk_buff *old);
Expand Down
1 change: 1 addition & 0 deletions include/net/addrconf.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
bool match_wildcard);
bool inet_rcv_saddr_any(const struct sock *sk);
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);

Expand Down
19 changes: 15 additions & 4 deletions include/net/sock_reuseport.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,36 @@
#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <net/sock.h>

extern spinlock_t reuseport_lock;

struct sock_reuseport {
struct rcu_head rcu;

u16 max_socks; /* length of socks */
u16 num_socks; /* elements in socks */
/* The last synq overflow event timestamp of this
* reuse->socks[] group.
*/
unsigned int synq_overflow_ts;
/* ID stays the same even after the size of socks[] grows. */
unsigned int reuseport_id;
bool bind_inany;
struct bpf_prog __rcu *prog; /* optional BPF sock selector */
struct sock *socks[0]; /* array of sock pointers */
};

extern int reuseport_alloc(struct sock *sk);
extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
extern int reuseport_alloc(struct sock *sk, bool bind_inany);
extern int reuseport_add_sock(struct sock *sk, struct sock *sk2,
bool bind_inany);
extern void reuseport_detach_sock(struct sock *sk);
extern struct sock *reuseport_select_sock(struct sock *sk,
u32 hash,
struct sk_buff *skb,
int hdr_len);
extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
struct bpf_prog *prog);
extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
int reuseport_get_id(struct sock_reuseport *reuse);

#endif /* _SOCK_REUSEPORT_H */
30 changes: 28 additions & 2 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <net/inet_hashtables.h>
#include <net/checksum.h>
#include <net/request_sock.h>
#include <net/sock_reuseport.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ip.h>
Expand Down Expand Up @@ -473,19 +474,44 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
*/
static inline void tcp_synq_overflow(const struct sock *sk)
{
unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
unsigned int last_overflow;
unsigned int now = jiffies;

if (sk->sk_reuseport) {
struct sock_reuseport *reuse;

reuse = rcu_dereference(sk->sk_reuseport_cb);
if (likely(reuse)) {
last_overflow = READ_ONCE(reuse->synq_overflow_ts);
if (time_after32(now, last_overflow + HZ))
WRITE_ONCE(reuse->synq_overflow_ts, now);
return;
}
}

last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
if (time_after32(now, last_overflow + HZ))
tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
}

/* syncookies: no recent synqueue overflow on this listening socket? */
static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
{
unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
unsigned int last_overflow;
unsigned int now = jiffies;

if (sk->sk_reuseport) {
struct sock_reuseport *reuse;

reuse = rcu_dereference(sk->sk_reuseport_cb);
if (likely(reuse)) {
last_overflow = READ_ONCE(reuse->synq_overflow_ts);
return time_after32(now, last_overflow +
TCP_SYNCOOKIE_VALID);
}
}

last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
}

Expand Down
7 changes: 7 additions & 0 deletions include/net/xdp.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@ struct xdp_frame {
struct net_device *dev_rx; /* used by cpumap */
};

/* Clear kernel pointers in xdp_frame */
static inline void xdp_scrub_frame(struct xdp_frame *frame)
{
frame->data = NULL;
frame->dev_rx = NULL;
}

/* Convert xdp_buff to xdp_frame */
static inline
struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
Expand Down
Loading

0 comments on commit c1617fb

Please sign in to comment.