Andrii Nakryiko says:

====================
bpf-next 2021-07-30

We've added 64 non-merge commits during the last 15 day(s) which contain
a total of 83 files changed, 5027 insertions(+), 1808 deletions(-).

The main changes are:

1) BTF-guided binary data dumping libbpf API, from Alan.

2) Internal factoring out of libbpf CO-RE relocation logic, from Alexei.

3) Ambient BPF run context and cgroup storage cleanup, from Andrii.

4) Few small API additions for libbpf 1.0 effort, from Evgeniy and Hengqi.

5) bpf_program__attach_kprobe_opts() fixes in libbpf, from Jiri.

6) bpf_{get,set}sockopt() support in BPF iterators, from Martin.

7) BPF map pinning improvements in libbpf, from Martynas.

8) Improved module BTF support in libbpf and bpftool, from Quentin.

9) Bpftool cleanups and documentation improvements, from Quentin.

10) Libbpf improvements for supporting CO-RE on old kernels, from Shuyi.

11) Increased maximum cgroup storage size, from Stanislav.

12) Small fixes and improvements to BPF tests and samples, from various folks.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (64 commits)
  tools: bpftool: Complete metrics list in "bpftool prog profile" doc
  tools: bpftool: Document and add bash completion for -L, -B options
  selftests/bpf: Update bpftool's consistency script for checking options
  tools: bpftool: Update and synchronise option list in doc and help msg
  tools: bpftool: Complete and synchronise attach or map types
  selftests/bpf: Check consistency between bpftool source, doc, completion
  tools: bpftool: Slightly ease bash completion updates
  unix_bpf: Fix a potential deadlock in unix_dgram_bpf_recvmsg()
  libbpf: Add btf__load_vmlinux_btf/btf__load_module_btf
  tools: bpftool: Support dumping split BTF by id
  libbpf: Add split BTF support for btf__load_from_kernel_by_id()
  tools: Replace btf__get_from_id() with btf__load_from_kernel_by_id()
  tools: Free BTF objects at various locations
  libbpf: Rename btf__get_from_id() as btf__load_from_kernel_by_id()
  libbpf: Rename btf__load() as btf__load_into_kernel()
  libbpf: Return non-null error on failures in libbpf_find_prog_btf_id()
  bpf: Emit better log message if bpf_iter ctx arg btf_id == 0
  tools/resolve_btfids: Emit warnings and patch zero id for missing symbols
  bpf: Increase supported cgroup storage value size
  libbpf: Fix race when pinning maps in parallel
  ...
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
kuba-moo committed Jul 31, 2021
2 parents d2e11fd + ab0720c commit d39e8b9
Showing 83 changed files with 5,027 additions and 1,808 deletions.
23 changes: 16 additions & 7 deletions Documentation/networking/filter.rst
@@ -320,13 +320,6 @@ Examples for low-level BPF:
ret #-1
drop: ret #0

**(Accelerated) VLAN w/ id 10**::

ld vlan_tci
jneq #10, drop
ret #-1
drop: ret #0

**icmp random packet sampling, 1 in 4**::

ldh [12]
@@ -358,6 +351,22 @@ Examples for low-level BPF:
bad: ret #0 /* SECCOMP_RET_KILL_THREAD */
good: ret #0x7fff0000 /* SECCOMP_RET_ALLOW */

Examples for low-level BPF extension:

**Packet for interface index 13**::

ld ifidx
jneq #13, drop
ret #-1
drop: ret #0

**(Accelerated) VLAN w/ id 10**::

ld vlan_tci
jneq #10, drop
ret #-1
drop: ret #0

The above example code can be placed into a file (here called "foo"), and
then be passed to the bpf_asm tool for generating opcodes, output that xt_bpf
and cls_bpf understands and can directly be loaded with. Example with above
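A side note on using such output: beyond xt_bpf and cls_bpf, the same classic BPF opcodes can be attached to a socket with SO_ATTACH_FILTER. A minimal userspace sketch — the single-instruction program below is an illustrative placeholder, not actual bpf_asm output for the examples above:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/filter.h>
#include <linux/if_ether.h>

int main(void)
{
	/* "ret #-1" -- accept every packet (placeholder program) */
	struct sock_filter code[] = {
		{ 0x06, 0, 0, 0xffffffff },
	};
	struct sock_fprog prog = {
		.len = sizeof(code) / sizeof(code[0]),
		.filter = code,
	};
	/* raw packet socket; needs CAP_NET_RAW (e.g. run as root) */
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
				 &prog, sizeof(prog)) < 0) {
		fprintf(stderr, "attach failed: %s\n", strerror(errno));
		return 1;
	}
	return 0;
}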
54 changes: 0 additions & 54 deletions include/linux/bpf-cgroup.h
@@ -27,19 +27,6 @@ struct task_struct;
extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])

#define BPF_CGROUP_STORAGE_NEST_MAX 8

struct bpf_cgroup_storage_info {
struct task_struct *task;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
};

/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
* to use bpf cgroup storage simultaneously.
*/
DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);

#define for_each_cgroup_storage_type(stype) \
for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)

@@ -172,44 +159,6 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
return BPF_CGROUP_STORAGE_SHARED;
}

static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
*storage[MAX_BPF_CGROUP_STORAGE_TYPE])
{
enum bpf_cgroup_storage_type stype;
int i, err = 0;

preempt_disable();
for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
continue;

this_cpu_write(bpf_cgroup_storage_info[i].task, current);
for_each_cgroup_storage_type(stype)
this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
storage[stype]);
goto out;
}
err = -EBUSY;
WARN_ON_ONCE(1);

out:
preempt_enable();
return err;
}

static inline void bpf_cgroup_storage_unset(void)
{
int i;

for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
continue;

this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
return;
}
}

struct bpf_cgroup_storage *
cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
void *key, bool locked);
@@ -487,9 +436,6 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
return -EINVAL;
}

static inline int bpf_cgroup_storage_set(
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
static inline void bpf_cgroup_storage_unset(void) {}
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
struct bpf_map *map) { return 0; }
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
62 changes: 42 additions & 20 deletions include/linux/bpf.h
@@ -1142,38 +1142,40 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *include_prog,
struct bpf_prog_array **new_array);

struct bpf_run_ctx {};

struct bpf_cg_run_ctx {
struct bpf_run_ctx run_ctx;
struct bpf_prog_array_item *prog_item;
};

/* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
/* BPF program asks to set CN on the packet. */
#define BPF_RET_SET_CN (1 << 0)

/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
* if bpf_cgroup_storage_set() failed, the rest of programs
* will not execute. This should be a really rare scenario
* as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
* preemptions all between bpf_cgroup_storage_set() and
* bpf_cgroup_storage_unset() on the same cpu.
*/
#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \
({ \
struct bpf_prog_array_item *_item; \
struct bpf_prog *_prog; \
struct bpf_prog_array *_array; \
struct bpf_run_ctx *old_run_ctx; \
struct bpf_cg_run_ctx run_ctx; \
u32 _ret = 1; \
u32 func_ret; \
migrate_disable(); \
rcu_read_lock(); \
_array = rcu_dereference(array); \
_item = &_array->items[0]; \
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); \
while ((_prog = READ_ONCE(_item->prog))) { \
if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \
break; \
run_ctx.prog_item = _item; \
func_ret = func(_prog, ctx); \
_ret &= (func_ret & 1); \
*(ret_flags) |= (func_ret >> 1); \
bpf_cgroup_storage_unset(); \
*(ret_flags) |= (func_ret >> 1); \
_item++; \
} \
bpf_reset_run_ctx(old_run_ctx); \
rcu_read_unlock(); \
migrate_enable(); \
_ret; \
@@ -1184,24 +1186,22 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog_array_item *_item; \
struct bpf_prog *_prog; \
struct bpf_prog_array *_array; \
struct bpf_run_ctx *old_run_ctx; \
struct bpf_cg_run_ctx run_ctx; \
u32 _ret = 1; \
migrate_disable(); \
rcu_read_lock(); \
_array = rcu_dereference(array); \
if (unlikely(check_non_null && !_array))\
goto _out; \
_item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \
if (!set_cg_storage) { \
_ret &= func(_prog, ctx); \
} else { \
if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \
break; \
_ret &= func(_prog, ctx); \
bpf_cgroup_storage_unset(); \
} \
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\
while ((_prog = READ_ONCE(_item->prog))) { \
run_ctx.prog_item = _item; \
_ret &= func(_prog, ctx); \
_item++; \
} \
bpf_reset_run_ctx(old_run_ctx); \
_out: \
rcu_read_unlock(); \
migrate_enable(); \
@@ -1284,6 +1284,20 @@ static inline void bpf_enable_instrumentation(void)
migrate_enable();
}

static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
{
struct bpf_run_ctx *old_ctx;

old_ctx = current->bpf_ctx;
current->bpf_ctx = new_ctx;
return old_ctx;
}

static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
{
current->bpf_ctx = old_ctx;
}

extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
extern const struct file_operations bpf_iter_fops;
@@ -1428,6 +1442,9 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux,
struct seq_file *seq);
typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux,
struct bpf_link_info *info);
typedef const struct bpf_func_proto *
(*bpf_iter_get_func_proto_t)(enum bpf_func_id func_id,
const struct bpf_prog *prog);

enum bpf_iter_feature {
BPF_ITER_RESCHED = BIT(0),
@@ -1440,6 +1457,7 @@ struct bpf_iter_reg {
bpf_iter_detach_target_t detach_target;
bpf_iter_show_fdinfo_t show_fdinfo;
bpf_iter_fill_link_info_t fill_link_info;
bpf_iter_get_func_proto_t get_func_proto;
u32 ctx_arg_info_size;
u32 feature;
struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
@@ -1462,6 +1480,8 @@ struct bpf_iter__bpf_map_elem {
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
bool bpf_iter_prog_supported(struct bpf_prog *prog);
const struct bpf_func_proto *
bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog);
int bpf_iter_new_fd(struct bpf_link *link);
bool bpf_link_is_iter(struct bpf_link *link);
@@ -2036,6 +2056,8 @@ extern const struct bpf_func_proto bpf_task_storage_get_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
extern const struct bpf_func_proto bpf_sk_getsockopt_proto;

const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
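The bpf_set_run_ctx()/bpf_reset_run_ctx() pair above is the heart of the ambient run context: the dispatcher publishes a context through current->bpf_ctx before invoking programs, and helpers later recover per-program state from it with container_of() instead of scanning per-CPU task slots. A self-contained userspace analogue of the pattern — every name below is illustrative, none of it is kernel API:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct run_ctx {};			/* empty base, like bpf_run_ctx */

struct cg_run_ctx {
	struct run_ctx run_ctx;		/* embedded base */
	int prog_item;			/* stands in for the prog_array_item */
};

static __thread struct run_ctx *cur_ctx;	/* stands in for current->bpf_ctx */

static struct run_ctx *set_run_ctx(struct run_ctx *new_ctx)
{
	struct run_ctx *old = cur_ctx;

	cur_ctx = new_ctx;
	return old;
}

static void reset_run_ctx(struct run_ctx *old)
{
	cur_ctx = old;
}

/* a "helper" recovering its context ambiently, much like
 * bpf_get_local_storage() now does via container_of(current->bpf_ctx, ...) */
static int helper_get_item(void)
{
	struct cg_run_ctx *ctx = container_of(cur_ctx, struct cg_run_ctx, run_ctx);

	return ctx->prog_item;
}

int main(void)
{
	struct cg_run_ctx ctx = { .prog_item = 42 };
	struct run_ctx *old = set_run_ctx(&ctx.run_ctx);

	printf("item = %d\n", helper_get_item());	/* prints 42 */
	reset_run_ctx(old);
	return 0;
}

One pointer in the task hands any specialized context to whichever helper knows its concrete type, which is why the nesting-depth limit of the old per-CPU scheme disappears.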
3 changes: 3 additions & 0 deletions include/linux/sched.h
@@ -42,6 +42,7 @@ struct backing_dev_info;
struct bio_list;
struct blk_plug;
struct bpf_local_storage;
struct bpf_run_ctx;
struct capture_control;
struct cfs_rq;
struct fs_struct;
@@ -1379,6 +1380,8 @@ struct task_struct {
#ifdef CONFIG_BPF_SYSCALL
/* Used by BPF task local storage */
struct bpf_local_storage __rcu *bpf_storage;
/* Used for BPF run context */
struct bpf_run_ctx *bpf_ctx;
#endif

#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
6 changes: 6 additions & 0 deletions include/net/inet_hashtables.h
@@ -160,6 +160,12 @@ struct inet_hashinfo {
____cacheline_aligned_in_smp;
};

#define inet_lhash2_for_each_icsk_continue(__icsk) \
hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node)

#define inet_lhash2_for_each_icsk(__icsk, list) \
hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node)

#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)

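These non-RCU lhash2 walkers exist for the TCP seq_file/bpf_iter rework in this series, which batches listening sockets under the bucket lock rather than walking listening_hash. A sketch of the calling pattern, modeled on that iterator code — kernel context only, not compilable standalone, with batching and refcounting elided:

struct inet_listen_hashbucket *ilb2 = &tcp_hashinfo.lhash2[bucket];
struct inet_connection_sock *icsk;
struct sock *sk;

spin_lock(&ilb2->lock);
inet_lhash2_for_each_icsk(icsk, &ilb2->head) {
	sk = (struct sock *)icsk;
	/* filter or batch sk here; a paused walk resumes with
	 * inet_lhash2_for_each_icsk_continue(icsk) */
}
spin_unlock(&ilb2->lock);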
1 change: 0 additions & 1 deletion include/net/tcp.h
@@ -1958,7 +1958,6 @@ struct tcp_iter_state {
struct seq_net_private p;
enum tcp_seq_states state;
struct sock *syn_wait_sk;
struct tcp_seq_afinfo *bpf_seq_afinfo;
int bucket, offset, sbucket, num;
loff_t last_pos;
};
22 changes: 22 additions & 0 deletions kernel/bpf/bpf_iter.c
@@ -360,6 +360,28 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
return supported;
}

const struct bpf_func_proto *
bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
const struct bpf_iter_target_info *tinfo;
const struct bpf_func_proto *fn = NULL;

mutex_lock(&targets_mutex);
list_for_each_entry(tinfo, &targets, list) {
if (tinfo->btf_id == prog->aux->attach_btf_id) {
const struct bpf_iter_reg *reg_info;

reg_info = tinfo->reg_info;
if (reg_info->get_func_proto)
fn = reg_info->get_func_proto(func_id, prog);
break;
}
}
mutex_unlock(&targets_mutex);

return fn;
}

static void bpf_iter_link_release(struct bpf_link *link)
{
struct bpf_iter_link *iter_link =
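The new get_func_proto hook lets an iterator target expose extra helpers to programs attached to it; this series uses it to offer bpf_setsockopt()/bpf_getsockopt() from TCP/UDP iterators (see the bpf_sk_{set,get}sockopt_proto externs added to bpf.h above). A target-side sketch modeled on the tcp iterator in this series; exact naming may differ:

static const struct bpf_func_proto *
bpf_iter_tcp_get_func_proto(enum bpf_func_id func_id,
			    const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_setsockopt:
		return &bpf_sk_setsockopt_proto;
	case BPF_FUNC_getsockopt:
		return &bpf_sk_getsockopt_proto;
	default:
		return NULL;	/* fall back to the generic protos */
	}
}

/* wired up through the new bpf_iter_reg field:
 *	.get_func_proto	= bpf_iter_tcp_get_func_proto,
 */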
5 changes: 5 additions & 0 deletions kernel/bpf/btf.c
@@ -4825,6 +4825,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];

if (ctx_arg_info->offset == off) {
if (!ctx_arg_info->btf_id) {
bpf_log(log,"invalid btf_id for context argument offset %u\n", off);
return false;
}

info->reg_type = ctx_arg_info->reg_type;
info->btf = btf_vmlinux;
info->btf_id = ctx_arg_info->btf_id;
16 changes: 5 additions & 11 deletions kernel/bpf/helpers.c
@@ -393,8 +393,6 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
};

#ifdef CONFIG_CGROUP_BPF
DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);

BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
@@ -403,17 +401,13 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
* verifier checks that its value is correct.
*/
enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
struct bpf_cgroup_storage *storage = NULL;
struct bpf_cgroup_storage *storage;
struct bpf_cg_run_ctx *ctx;
void *ptr;
int i;

for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
continue;

storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
break;
}
/* get current cgroup storage from BPF run context */
ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
storage = ctx->prog_item->cgroup_storage[stype];

if (stype == BPF_CGROUP_STORAGE_SHARED)
ptr = &READ_ONCE(storage->buf)->data[0];
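Nothing changes for BPF programs consuming cgroup storage; the helper keeps its contract while the lookup now comes from the run context planted by BPF_PROG_RUN_ARRAY. A minimal BPF-C consumer for reference — standard usage, not code from this commit:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* per-cgroup egress byte counter; larger value types are possible
 * under this series' raised cgroup storage size limit */
struct {
	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
	__type(key, struct bpf_cgroup_storage_key);
	__type(value, __u64);
} byte_cnt SEC(".maps");

SEC("cgroup_skb/egress")
int count_egress(struct __sk_buff *skb)
{
	__u64 *bytes = bpf_get_local_storage(&byte_cnt, 0);

	__sync_fetch_and_add(bytes, skb->len);
	return 1;	/* allow the packet */
}

char LICENSE[] SEC("license") = "GPL";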
(Diff truncated; the remaining 74 changed files are not shown.)
