Merge branch 'bpf-fixes-for-per-cpu-kptr'
Hou Tao says:

====================
bpf: Fixes for per-cpu kptr

From: Hou Tao <[email protected]>

Hi,

The patchset aims to fix the problems found during the review of the
per-cpu kptr patch set [0]. Patch #1 moves the acquisition of pcpu_lock
to after the invocation of pcpu_chunk_addr_search(); it is a
micro-optimization for free_percpu() and is included here because the
same logic is used by the newly-added API pcpu_alloc_size(). Patch #2
introduces pcpu_alloc_size() for dynamic per-cpu areas. Patches #3 and
#4 use pcpu_alloc_size() to check whether unit_size matches the size of
the underlying per-cpu area and to select a matching bpf_mem_cache.
Patch #5 fixes the freeing of per-cpu kptrs when they are released
through map destruction. The last patch adds test cases for these
problems.

Please see individual patches for details. And comments are always
welcome.

Change Log:
v3:
 * rebased on bpf-next
 * patch 2: update API document to note that pcpu_alloc_size() doesn't
            support statically allocated per-cpu area. (Dennis)
 * patch 1 & 2: add Acked-by from Dennis

v2: https://lore.kernel.org/bpf/[email protected]/
  * add a new patch "don't acquire pcpu_lock for pcpu_chunk_addr_search()"
  * patch 2: change type of bit_off and end to unsigned long (Andrew)
  * patch 2: rename the new API as pcpu_alloc_size and follow 80-column convention (Dennis)
  * patch 5: move the common declaration into bpf.h (Stanislav, Alexei)

v1: https://lore.kernel.org/bpf/[email protected]/

[0]: https://lore.kernel.org/bpf/[email protected]
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
Alexei Starovoitov committed Oct 20, 2023
2 parents da1055b + d440ba9 commit cf559a4
Showing 9 changed files with 270 additions and 36 deletions.
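
For orientation before the diff: the sketch below is illustrative only and not taken from this series. It shows the scenario patch #5 targets: a BPF program allocates a per-cpu object and stashes it in a map value as a per-cpu kptr, and whatever is still in the map when the map is torn down must be freed through bpf_global_percpu_ma, which is the path fixed in bpf_obj_free_fields()/__bpf_obj_drop_impl() below. The sketch assumes the bpf_percpu_obj_new()/bpf_percpu_obj_drop() wrappers and the __percpu_kptr tag from the selftests' bpf_experimental.h and bpf_helpers.h; the program, map, and type names are made up.

/* Rough sketch only -- not part of this series. */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "bpf_experimental.h"

struct pcpu_val {
	long cnt;
};

struct map_value {
	struct pcpu_val __percpu_kptr *pc;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct map_value);
} array SEC(".maps");

SEC("tp/syscalls/sys_enter_getpgid")
int store_percpu_kptr(void *ctx)
{
	struct pcpu_val __percpu_kptr *p;
	struct map_value *v;
	int key = 0;

	v = bpf_map_lookup_elem(&array, &key);
	if (!v)
		return 0;

	p = bpf_percpu_obj_new(struct pcpu_val);
	if (!p)
		return 0;

	/* Move the new object into the map and drop any old one. The
	 * object left in the map is only freed during map destruction,
	 * the path this series routes through bpf_global_percpu_ma.
	 */
	p = bpf_kptr_xchg(&v->pc, p);
	if (p)
		bpf_percpu_obj_drop(p);
	return 0;
}

char _license[] SEC("license") = "GPL";

Loading such a program, updating the map, and then destroying the map exercises the same path as the percpu_free_through_map_free subtest added at the end of this commit.
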
1 change: 1 addition & 0 deletions include/linux/bpf.h
@@ -2058,6 +2058,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec);
bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);

struct bpf_map *bpf_map_get(u32 ufd);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
1 change: 1 addition & 0 deletions include/linux/bpf_mem_alloc.h
@@ -11,6 +11,7 @@ struct bpf_mem_caches;
struct bpf_mem_alloc {
struct bpf_mem_caches __percpu *caches;
struct bpf_mem_cache __percpu *cache;
bool percpu;
struct work_struct work;
};

1 change: 1 addition & 0 deletions include/linux/percpu.h
@@ -132,6 +132,7 @@ extern void __init setup_per_cpu_areas(void);
extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1);
extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1);
extern void free_percpu(void __percpu *__pdata);
extern size_t pcpu_alloc_size(void __percpu *__pdata);

DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T))

24 changes: 14 additions & 10 deletions kernel/bpf/helpers.c
@@ -1811,8 +1811,6 @@ bpf_base_func_proto(enum bpf_func_id func_id)
}
}

void __bpf_obj_drop_impl(void *p, const struct btf_record *rec);

void bpf_list_head_free(const struct btf_field *field, void *list_head,
struct bpf_spin_lock *spin_lock)
{
@@ -1844,7 +1842,7 @@ void bpf_list_head_free(const struct btf_field *field, void *list_head,
* bpf_list_head which needs to be freed.
*/
migrate_disable();
__bpf_obj_drop_impl(obj, field->graph_root.value_rec);
__bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
migrate_enable();
}
}
@@ -1883,7 +1881,7 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root,


migrate_disable();
__bpf_obj_drop_impl(obj, field->graph_root.value_rec);
__bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
migrate_enable();
}
}
@@ -1915,8 +1913,10 @@ __bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
}

/* Must be called under migrate_disable(), as required by bpf_mem_free */
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu)
{
struct bpf_mem_alloc *ma;

if (rec && rec->refcount_off >= 0 &&
!refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) {
/* Object is refcounted and refcount_dec didn't result in 0
@@ -1928,18 +1928,22 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
if (rec)
bpf_obj_free_fields(rec, p);

if (percpu)
ma = &bpf_global_percpu_ma;
else
ma = &bpf_global_ma;
if (rec && rec->refcount_off >= 0)
bpf_mem_free_rcu(&bpf_global_ma, p);
bpf_mem_free_rcu(ma, p);
else
bpf_mem_free(&bpf_global_ma, p);
bpf_mem_free(ma, p);
}

__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
{
struct btf_struct_meta *meta = meta__ign;
void *p = p__alloc;

__bpf_obj_drop_impl(p, meta ? meta->record : NULL);
__bpf_obj_drop_impl(p, meta ? meta->record : NULL, false);
}

__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
@@ -1983,7 +1987,7 @@ static int __bpf_list_add(struct bpf_list_node_kern *node,
*/
if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
/* Only called from BPF prog, no need to migrate_disable */
__bpf_obj_drop_impl((void *)n - off, rec);
__bpf_obj_drop_impl((void *)n - off, rec, false);
return -EINVAL;
}

@@ -2082,7 +2086,7 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root,
*/
if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
/* Only called from BPF prog, no need to migrate_disable */
__bpf_obj_drop_impl((void *)n - off, rec);
__bpf_obj_drop_impl((void *)n - off, rec, false);
return -EINVAL;
}

38 changes: 26 additions & 12 deletions kernel/bpf/memalloc.c
@@ -491,21 +491,17 @@ static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
struct llist_node *first;
unsigned int obj_size;

/* For per-cpu allocator, the size of free objects in free list doesn't
* match with unit_size and now there is no way to get the size of
* per-cpu pointer saved in free object, so just skip the checking.
*/
if (c->percpu_size)
return 0;

first = c->free_llist.first;
if (!first)
return 0;

obj_size = ksize(first);
if (c->percpu_size)
obj_size = pcpu_alloc_size(((void **)first)[1]);
else
obj_size = ksize(first);
if (obj_size != c->unit_size) {
WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
idx, obj_size, c->unit_size);
WARN_ONCE(1, "bpf_mem_cache[%u]: percpu %d, unexpected object size %u, expect %u\n",
idx, c->percpu_size, obj_size, c->unit_size);
return -EINVAL;
}
return 0;
@@ -529,6 +525,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
/* room for llist_node and per-cpu pointer */
if (percpu)
percpu_size = LLIST_NODE_SZ + sizeof(void *);
ma->percpu = percpu;

if (size) {
pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
@@ -878,14 +875,25 @@ void notrace *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size)
return !ret ? NULL : ret + LLIST_NODE_SZ;
}

static notrace int bpf_mem_free_idx(void *ptr, bool percpu)
{
size_t size;

if (percpu)
size = pcpu_alloc_size(*((void **)ptr));
else
size = ksize(ptr - LLIST_NODE_SZ);
return bpf_mem_cache_idx(size);
}

void notrace bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr)
{
int idx;

if (!ptr)
return;

idx = bpf_mem_cache_idx(ksize(ptr - LLIST_NODE_SZ));
idx = bpf_mem_free_idx(ptr, ma->percpu);
if (idx < 0)
return;

@@ -899,7 +907,7 @@ void notrace bpf_mem_free_rcu(struct bpf_mem_alloc *ma, void *ptr)
if (!ptr)
return;

idx = bpf_mem_cache_idx(ksize(ptr - LLIST_NODE_SZ));
idx = bpf_mem_free_idx(ptr, ma->percpu);
if (idx < 0)
return;

@@ -973,6 +981,12 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
return !ret ? NULL : ret + LLIST_NODE_SZ;
}

/* The alignment of dynamic per-cpu area is 8, so c->unit_size and the
* actual size of dynamic per-cpu area will always be matched and there is
* no need to adjust size_index for per-cpu allocation. However for the
 * simplicity of the implementation, use a unified size_index for both
* kmalloc and per-cpu allocation.
*/
static __init int bpf_mem_cache_adjust_size(void)
{
unsigned int size;
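
To make the pointer arithmetic above easier to follow, here is an illustrative layout of a free object in a per-cpu cache; the struct and its name are hypothetical (the real code works on raw pointers), but it is what ((void **)first)[1] in check_obj_size() and *((void **)ptr) in bpf_mem_free_idx() rely on:

/* For a per-cpu cache, the allocator kmallocs a small box of
 * percpu_size = LLIST_NODE_SZ + sizeof(void *) bytes, links it into
 * free lists through the leading llist_node, and stores the pointer
 * returned by __alloc_percpu_gfp() right behind it. Callers receive
 * the address just past the llist_node.
 */
struct percpu_free_obj {
	struct llist_node node;	/* LLIST_NODE_SZ bytes, free-list linkage */
	void __percpu *pptr;	/* dynamic per-cpu area of c->unit_size bytes */
};

/* free-list entry: first == &obj->node, so ((void **)first)[1] == obj->pptr
 * user pointer:    ptr == (void *)obj + LLIST_NODE_SZ, so *((void **)ptr) == obj->pptr
 * pcpu_alloc_size(obj->pptr) recovers the size of the per-cpu area and
 * therefore selects the bpf_mem_cache that originally allocated it.
 */
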
6 changes: 2 additions & 4 deletions kernel/bpf/syscall.c
@@ -626,8 +626,6 @@ void bpf_obj_free_timer(const struct btf_record *rec, void *obj)
bpf_timer_cancel_and_free(obj + rec->timer_off);
}

extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec);

void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
{
const struct btf_field *fields;
@@ -662,8 +660,8 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
field->kptr.btf_id);
migrate_disable();
__bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ?
pointee_struct_meta->record :
NULL);
pointee_struct_meta->record : NULL,
fields[i].type == BPF_KPTR_PERCPU);
migrate_enable();
} else {
field->kptr.dtor(xchgd_field);
35 changes: 32 additions & 3 deletions mm/percpu.c
@@ -2244,6 +2244,37 @@ static void pcpu_balance_workfn(struct work_struct *work)
mutex_unlock(&pcpu_alloc_mutex);
}

/**
* pcpu_alloc_size - the size of the dynamic percpu area
* @ptr: pointer to the dynamic percpu area
*
* Returns the size of the @ptr allocation. This is undefined for statically
* defined percpu variables as there is no corresponding chunk->bound_map.
*
* RETURNS:
* The size of the dynamic percpu area.
*
* CONTEXT:
* Can be called from atomic context.
*/
size_t pcpu_alloc_size(void __percpu *ptr)
{
struct pcpu_chunk *chunk;
unsigned long bit_off, end;
void *addr;

if (!ptr)
return 0;

addr = __pcpu_ptr_to_addr(ptr);
/* No pcpu_lock here: ptr has not been freed, so chunk is still alive */
chunk = pcpu_chunk_addr_search(addr);
bit_off = (addr - chunk->base_addr) / PCPU_MIN_ALLOC_SIZE;
end = find_next_bit(chunk->bound_map, pcpu_chunk_map_bits(chunk),
bit_off + 1);
return (end - bit_off) * PCPU_MIN_ALLOC_SIZE;
}

/**
* free_percpu - free percpu area
* @ptr: pointer to area to free
@@ -2267,12 +2298,10 @@ void free_percpu(void __percpu *ptr)
kmemleak_free_percpu(ptr);

addr = __pcpu_ptr_to_addr(ptr);

spin_lock_irqsave(&pcpu_lock, flags);

chunk = pcpu_chunk_addr_search(addr);
off = addr - chunk->base_addr;

spin_lock_irqsave(&pcpu_lock, flags);
size = pcpu_free_area(chunk, off);

pcpu_memcg_free_hook(chunk, off, size);
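
A minimal usage sketch for the new helper (hypothetical caller, not part of this series): pcpu_alloc_size() reports the size of the whole dynamically allocated area backing the pointer, which is at least the requested size because pcpu_alloc() rounds requests up to a multiple of PCPU_MIN_ALLOC_SIZE, and it must not be used on statically defined per-cpu variables.

#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/printk.h>

static int pcpu_alloc_size_demo(void)
{
	void __percpu *p;

	p = __alloc_percpu(24, 8);	/* request 24 bytes, 8-byte aligned */
	if (!p)
		return -ENOMEM;

	/* Reports the size of the dynamic area backing @p (>= 24 here);
	 * callable from atomic context, no pcpu_lock needed.
	 */
	pr_info("pcpu_alloc_size() = %zu\n", pcpu_alloc_size(p));

	free_percpu(p);
	return 0;
}
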
20 changes: 19 additions & 1 deletion tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
@@ -9,9 +9,10 @@

#include "test_bpf_ma.skel.h"

void test_test_bpf_ma(void)
static void do_bpf_ma_test(const char *name)
{
struct test_bpf_ma *skel;
struct bpf_program *prog;
struct btf *btf;
int i, err;

@@ -34,6 +35,11 @@ void test_test_bpf_ma(void)
skel->rodata->data_btf_ids[i] = id;
}

prog = bpf_object__find_program_by_name(skel->obj, name);
if (!ASSERT_OK_PTR(prog, "invalid prog name"))
goto out;
bpf_program__set_autoload(prog, true);

err = test_bpf_ma__load(skel);
if (!ASSERT_OK(err, "load"))
goto out;
@@ -48,3 +54,15 @@ void test_test_bpf_ma(void)
out:
test_bpf_ma__destroy(skel);
}

void test_test_bpf_ma(void)
{
if (test__start_subtest("batch_alloc_free"))
do_bpf_ma_test("test_batch_alloc_free");
if (test__start_subtest("free_through_map_free"))
do_bpf_ma_test("test_free_through_map_free");
if (test__start_subtest("batch_percpu_alloc_free"))
do_bpf_ma_test("test_batch_percpu_alloc_free");
if (test__start_subtest("percpu_free_through_map_free"))
do_bpf_ma_test("test_percpu_free_through_map_free");
}