bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.

Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.

The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.

Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt member indicates how many special fields were
recognized, and field_mask is a bitmask of the field types that were
found, enabling a quick check for whether a certain field is present.

Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.

Signed-off-by: Kumar Kartikeya Dwivedi <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
kkdwivedi authored and Alexei Starovoitov committed Nov 4, 2022
1 parent a28ace7 commit aa3496a
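
To make the new layout concrete, here is a minimal userspace mock of the structures this patch introduces (a sketch only: the btf, module, and dtor members of btf_field_kptr are omitted, and the record contents are hypothetical):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified mirrors of the definitions added to include/linux/bpf.h below. */
enum btf_field_type {
	BPF_KPTR_UNREF = (1 << 2),
	BPF_KPTR_REF   = (1 << 3),
	BPF_KPTR       = BPF_KPTR_UNREF | BPF_KPTR_REF,
};

struct btf_field {
	uint32_t offset;		/* byte offset of the field in the value */
	enum btf_field_type type;
};

struct btf_record {
	uint32_t cnt;			/* number of recognized special fields */
	uint32_t field_mask;		/* bitmask of field types present */
	struct btf_field fields[8];	/* fixed size here; BTF_FIELDS_MAX flex array in the kernel */
};

int main(void)
{
	/* Hypothetical record: one referenced kptr at byte offset 8. */
	struct btf_record rec = {
		.cnt        = 1,
		.field_mask = BPF_KPTR_REF,
		.fields     = { { .offset = 8, .type = BPF_KPTR_REF } },
	};

	/* field_mask turns "does this type contain any kptr?" into a single
	 * AND, the same trick btf_record_has_field() uses below. */
	if (rec.field_mask & BPF_KPTR)
		printf("%" PRIu32 " special field(s), first at offset %" PRIu32 "\n",
		       rec.cnt, rec.fields[0].offset);
	return 0;
}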
Showing 8 changed files with 332 additions and 263 deletions.
125 changes: 81 additions & 44 deletions include/linux/bpf.h
@@ -165,35 +165,41 @@ struct bpf_map_ops {
 };
 
 enum {
-	/* Support at most 8 pointers in a BPF map value */
-	BPF_MAP_VALUE_OFF_MAX = 8,
-	BPF_MAP_OFF_ARR_MAX   = BPF_MAP_VALUE_OFF_MAX +
+	/* Support at most 8 pointers in a BTF type */
+	BTF_FIELDS_MAX        = 8,
+	BPF_MAP_OFF_ARR_MAX   = BTF_FIELDS_MAX +
 				1 + /* for bpf_spin_lock */
 				1,  /* for bpf_timer */
 };
 
-enum bpf_kptr_type {
-	BPF_KPTR_UNREF,
-	BPF_KPTR_REF,
+enum btf_field_type {
+	BPF_KPTR_UNREF = (1 << 2),
+	BPF_KPTR_REF   = (1 << 3),
+	BPF_KPTR       = BPF_KPTR_UNREF | BPF_KPTR_REF,
 };
 
-struct bpf_map_value_off_desc {
+struct btf_field_kptr {
+	struct btf *btf;
+	struct module *module;
+	btf_dtor_kfunc_t dtor;
+	u32 btf_id;
+};
+
+struct btf_field {
 	u32 offset;
-	enum bpf_kptr_type type;
-	struct {
-		struct btf *btf;
-		struct module *module;
-		btf_dtor_kfunc_t dtor;
-		u32 btf_id;
-	} kptr;
+	enum btf_field_type type;
+	union {
+		struct btf_field_kptr kptr;
+	};
 };
 
-struct bpf_map_value_off {
-	u32 nr_off;
-	struct bpf_map_value_off_desc off[];
+struct btf_record {
+	u32 cnt;
+	u32 field_mask;
+	struct btf_field fields[];
 };
 
-struct bpf_map_off_arr {
+struct btf_field_offs {
 	u32 cnt;
 	u32 field_off[BPF_MAP_OFF_ARR_MAX];
 	u8 field_sz[BPF_MAP_OFF_ARR_MAX];
@@ -215,7 +221,7 @@ struct bpf_map {
 	u64 map_extra; /* any per-map-type extra fields */
 	u32 map_flags;
 	int spin_lock_off; /* >=0 valid offset, <0 error */
-	struct bpf_map_value_off *kptr_off_tab;
+	struct btf_record *record;
 	int timer_off; /* >=0 valid offset, <0 error */
 	u32 id;
 	int numa_node;
@@ -227,7 +233,7 @@ struct bpf_map {
 	struct obj_cgroup *objcg;
 #endif
 	char name[BPF_OBJ_NAME_LEN];
-	struct bpf_map_off_arr *off_arr;
+	struct btf_field_offs *field_offs;
 	/* The 3rd and 4th cacheline with misc members to avoid false sharing
 	 * particularly with refcounting.
 	 */
@@ -251,6 +257,37 @@ struct bpf_map {
 	bool frozen; /* write-once; write-protected by freeze_mutex */
 };
 
+static inline u32 btf_field_type_size(enum btf_field_type type)
+{
+	switch (type) {
+	case BPF_KPTR_UNREF:
+	case BPF_KPTR_REF:
+		return sizeof(u64);
+	default:
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+}
+
+static inline u32 btf_field_type_align(enum btf_field_type type)
+{
+	switch (type) {
+	case BPF_KPTR_UNREF:
+	case BPF_KPTR_REF:
+		return __alignof__(u64);
+	default:
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+}
+
+static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_field_type type)
+{
+	if (IS_ERR_OR_NULL(rec))
+		return false;
+	return rec->field_mask & type;
+}
+
 static inline bool map_value_has_spin_lock(const struct bpf_map *map)
 {
 	return map->spin_lock_off >= 0;
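
A hypothetical caller (for instance, a later verifier or map-creation check) can now ask about a whole class of fields with one mask test instead of scanning a table. This helper is illustrative only, not part of the patch:

/* Sketch: BPF_KPTR covers both the unreferenced and referenced kptr bits. */
static bool map_value_has_any_kptr(const struct bpf_map *map)
{
	return btf_record_has_field(map->record, BPF_KPTR);
}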
@@ -261,23 +298,19 @@ static inline bool map_value_has_timer(const struct bpf_map *map)
 	return map->timer_off >= 0;
 }
 
-static inline bool map_value_has_kptrs(const struct bpf_map *map)
-{
-	return !IS_ERR_OR_NULL(map->kptr_off_tab);
-}
-
 static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
 {
 	if (unlikely(map_value_has_spin_lock(map)))
 		memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
 	if (unlikely(map_value_has_timer(map)))
 		memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
-	if (unlikely(map_value_has_kptrs(map))) {
-		struct bpf_map_value_off *tab = map->kptr_off_tab;
+	if (!IS_ERR_OR_NULL(map->record)) {
+		struct btf_field *fields = map->record->fields;
+		u32 cnt = map->record->cnt;
 		int i;
 
-		for (i = 0; i < tab->nr_off; i++)
-			*(u64 *)(dst + tab->off[i].offset) = 0;
+		for (i = 0; i < cnt; i++)
+			memset(dst + fields[i].offset, 0, btf_field_type_size(fields[i].type));
 	}
 }
 
@@ -303,19 +336,20 @@ static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, bool long_memcpy)
 	u32 curr_off = 0;
 	int i;
 
-	if (likely(!map->off_arr)) {
+	if (likely(!map->field_offs)) {
 		if (long_memcpy)
 			bpf_long_memcpy(dst, src, round_up(map->value_size, 8));
 		else
 			memcpy(dst, src, map->value_size);
 		return;
 	}
 
-	for (i = 0; i < map->off_arr->cnt; i++) {
-		u32 next_off = map->off_arr->field_off[i];
+	for (i = 0; i < map->field_offs->cnt; i++) {
+		u32 next_off = map->field_offs->field_off[i];
+		u32 sz = next_off - curr_off;
 
-		memcpy(dst + curr_off, src + curr_off, next_off - curr_off);
-		curr_off += map->off_arr->field_sz[i];
+		memcpy(dst + curr_off, src + curr_off, sz);
+		curr_off += map->field_offs->field_sz[i];
 	}
 	memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off);
 }
@@ -335,16 +369,17 @@ static inline void zero_map_value(struct bpf_map *map, void *dst)
 	u32 curr_off = 0;
 	int i;
 
-	if (likely(!map->off_arr)) {
+	if (likely(!map->field_offs)) {
 		memset(dst, 0, map->value_size);
 		return;
 	}
 
-	for (i = 0; i < map->off_arr->cnt; i++) {
-		u32 next_off = map->off_arr->field_off[i];
+	for (i = 0; i < map->field_offs->cnt; i++) {
+		u32 next_off = map->field_offs->field_off[i];
+		u32 sz = next_off - curr_off;
 
-		memset(dst + curr_off, 0, next_off - curr_off);
-		curr_off += map->off_arr->field_sz[i];
+		memset(dst + curr_off, 0, sz);
+		curr_off += map->field_offs->field_sz[i];
 	}
 	memset(dst + curr_off, 0, map->value_size - curr_off);
 }
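
As a worked example of the loops above, take hypothetical numbers: value_size = 48 and a single 8-byte kptr recorded at offset 16, so field_offs holds cnt = 1, field_off = {16}, field_sz = {8}. zero_map_value() then executes:

	memset(dst +  0, 0, 16);	/* bytes [0, 16): everything before the kptr */
	/* bytes [16, 24) are skipped so the kptr itself is never clobbered */
	memset(dst + 24, 0, 24);	/* bytes [24, 48): the remainder */

__copy_map_value() walks the same offsets, with memcpy() in place of memset().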
@@ -1699,11 +1734,13 @@ void bpf_prog_put(struct bpf_prog *prog);
 void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
 void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
 
-struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset);
-void bpf_map_free_kptr_off_tab(struct bpf_map *map);
-struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map);
-bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b);
-void bpf_map_free_kptrs(struct bpf_map *map, void *map_value);
+struct btf_field *btf_record_find(const struct btf_record *rec,
+				  u32 offset, enum btf_field_type type);
+void btf_record_free(struct btf_record *rec);
+void bpf_map_free_record(struct bpf_map *map);
+struct btf_record *btf_record_dup(const struct btf_record *rec);
+bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
+void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
 
 struct bpf_map *bpf_map_get(u32 ufd);
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
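A rough sketch of how these helpers are intended to compose over a map's lifetime, inferred from the call sites in this patch and its description (the create and copy paths live in files not shown in this excerpt):

/* Sketch, not kernel code:
 *   create:       map->record = btf_parse_fields(btf, value_type);
 *   copy/compat:  btf_record_dup() and btf_record_equal() take over from
 *                 bpf_map_copy_kptr_off_tab() / bpf_map_equal_kptr_off_tab();
 *   per element:  bpf_obj_free_fields(map->record, value) releases the
 *                 special fields of one map value;
 *   destroy:      bpf_map_free_record(map), built on btf_record_free().
 */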
3 changes: 1 addition & 2 deletions include/linux/btf.h
@@ -163,8 +163,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
 			   u32 expected_offset, u32 expected_size);
 int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
 int btf_find_timer(const struct btf *btf, const struct btf_type *t);
-struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
-					  const struct btf_type *t);
+struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t);
 bool btf_type_is_void(const struct btf_type *t);
 s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
 const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
13 changes: 6 additions & 7 deletions kernel/bpf/arraymap.c
@@ -310,8 +310,7 @@ static void check_and_free_fields(struct bpf_array *arr, void *val)
 {
 	if (map_value_has_timer(&arr->map))
 		bpf_timer_cancel_and_free(val + arr->map.timer_off);
-	if (map_value_has_kptrs(&arr->map))
-		bpf_map_free_kptrs(&arr->map, val);
+	bpf_obj_free_fields(arr->map.record, val);
 }
 
 /* Called from syscall or from eBPF program */
@@ -409,7 +408,7 @@ static void array_map_free_timers(struct bpf_map *map)
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	int i;
 
-	/* We don't reset or free kptr on uref dropping to zero. */
+	/* We don't reset or free fields other than timer on uref dropping to zero. */
 	if (!map_value_has_timer(map))
 		return;
 
@@ -423,22 +422,22 @@ static void array_map_free(struct bpf_map *map)
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	int i;
 
-	if (map_value_has_kptrs(map)) {
+	if (!IS_ERR_OR_NULL(map->record)) {
 		if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 			for (i = 0; i < array->map.max_entries; i++) {
 				void __percpu *pptr = array->pptrs[i & array->index_mask];
 				int cpu;
 
 				for_each_possible_cpu(cpu) {
-					bpf_map_free_kptrs(map, per_cpu_ptr(pptr, cpu));
+					bpf_obj_free_fields(map->record, per_cpu_ptr(pptr, cpu));
 					cond_resched();
 				}
 			}
 		} else {
 			for (i = 0; i < array->map.max_entries; i++)
-				bpf_map_free_kptrs(map, array_map_elem_ptr(array, i));
+				bpf_obj_free_fields(map->record, array_map_elem_ptr(array, i));
 		}
-		bpf_map_free_kptr_off_tab(map);
+		bpf_map_free_record(map);
 	}
 
 	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
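For orientation, a simplified sketch of what bpf_obj_free_fields() is expected to do per object; the real implementation lives in kernel/bpf/syscall.c, which this excerpt does not show, and the kptr destructor plumbing is elided here:

/* Sketch only: release every special field recorded for an object. */
void sketch_obj_free_fields(const struct btf_record *rec, void *obj)
{
	u32 i;

	/* Tolerating a NULL/ERR record is what lets callers such as
	 * check_and_free_fields() above drop their own guard. */
	if (IS_ERR_OR_NULL(rec))
		return;
	for (i = 0; i < rec->cnt; i++) {
		void *field_ptr = obj + rec->fields[i].offset;

		switch (rec->fields[i].type) {
		case BPF_KPTR_UNREF:
			WRITE_ONCE(*(u64 *)field_ptr, 0);
			break;
		case BPF_KPTR_REF:
			/* xchg() the pointer out and call the field's dtor */
			break;
		default:
			break;
		}
	}
}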
