Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Browse files Browse the repository at this point in the history
Daniel Borkmann says:

====================
pull-request: bpf 2018-01-09

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Prevent out-of-bounds speculation in BPF maps by masking the
   index after bounds checks in order to fix spectre v1, and
   add an option BPF_JIT_ALWAYS_ON into Kconfig that allows for
   removing the BPF interpreter from the kernel in favor of
   JIT-only mode to make spectre v2 harder, from Alexei.

2) Remove false sharing of map refcount with max_entries which
   was used in spectre v1, from Daniel.

3) Add a missing NULL psock check in sockmap in order to fix
   a race, from John.

4) Fix test_align BPF selftest case since a recent change in
   verifier rejects the bit-wise arithmetic on pointers
   earlier but test_align update was missing, from Alexei.
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Jan 10, 2018
2 parents 4512c43 + 290af86 commit 661e4e3
Show file tree
Hide file tree
Showing 11 changed files with 150 additions and 50 deletions.
26 changes: 18 additions & 8 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,14 @@ struct bpf_map_ops {
};

struct bpf_map {
atomic_t refcnt;
/* 1st cacheline with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
*/
const struct bpf_map_ops *ops ____cacheline_aligned;
struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
void *security;
#endif
enum bpf_map_type map_type;
u32 key_size;
u32 value_size;
Expand All @@ -52,15 +59,17 @@ struct bpf_map {
u32 pages;
u32 id;
int numa_node;
struct user_struct *user;
const struct bpf_map_ops *ops;
struct work_struct work;
bool unpriv_array;
/* 7 bytes hole */

/* 2nd cacheline with misc members to avoid false sharing
* particularly with refcounting.
*/
struct user_struct *user ____cacheline_aligned;
atomic_t refcnt;
atomic_t usercnt;
struct bpf_map *inner_map_meta;
struct work_struct work;
char name[BPF_OBJ_NAME_LEN];
#ifdef CONFIG_SECURITY
void *security;
#endif
};

/* function argument constraints */
Expand Down Expand Up @@ -221,6 +230,7 @@ struct bpf_prog_aux {
struct bpf_array {
struct bpf_map map;
u32 elem_size;
u32 index_mask;
/* 'ownership' of prog_array is claimed by the first program that
* is going to use this map or by the first program which FD is stored
* in the map to make sure that all callers and callees have the same
Expand Down
7 changes: 7 additions & 0 deletions init/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -1396,6 +1396,13 @@ config BPF_SYSCALL
Enable the bpf() system call that allows to manipulate eBPF
programs and maps via file descriptors.

config BPF_JIT_ALWAYS_ON
bool "Permanently enable BPF JIT and remove BPF interpreter"
depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
help
Enables BPF JIT and removes BPF interpreter to avoid
speculative execution of BPF instructions by the interpreter

config USERFAULTFD
bool "Enable userfaultfd() system call"
select ANON_INODES
Expand Down
47 changes: 36 additions & 11 deletions kernel/bpf/arraymap.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
struct bpf_array *array;
u64 array_size;
u32 elem_size;

/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
Expand All @@ -72,11 +73,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)

elem_size = round_up(attr->value_size, 8);

max_entries = attr->max_entries;
index_mask = roundup_pow_of_two(max_entries) - 1;

if (unpriv)
/* round up array size to nearest power of 2,
* since cpu will speculate within index_mask limits
*/
max_entries = index_mask + 1;

array_size = sizeof(*array);
if (percpu)
array_size += (u64) attr->max_entries * sizeof(void *);
array_size += (u64) max_entries * sizeof(void *);
else
array_size += (u64) attr->max_entries * elem_size;
array_size += (u64) max_entries * elem_size;

/* make sure there is no u32 overflow later in round_up() */
if (array_size >= U32_MAX - PAGE_SIZE)
Expand All @@ -86,6 +96,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array = bpf_map_area_alloc(array_size, numa_node);
if (!array)
return ERR_PTR(-ENOMEM);
array->index_mask = index_mask;
array->map.unpriv_array = unpriv;

/* copy mandatory map attributes */
array->map.map_type = attr->map_type;
Expand Down Expand Up @@ -121,12 +133,13 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
if (unlikely(index >= array->map.max_entries))
return NULL;

return array->value + array->elem_size * index;
return array->value + array->elem_size * (index & array->index_mask);
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_insn *insn = insn_buf;
u32 elem_size = round_up(map->value_size, 8);
const int ret = BPF_REG_0;
Expand All @@ -135,7 +148,12 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)

*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
if (map->unpriv_array) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
}

if (is_power_of_2(elem_size)) {
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
Expand All @@ -157,7 +175,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
if (unlikely(index >= array->map.max_entries))
return NULL;

return this_cpu_ptr(array->pptrs[index]);
return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
Expand All @@ -177,7 +195,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
*/
size = round_up(map->value_size, 8);
rcu_read_lock();
pptr = array->pptrs[index];
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
off += size;
Expand Down Expand Up @@ -225,10 +243,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
return -EEXIST;

if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
memcpy(this_cpu_ptr(array->pptrs[index]),
memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
value, map->value_size);
else
memcpy(array->value + array->elem_size * index,
memcpy(array->value +
array->elem_size * (index & array->index_mask),
value, map->value_size);
return 0;
}
Expand Down Expand Up @@ -262,7 +281,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
*/
size = round_up(map->value_size, 8);
rcu_read_lock();
pptr = array->pptrs[index];
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
off += size;
Expand Down Expand Up @@ -613,6 +632,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
static u32 array_of_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 elem_size = round_up(map->value_size, 8);
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;
Expand All @@ -621,7 +641,12 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,

*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
if (map->unpriv_array) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
}
if (is_power_of_2(elem_size))
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
else
Expand Down
19 changes: 19 additions & 0 deletions kernel/bpf/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
}
EXPORT_SYMBOL_GPL(__bpf_call_base);

#ifndef CONFIG_BPF_JIT_ALWAYS_ON
/**
* __bpf_prog_run - run eBPF program on a given context
* @ctx: is the data we are operating on
Expand Down Expand Up @@ -1317,6 +1318,14 @@ EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
};

#else
static unsigned int __bpf_prog_ret0(const void *ctx,
const struct bpf_insn *insn)
{
return 0;
}
#endif

bool bpf_prog_array_compatible(struct bpf_array *array,
const struct bpf_prog *fp)
{
Expand Down Expand Up @@ -1364,9 +1373,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
*/
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);

fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
#else
fp->bpf_func = __bpf_prog_ret0;
#endif

/* eBPF JITs can rewrite the program in case constant
* blinding is active. However, in case of error during
Expand All @@ -1376,6 +1389,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
*/
if (!bpf_prog_is_dev_bound(fp->aux)) {
fp = bpf_int_jit_compile(fp);
#ifdef CONFIG_BPF_JIT_ALWAYS_ON
if (!fp->jited) {
*err = -ENOTSUPP;
return fp;
}
#endif
} else {
*err = bpf_prog_offload_compile(fp);
if (*err)
Expand Down
11 changes: 9 additions & 2 deletions kernel/bpf/sockmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -591,8 +591,15 @@ static void sock_map_free(struct bpf_map *map)

write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock);
smap_list_remove(psock, &stab->sock_map[i]);
smap_release_sock(psock, sock);
/* This check handles a racing sock event that can get the
* sk_callback_lock before this case but after xchg happens
* causing the refcnt to hit zero and sock user data (psock)
* to be null and queued for garbage collection.
*/
if (likely(psock)) {
smap_list_remove(psock, &stab->sock_map[i]);
smap_release_sock(psock, sock);
}
write_unlock_bh(&sock->sk_callback_lock);
}
rcu_read_unlock();
Expand Down
36 changes: 36 additions & 0 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -1729,6 +1729,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
if (err)
return err;
if (func_id == BPF_FUNC_tail_call) {
if (meta.map_ptr == NULL) {
verbose(env, "verifier bug\n");
return -EINVAL;
}
env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
}
err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
if (err)
return err;
Expand Down Expand Up @@ -4456,6 +4463,35 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
*/
insn->imm = 0;
insn->code = BPF_JMP | BPF_TAIL_CALL;

/* instead of changing every JIT dealing with tail_call
* emit two extra insns:
* if (index >= max_entries) goto out;
* index &= array->index_mask;
* to avoid out-of-bounds cpu speculation
*/
map_ptr = env->insn_aux_data[i + delta].map_ptr;
if (map_ptr == BPF_MAP_PTR_POISON) {
verbose(env, "tail_call obusing map_ptr\n");
return -EINVAL;
}
if (!map_ptr->unpriv_array)
continue;
insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
map_ptr->max_entries, 2);
insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
container_of(map_ptr,
struct bpf_array,
map)->index_mask);
insn_buf[2] = *insn;
cnt = 3;
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;

delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
continue;
}

Expand Down
11 changes: 7 additions & 4 deletions lib/test_bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -6250,9 +6250,8 @@ static struct bpf_prog *generate_filter(int which, int *err)
return NULL;
}
}
/* We don't expect to fail. */
if (*err) {
pr_cont("FAIL to attach err=%d len=%d\n",
pr_cont("FAIL to prog_create err=%d len=%d\n",
*err, fprog.len);
return NULL;
}
Expand All @@ -6276,6 +6275,10 @@ static struct bpf_prog *generate_filter(int which, int *err)
* checks.
*/
fp = bpf_prog_select_runtime(fp, err);
if (*err) {
pr_cont("FAIL to select_runtime err=%d\n", *err);
return NULL;
}
break;
}

Expand Down Expand Up @@ -6461,8 +6464,8 @@ static __init int test_bpf(void)
pass_cnt++;
continue;
}

return err;
err_cnt++;
continue;
}

pr_cont("jited:%u ", fp->jited);
Expand Down
6 changes: 2 additions & 4 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -1054,11 +1054,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
*/
goto out_err_free;

/* We are guaranteed to never error here with cBPF to eBPF
* transitions, since there's no issue with type compatibility
* checks on program arrays.
*/
fp = bpf_prog_select_runtime(fp, &err);
if (err)
goto out_err_free;

kfree(old_prog);
return fp;
Expand Down
6 changes: 6 additions & 0 deletions net/core/sysctl_net_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,13 @@ static struct ctl_table net_core_table[] = {
.data = &bpf_jit_enable,
.maxlen = sizeof(int),
.mode = 0644,
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
.proc_handler = proc_dointvec
#else
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
.extra2 = &one,
#endif
},
# ifdef CONFIG_HAVE_EBPF_JIT
{
Expand Down
9 changes: 9 additions & 0 deletions net/socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -2619,6 +2619,15 @@ static int __init sock_init(void)

core_initcall(sock_init); /* early initcall */

static int __init jit_init(void)
{
#ifdef CONFIG_BPF_JIT_ALWAYS_ON
bpf_jit_enable = 1;
#endif
return 0;
}
pure_initcall(jit_init);

#ifdef CONFIG_PROC_FS
void socket_seq_show(struct seq_file *seq)
{
Expand Down
Loading

0 comments on commit 661e4e3

Please sign in to comment.