Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Browse files Browse the repository at this point in the history
Alexei Starovoitov says:

====================
pull-request: bpf 2020-06-17

The following pull-request contains BPF updates for your *net* tree.

We've added 10 non-merge commits during the last 2 day(s) which contain
a total of 14 files changed, 158 insertions(+), 59 deletions(-).

The main changes are:

1) Important fix for bpf_probe_read_kernel_str() return value, from Andrii.

2) [gs]etsockopt fix for large optlen, from Stanislav.

3) devmap allocation fix, from Toke.
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Jun 17, 2020
2 parents 6911967 + 8030e25 commit b9d37bb
Show file tree
Hide file tree
Showing 14 changed files with 158 additions and 59 deletions.
14 changes: 14 additions & 0 deletions Documentation/bpf/prog_cgroup_sockopt.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,20 @@ then the next program in the chain (A) will see those changes,
*not* the original input ``setsockopt`` arguments. The potentially
modified values will be then passed down to the kernel.

Large optval
============
When the ``optval`` is larger than ``PAGE_SIZE``, the BPF program
can access only the first ``PAGE_SIZE`` bytes of that data. So it has two options:

* Set ``optlen`` to zero, which indicates that the kernel should
use the original buffer from the userspace. Any modifications
done by the BPF program to the ``optval`` are ignored.
* Set ``optlen`` to a value less than ``PAGE_SIZE``, which
indicates that the kernel should use BPF's trimmed ``optval``.

When the BPF program returns with ``optlen`` greater than
``PAGE_SIZE``, userspace will receive an ``EFAULT`` error.

Example
=======

Expand Down
2 changes: 1 addition & 1 deletion include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -3168,7 +3168,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* void *bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
* int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
* Description
* Copy *size* bytes from *data* into a ring buffer *ringbuf*.
* If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
Expand Down
53 changes: 33 additions & 20 deletions kernel/bpf/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -1276,16 +1276,23 @@ static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,

static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
{
if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0)
if (unlikely(max_optlen < 0))
return -EINVAL;

if (unlikely(max_optlen > PAGE_SIZE)) {
/* We don't expose optvals that are greater than PAGE_SIZE
* to the BPF program.
*/
max_optlen = PAGE_SIZE;
}

ctx->optval = kzalloc(max_optlen, GFP_USER);
if (!ctx->optval)
return -ENOMEM;

ctx->optval_end = ctx->optval + max_optlen;

return 0;
return max_optlen;
}

static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
Expand Down Expand Up @@ -1319,13 +1326,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
*/
max_optlen = max_t(int, 16, *optlen);

ret = sockopt_alloc_buf(&ctx, max_optlen);
if (ret)
return ret;
max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
if (max_optlen < 0)
return max_optlen;

ctx.optlen = *optlen;

if (copy_from_user(ctx.optval, optval, *optlen) != 0) {
if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) {
ret = -EFAULT;
goto out;
}
Expand Down Expand Up @@ -1353,8 +1360,14 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
/* export any potential modifications */
*level = ctx.level;
*optname = ctx.optname;
*optlen = ctx.optlen;
*kernel_optval = ctx.optval;

/* optlen == 0 from BPF indicates that we should
* use original userspace data.
*/
if (ctx.optlen != 0) {
*optlen = ctx.optlen;
*kernel_optval = ctx.optval;
}
}

out:
Expand Down Expand Up @@ -1385,12 +1398,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
return retval;

ret = sockopt_alloc_buf(&ctx, max_optlen);
if (ret)
return ret;

ctx.optlen = max_optlen;

max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
if (max_optlen < 0)
return max_optlen;

if (!retval) {
/* If kernel getsockopt finished successfully,
* copy whatever was returned to the user back
Expand All @@ -1404,10 +1417,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
goto out;
}

if (ctx.optlen > max_optlen)
ctx.optlen = max_optlen;

if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) {
if (copy_from_user(ctx.optval, optval,
min(ctx.optlen, max_optlen)) != 0) {
ret = -EFAULT;
goto out;
}
Expand Down Expand Up @@ -1436,10 +1447,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
goto out;
}

if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
put_user(ctx.optlen, optlen)) {
ret = -EFAULT;
goto out;
if (ctx.optlen != 0) {
if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
put_user(ctx.optlen, optlen)) {
ret = -EFAULT;
goto out;
}
}

ret = ctx.retval;
Expand Down
10 changes: 6 additions & 4 deletions kernel/bpf/devmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,13 @@ static DEFINE_PER_CPU(struct list_head, dev_flush_list);
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);

static struct hlist_head *dev_map_create_hash(unsigned int entries)
static struct hlist_head *dev_map_create_hash(unsigned int entries,
int numa_node)
{
int i;
struct hlist_head *hash;

hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL);
hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node);
if (hash != NULL)
for (i = 0; i < entries; i++)
INIT_HLIST_HEAD(&hash[i]);
Expand Down Expand Up @@ -145,7 +146,8 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
return -EINVAL;

if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
dtab->map.numa_node);
if (!dtab->dev_index_head)
goto free_charge;

Expand Down Expand Up @@ -232,7 +234,7 @@ static void dev_map_free(struct bpf_map *map)
}
}

kfree(dtab->dev_index_head);
bpf_map_area_free(dtab->dev_index_head);
} else {
for (i = 0; i < dtab->map.max_entries; i++) {
struct bpf_dtab_netdev *dev;
Expand Down
2 changes: 1 addition & 1 deletion kernel/trace/bpf_trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
if (unlikely(ret < 0))
goto fail;

return 0;
return ret;
fail:
memset(dst, 0, size);
return ret;
Expand Down
1 change: 1 addition & 0 deletions net/core/xdp.c
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
xdpf->len = totsize - metasize;
xdpf->headroom = 0;
xdpf->metasize = metasize;
xdpf->frame_sz = PAGE_SIZE;
xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;

xsk_buff_free(xdp);
Expand Down
8 changes: 2 additions & 6 deletions samples/bpf/xdp_monitor_user.c
Original file line number Diff line number Diff line change
Expand Up @@ -509,11 +509,8 @@ static void *alloc_rec_per_cpu(int record_size)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
void *array;
size_t size;

size = record_size * nr_cpus;
array = malloc(size);
memset(array, 0, size);
array = calloc(nr_cpus, record_size);
if (!array) {
fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
exit(EXIT_FAIL_MEM);
Expand All @@ -528,8 +525,7 @@ static struct stats_record *alloc_stats_record(void)
int i;

/* Alloc main stats_record structure */
rec = malloc(sizeof(*rec));
memset(rec, 0, sizeof(*rec));
rec = calloc(1, sizeof(*rec));
if (!rec) {
fprintf(stderr, "Mem alloc error\n");
exit(EXIT_FAIL_MEM);
Expand Down
7 changes: 2 additions & 5 deletions samples/bpf/xdp_redirect_cpu_user.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,11 +207,8 @@ static struct datarec *alloc_record_per_cpu(void)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
struct datarec *array;
size_t size;

size = sizeof(struct datarec) * nr_cpus;
array = malloc(size);
memset(array, 0, size);
array = calloc(nr_cpus, sizeof(struct datarec));
if (!array) {
fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
exit(EXIT_FAIL_MEM);
Expand All @@ -226,11 +223,11 @@ static struct stats_record *alloc_stats_record(void)

size = sizeof(*rec) + n_cpus * sizeof(struct record);
rec = malloc(size);
memset(rec, 0, size);
if (!rec) {
fprintf(stderr, "Mem alloc error\n");
exit(EXIT_FAIL_MEM);
}
memset(rec, 0, size);
rec->rx_cnt.cpu = alloc_record_per_cpu();
rec->redir_err.cpu = alloc_record_per_cpu();
rec->kthread.cpu = alloc_record_per_cpu();
Expand Down
13 changes: 3 additions & 10 deletions samples/bpf/xdp_rxq_info_user.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,11 +198,8 @@ static struct datarec *alloc_record_per_cpu(void)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
struct datarec *array;
size_t size;

size = sizeof(struct datarec) * nr_cpus;
array = malloc(size);
memset(array, 0, size);
array = calloc(nr_cpus, sizeof(struct datarec));
if (!array) {
fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
exit(EXIT_FAIL_MEM);
Expand All @@ -214,11 +211,8 @@ static struct record *alloc_record_per_rxq(void)
{
unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
struct record *array;
size_t size;

size = sizeof(struct record) * nr_rxqs;
array = malloc(size);
memset(array, 0, size);
array = calloc(nr_rxqs, sizeof(struct record));
if (!array) {
fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs);
exit(EXIT_FAIL_MEM);
Expand All @@ -232,8 +226,7 @@ static struct stats_record *alloc_stats_record(void)
struct stats_record *rec;
int i;

rec = malloc(sizeof(*rec));
memset(rec, 0, sizeof(*rec));
rec = calloc(1, sizeof(struct stats_record));
if (!rec) {
fprintf(stderr, "Mem alloc error\n");
exit(EXIT_FAIL_MEM);
Expand Down
2 changes: 1 addition & 1 deletion tools/bpf/bpftool/Documentation/bpftool-map.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ MAP COMMANDS
| | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
| | **queue** | **stack** | **sk_storage** | **struct_ops** }
| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** }
DESCRIPTION
===========
Expand Down
3 changes: 2 additions & 1 deletion tools/bpf/bpftool/map.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ const char * const map_type_name[] = {
[BPF_MAP_TYPE_STACK] = "stack",
[BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
[BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
[BPF_MAP_TYPE_RINGBUF] = "ringbuf",
};

const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
Expand Down Expand Up @@ -1590,7 +1591,7 @@ static int do_help(int argc, char **argv)
" lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
" queue | stack | sk_storage | struct_ops }\n"
" queue | stack | sk_storage | struct_ops | ringbuf }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, argv[-2]);
Expand Down
2 changes: 1 addition & 1 deletion tools/include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -3168,7 +3168,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* void *bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
* int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
* Description
* Copy *size* bytes from *data* into a ring buffer *ringbuf*.
* If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
Expand Down
46 changes: 39 additions & 7 deletions tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ static int getsetsockopt(void)
char cc[16]; /* TCP_CA_NAME_MAX */
} buf = {};
socklen_t optlen;
char *big_buf = NULL;

fd = socket(AF_INET, SOCK_STREAM, 0);
if (fd < 0) {
Expand All @@ -22,24 +23,31 @@ static int getsetsockopt(void)

/* IP_TOS - BPF bypass */

buf.u8[0] = 0x08;
err = setsockopt(fd, SOL_IP, IP_TOS, &buf, 1);
optlen = getpagesize() * 2;
big_buf = calloc(1, optlen);
if (!big_buf) {
log_err("Couldn't allocate two pages");
goto err;
}

*(int *)big_buf = 0x08;
err = setsockopt(fd, SOL_IP, IP_TOS, big_buf, optlen);
if (err) {
log_err("Failed to call setsockopt(IP_TOS)");
goto err;
}

buf.u8[0] = 0x00;
memset(big_buf, 0, optlen);
optlen = 1;
err = getsockopt(fd, SOL_IP, IP_TOS, &buf, &optlen);
err = getsockopt(fd, SOL_IP, IP_TOS, big_buf, &optlen);
if (err) {
log_err("Failed to call getsockopt(IP_TOS)");
goto err;
}

if (buf.u8[0] != 0x08) {
log_err("Unexpected getsockopt(IP_TOS) buf[0] 0x%02x != 0x08",
buf.u8[0]);
if (*(int *)big_buf != 0x08) {
log_err("Unexpected getsockopt(IP_TOS) optval 0x%x != 0x08",
*(int *)big_buf);
goto err;
}

Expand Down Expand Up @@ -78,6 +86,28 @@ static int getsetsockopt(void)
goto err;
}

/* IP_FREEBIND - BPF can't access optval past PAGE_SIZE */

optlen = getpagesize() * 2;
memset(big_buf, 0, optlen);

err = setsockopt(fd, SOL_IP, IP_FREEBIND, big_buf, optlen);
if (err != 0) {
log_err("Failed to call setsockopt, ret=%d", err);
goto err;
}

err = getsockopt(fd, SOL_IP, IP_FREEBIND, big_buf, &optlen);
if (err != 0) {
log_err("Failed to call getsockopt, ret=%d", err);
goto err;
}

if (optlen != 1 || *(__u8 *)big_buf != 0x55) {
log_err("Unexpected IP_FREEBIND getsockopt, optlen=%d, optval=0x%x",
optlen, *(__u8 *)big_buf);
}

/* SO_SNDBUF is overwritten */

buf.u32 = 0x01010101;
Expand Down Expand Up @@ -124,9 +154,11 @@ static int getsetsockopt(void)
goto err;
}

free(big_buf);
close(fd);
return 0;
err:
free(big_buf);
close(fd);
return -1;
}
Expand Down
Loading

0 comments on commit b9d37bb

Please sign in to comment.