Skip to content

Commit

Permalink
Merge branch 'bpf-cgroup2'
Browse files Browse the repository at this point in the history
Martin KaFai Lau says:

====================
cgroup: bpf: cgroup2 membership test on skb

This series is to implement a bpf-way to
check the cgroup2 membership of a skb (sk_buff).

It is similar to the feature added in netfilter:
c38c459 ("netfilter: implement xt_cgroup cgroup2 path match")

The current target is the tc-like usage.

v3:
- Remove WARN_ON_ONCE(!rcu_read_lock_held())
- Stop BPF_MAP_TYPE_CGROUP_ARRAY usage in patch 2/4
- Avoid mounting bpf fs manually in patch 4/4

- Thanks to Daniel for the review and the above suggestions

- Check CONFIG_SOCK_CGROUP_DATA instead of CONFIG_CGROUPS.  Thanks to
  the kbuild bot's report.
  Patch 2/4 only needs CONFIG_CGROUPS while patch 3/4 needs
  CONFIG_SOCK_CGROUP_DATA.  Since a single bpf cgrp2 array alone is
  not useful for now, CONFIG_SOCK_CGROUP_DATA is also used in
  patch 2/4.  We can fine tune it later if we find other use cases
  for the cgrp2 array.
- Return EAGAIN instead of ENOENT if the cgrp2 array entry is
  NULL.  This distinguishes two cases: 1) userland has not populated
  this array entry yet, or 2) no cgrp2 could be found from the skb.

- Belated thanks to Alexei and Tejun for reviewing v1 and giving advice on
  this work.

v2:
- Fix two return cases in cgroup_get_from_fd()
- Fix compilation errors when CONFIG_CGROUPS is not used:
  - arraymap.c: avoid registering BPF_MAP_TYPE_CGROUP_ARRAY
  - filter.c: tc_cls_act_func_proto() returns NULL on BPF_FUNC_skb_in_cgroup
- Add comments to BPF_FUNC_skb_in_cgroup and cgroup_get_from_fd()
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Jul 1, 2016
2 parents 6bd3847 + a3f7461 commit dc9a200
Show file tree
Hide file tree
Showing 12 changed files with 506 additions and 1 deletion.
1 change: 1 addition & 0 deletions include/linux/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
struct cgroup_subsys *ss);

struct cgroup *cgroup_get_from_path(const char *path);
struct cgroup *cgroup_get_from_fd(int fd);

int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
Expand Down
12 changes: 12 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_PERCPU_HASH,
BPF_MAP_TYPE_PERCPU_ARRAY,
BPF_MAP_TYPE_STACK_TRACE,
BPF_MAP_TYPE_CGROUP_ARRAY,
};

enum bpf_prog_type {
Expand Down Expand Up @@ -336,6 +337,17 @@ enum bpf_func_id {
*/
BPF_FUNC_skb_change_type,

/**
* bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
* @skb: pointer to skb
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
* Return:
* == 0 skb failed the cgroup2 descendant test
* == 1 skb succeeded the cgroup2 descendant test
* < 0 error
*/
BPF_FUNC_skb_in_cgroup,
__BPF_FUNC_MAX_ID,
};

Expand Down
43 changes: 43 additions & 0 deletions kernel/bpf/arraymap.c
Original file line number Diff line number Diff line change
Expand Up @@ -537,3 +537,46 @@ static int __init register_perf_event_array_map(void)
return 0;
}
late_initcall(register_perf_event_array_map);

#ifdef CONFIG_SOCK_CGROUP_DATA
/*
 * map_fd_get_ptr callback of the cgroup array: turn the fd handed in by
 * userspace into a cgroup by way of cgroup_get_from_fd() and pass that
 * result (a cgroup pointer or an ERR_PTR) back unchanged.
 * Neither @map nor @map_file is consulted.
 */
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	struct cgroup *cgrp = cgroup_get_from_fd(fd);

	return cgrp;
}

/*
 * map_fd_put_ptr callback of the cgroup array: release the cgroup
 * reference that was stored in a map slot.
 */
static void cgroup_fd_array_put_ptr(void *ptr)
{
	struct cgroup *cgrp = ptr;

	/* cgroup_put() only frees cgrp once an RCU grace period has passed */
	cgroup_put(cgrp);
}

/*
 * map_free callback of the cgroup array.
 *
 * First clears the map's elements via bpf_fd_array_map_clear()
 * (presumably dropping each stored cgroup through the put_ptr callback
 * -- confirm against its definition), then releases the map itself via
 * fd_array_map_free().  The order matters: the elements must be cleared
 * while the map is still alive.
 */
static void cgroup_fd_array_free(struct bpf_map *map)
{
bpf_fd_array_map_clear(map);
fd_array_map_free(map);
}

/*
 * Operations table for BPF_MAP_TYPE_CGROUP_ARRAY.
 *
 * Allocation, key iteration, lookup and delete reuse the generic
 * fd_array_map_* / array_map_* helpers; only the free path and the
 * fd <-> pointer conversion callbacks are cgroup specific.
 */
static const struct bpf_map_ops cgroup_array_ops = {
.map_alloc = fd_array_map_alloc,
.map_free = cgroup_fd_array_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
/* fd -> struct cgroup * (via cgroup_get_from_fd()) */
.map_fd_get_ptr = cgroup_fd_array_get_ptr,
/* drops the cgroup reference again (via cgroup_put()) */
.map_fd_put_ptr = cgroup_fd_array_put_ptr,
};

/*
 * Ties cgroup_array_ops to the BPF_MAP_TYPE_CGROUP_ARRAY map type.
 * Handed to bpf_register_map_type() by register_cgroup_array_map().
 */
static struct bpf_map_type_list cgroup_array_type __read_mostly = {
.ops = &cgroup_array_ops,
.type = BPF_MAP_TYPE_CGROUP_ARRAY,
};

/*
 * Init-time hook that registers the cgroup array map type with the BPF
 * core.  Always returns 0; the call to bpf_register_map_type() has no
 * result that is checked here.
 */
static int __init register_cgroup_array_map(void)
{
bpf_register_map_type(&cgroup_array_type);
return 0;
}
late_initcall(register_cgroup_array_map);
#endif
3 changes: 2 additions & 1 deletion kernel/bpf/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,8 @@ static int map_update_elem(union bpf_attr *attr)
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_update(map, key, value, attr->flags);
} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
rcu_read_lock();
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
attr->flags);
Expand Down
8 changes: 8 additions & 0 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -1035,6 +1035,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
if (func_id != BPF_FUNC_get_stackid)
goto error;
break;
case BPF_MAP_TYPE_CGROUP_ARRAY:
if (func_id != BPF_FUNC_skb_in_cgroup)
goto error;
break;
default:
break;
}
Expand All @@ -1054,6 +1058,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
goto error;
break;
case BPF_FUNC_skb_in_cgroup:
if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
goto error;
break;
default:
break;
}
Expand Down
35 changes: 35 additions & 0 deletions kernel/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
#include <linux/proc_ns.h>
#include <linux/nsproxy.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <net/sock.h>

/*
Expand Down Expand Up @@ -6209,6 +6210,40 @@ struct cgroup *cgroup_get_from_path(const char *path)
}
EXPORT_SYMBOL_GPL(cgroup_get_from_path);

/**
 * cgroup_get_from_fd - look up the cgroup behind an open directory fd
 * @fd: file descriptor obtained by open(cgroup2_dir)
 *
 * Resolve @fd, which is expected to refer to a cgroup directory, to the
 * cgroup it represents.  A pointer to the cgroup is returned on success.
 * An ERR_PTR is returned when @fd is not a valid descriptor, when no
 * online css can be obtained from the directory, or when the cgroup
 * fails the cgroup_on_dfl() check (i.e. it is not a cgroup2 cgroup).
 */
struct cgroup *cgroup_get_from_fd(int fd)
{
	struct cgroup_subsys_state *css;
	struct file *filp = fget_raw(fd);

	if (!filp)
		return ERR_PTR(-EBADF);

	css = css_tryget_online_from_dir(filp->f_path.dentry, NULL);
	/* the file was only needed for the dentry lookup above */
	fput(filp);
	if (IS_ERR(css))
		return ERR_CAST(css);

	if (cgroup_on_dfl(css->cgroup))
		return css->cgroup;

	/* wrong hierarchy: give the reference back before failing */
	cgroup_put(css->cgroup);
	return ERR_PTR(-EBADF);
}
EXPORT_SYMBOL_GPL(cgroup_get_from_fd);

/*
* sock->sk_cgrp_data handling. For more info, see sock_cgroup_data
* definition in cgroup-defs.h.
Expand Down
38 changes: 38 additions & 0 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -2239,6 +2239,40 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
}
}

#ifdef CONFIG_SOCK_CGROUP_DATA
/*
 * BPF helper: check whether the cgroup of the socket attached to an skb
 * is a descendant of the cgroup stored in one slot of a cgroup array map.
 *
 * r1: pointer to the skb
 * r2: pointer to the bpf_map (embedded in a struct bpf_array)
 * r3: index of the slot to test against (truncated to u32)
 * r4, r5: unused
 *
 * Returns the result of cgroup_is_descendant() when the test could be
 * carried out, or a negative errno:
 *   -ENOENT  the skb has no socket, or the socket is not a full socket
 *   -E2BIG   the index is beyond the map's max_entries
 *   -EAGAIN  the selected slot holds no cgroup
 */
static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *)(long)r1;
	struct bpf_map *map = (struct bpf_map *)(long)r2;
	struct bpf_array *arr = container_of(map, struct bpf_array, map);
	u32 idx = (u32)r3;
	struct sock *owner = skb->sk;
	struct cgroup *ancestor;

	if (!(owner && sk_fullsock(owner)))
		return -ENOENT;

	if (unlikely(idx >= arr->map.max_entries))
		return -E2BIG;

	/* the slot may be updated concurrently; read it exactly once */
	ancestor = READ_ONCE(arr->ptrs[idx]);
	if (unlikely(!ancestor))
		return -EAGAIN;

	return cgroup_is_descendant(sock_cgroup_ptr(&owner->sk_cgrp_data),
				    ancestor);
}

/*
 * Function prototype describing bpf_skb_in_cgroup(): it returns an
 * integer and takes the program context (the skb), a constant map
 * pointer and an unconstrained third argument (the array index).
 * Not restricted to GPL programs.  Returned by tc_cls_act_func_proto()
 * for BPF_FUNC_skb_in_cgroup.
 */
static const struct bpf_func_proto bpf_skb_in_cgroup_proto = {
.func = bpf_skb_in_cgroup,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
};
#endif

static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
{
Expand Down Expand Up @@ -2307,6 +2341,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return bpf_get_event_output_proto();
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
#ifdef CONFIG_SOCK_CGROUP_DATA
case BPF_FUNC_skb_in_cgroup:
return &bpf_skb_in_cgroup_proto;
#endif
default:
return sk_filter_func_proto(func_id);
}
Expand Down
3 changes: 3 additions & 0 deletions samples/bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ hostprogs-y += offwaketime
hostprogs-y += spintest
hostprogs-y += map_perf_test
hostprogs-y += test_overhead
hostprogs-y += test_cgrp2_array_pin

test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
Expand All @@ -40,6 +41,7 @@ offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o
spintest-objs := bpf_load.o libbpf.o spintest_user.o
map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o

# Tell kbuild to always build the programs
always := $(hostprogs-y)
Expand All @@ -61,6 +63,7 @@ always += map_perf_test_kern.o
always += test_overhead_tp_kern.o
always += test_overhead_kprobe_kern.o
always += parse_varlen.o parse_simple.o parse_ldabs.o
always += test_cgrp2_tc_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include

Expand Down
2 changes: 2 additions & 0 deletions samples/bpf/bpf_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
(void *) BPF_FUNC_l3_csum_replace;
static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
(void *) BPF_FUNC_l4_csum_replace;
/*
 * Stub for the BPF_FUNC_skb_in_cgroup helper: @ctx is the skb, @map a
 * BPF_MAP_TYPE_CGROUP_ARRAY map and @index the slot to test against.
 */
static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) =
(void *) BPF_FUNC_skb_in_cgroup;

#if defined(__x86_64__)

Expand Down
109 changes: 109 additions & 0 deletions samples/bpf/test_cgrp2_array_pin.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/* Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/unistd.h>
#include <linux/bpf.h>

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>

#include "libbpf.h"

/* Print the command-line synopsis and option summary to stdout. */
static void usage(void)
{
	static const char *const help_lines[] = {
		"Usage: test_cgrp2_array_pin [...]\n",
		" -F <file> File to pin an BPF cgroup array\n",
		" -U <file> Update an already pinned BPF cgroup array\n",
		" -v <value> Full path of the cgroup2\n",
		" -h Display this help\n",
	};
	size_t i;

	for (i = 0; i < sizeof(help_lines) / sizeof(help_lines[0]); i++)
		printf("%s", help_lines[i]);
}

/*
 * Store a cgroup2 directory fd in slot 0 of a BPF cgroup array.
 *
 * With -F <file> a new one-entry BPF_MAP_TYPE_CGROUP_ARRAY is created,
 * populated and then pinned at <file>.  With -U <file> an already pinned
 * array is opened and its slot 0 is updated instead.  -v <path> names the
 * cgroup2 directory to store and is mandatory, as is one of -F/-U.
 * -h prints the usage text and exits successfully.
 *
 * Returns 0 on success (or after -h); a non-zero value after printing a
 * diagnostic or the usage text on any failure.
 */
int main(int argc, char **argv)
{
	const char *pinned_file = NULL, *cg2 = NULL;
	int create_array = 1;
	int array_key = 0;
	int array_fd = -1;
	int cg2_fd = -1;
	int ret = -1;
	int opt;

	/*
	 * 'h' must be listed in the optstring: otherwise getopt() treats the
	 * documented -h flag as an unknown option and prints an error.
	 */
	while ((opt = getopt(argc, argv, "F:U:v:h")) != -1) {
		switch (opt) {
		/* General args */
		case 'F':
			pinned_file = optarg;
			break;
		case 'U':
			pinned_file = optarg;
			create_array = 0;
			break;
		case 'v':
			cg2 = optarg;
			break;
		case 'h':
			/* help was asked for explicitly: not an error */
			usage();
			ret = 0;
			goto out;
		default:
			usage();
			goto out;
		}
	}

	if (!cg2 || !pinned_file) {
		usage();
		goto out;
	}

	cg2_fd = open(cg2, O_RDONLY);
	if (cg2_fd < 0) {
		fprintf(stderr, "open(%s,...): %s(%d)\n",
			cg2, strerror(errno), errno);
		goto out;
	}

	if (create_array) {
		array_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,
					  sizeof(uint32_t), sizeof(uint32_t),
					  1, 0);
		if (array_fd < 0) {
			fprintf(stderr,
				"bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n",
				strerror(errno), errno);
			goto out;
		}
	} else {
		array_fd = bpf_obj_get(pinned_file);
		if (array_fd < 0) {
			fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n",
				pinned_file, strerror(errno), errno);
			goto out;
		}
	}

	/* the map value is the cgroup directory fd itself */
	ret = bpf_update_elem(array_fd, &array_key, &cg2_fd, 0);
	if (ret) {
		perror("bpf_update_elem");
		goto out;
	}

	/* only a freshly created array needs to be pinned */
	if (create_array) {
		ret = bpf_obj_pin(array_fd, pinned_file);
		if (ret) {
			fprintf(stderr, "bpf_obj_pin(..., %s): %s(%d)\n",
				pinned_file, strerror(errno), errno);
			goto out;
		}
	}

out:
	if (array_fd != -1)
		close(array_fd);
	if (cg2_fd != -1)
		close(cg2_fd);
	return ret;
}
Loading

0 comments on commit dc9a200

Please sign in to comment.