Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Browse files Browse the repository at this point in the history
Daniel Borkmann says:

====================
pull-request: bpf 2018-02-09

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Two fixes for BPF sockmap in order to break up circular map references
   from programs attached to sockmap, and detaching related sockets in
   case of socket close() event. For the latter we get rid of the
   smap_state_change() and plug into ULP infrastructure, which will later
   also be used for additional features anyway such as TX hooks. For the
   second issue, dependency chain is broken up via map release callback
   to free parse/verdict programs, all from John.

2) Fix a libbpf relocation issue that was found while implementing XDP
   support for Suricata project. Issue was that when clang was invoked
   with default target instead of bpf target, then various other e.g.
   debugging relevant sections are added to the ELF file that contained
   relocation entries pointing to non-BPF related sections which libbpf
   trips over instead of skipping them. Test cases for libbpf are added
   as well, from Jesper.

3) Various misc fixes for bpftool and one for libbpf: a small addition
   to libbpf to make sure it recognizes all standard section prefixes.
   Then, the Makefile in bpftool/Documentation is improved to explicitly
   check for rst2man being installed on the system as we otherwise risk
   installing empty man pages; the man page for bpftool-map is corrected
   and a set of missing bash completions added in order to avoid shipping
   bpftool where the completions are only partially working, from Quentin.

4) Fix applying the relocation to immediate load instructions in the
   nfp JIT which were missing a shift, from Jakub.

5) Two fixes for the BPF kernel selftests: handle CONFIG_BPF_JIT_ALWAYS_ON=y
   gracefully in test_bpf.ko module and mark them as FLAG_EXPECTED_FAIL
   in this case; and explicitly delete the veth devices in the two tests
   test_xdp_{meta,redirect}.sh before dismantling the netnses as when
   selftests are run in batch mode, then workqueue to handle destruction
   might not have finished yet and thus veth creation in next test under
   same dev name would fail, from Yonghong.

6) Fix test_kmod.sh to check the test_bpf.ko module path before performing
   an insmod, and fallback to modprobe. Especially the latter is useful
   when having a device under test that has the modules installed instead,
   from Naresh.
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Feb 9, 2018
2 parents 08f5138 + d977ae5 commit 437a4db
Show file tree
Hide file tree
Showing 18 changed files with 557 additions and 111 deletions.
2 changes: 1 addition & 1 deletion drivers/net/ethernet/netronome/nfp/nfp_asm.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ u16 immed_get_value(u64 instr)
if (!unreg_is_imm(reg))
reg = FIELD_GET(OP_IMMED_B_SRC, instr);

return (reg & 0xff) | FIELD_GET(OP_IMMED_IMM, instr);
return (reg & 0xff) | FIELD_GET(OP_IMMED_IMM, instr) << 8;
}

void immed_set_value(u64 *instr, u16 immed)
Expand Down
8 changes: 8 additions & 0 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -1983,6 +1983,11 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer);
#define TCP_ULP_MAX 128
#define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX)

enum {
TCP_ULP_TLS,
TCP_ULP_BPF,
};

struct tcp_ulp_ops {
struct list_head list;

Expand All @@ -1991,12 +1996,15 @@ struct tcp_ulp_ops {
/* cleanup ulp */
void (*release)(struct sock *sk);

int uid;
char name[TCP_ULP_NAME_MAX];
bool user_visible;
struct module *owner;
};
int tcp_register_ulp(struct tcp_ulp_ops *type);
void tcp_unregister_ulp(struct tcp_ulp_ops *type);
int tcp_set_ulp(struct sock *sk, const char *name);
int tcp_set_ulp_id(struct sock *sk, const int ulp);
void tcp_get_available_ulp(char *buf, size_t len);
void tcp_cleanup_ulp(struct sock *sk);

Expand Down
187 changes: 115 additions & 72 deletions kernel/bpf/sockmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,22 +86,113 @@ struct smap_psock {
struct work_struct tx_work;
struct work_struct gc_work;

struct proto *sk_proto;
void (*save_close)(struct sock *sk, long timeout);
void (*save_data_ready)(struct sock *sk);
void (*save_write_space)(struct sock *sk);
void (*save_state_change)(struct sock *sk);
};

static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
{
return rcu_dereference_sk_user_data(sk);
}

static struct proto tcp_bpf_proto;
static int bpf_tcp_init(struct sock *sk)
{
struct smap_psock *psock;

rcu_read_lock();
psock = smap_psock_sk(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
return -EINVAL;
}

if (unlikely(psock->sk_proto)) {
rcu_read_unlock();
return -EBUSY;
}

psock->save_close = sk->sk_prot->close;
psock->sk_proto = sk->sk_prot;
sk->sk_prot = &tcp_bpf_proto;
rcu_read_unlock();
return 0;
}

static void bpf_tcp_release(struct sock *sk)
{
struct smap_psock *psock;

rcu_read_lock();
psock = smap_psock_sk(sk);

if (likely(psock)) {
sk->sk_prot = psock->sk_proto;
psock->sk_proto = NULL;
}
rcu_read_unlock();
}

static void smap_release_sock(struct smap_psock *psock, struct sock *sock);

static void bpf_tcp_close(struct sock *sk, long timeout)
{
void (*close_fun)(struct sock *sk, long timeout);
struct smap_psock_map_entry *e, *tmp;
struct smap_psock *psock;
struct sock *osk;

rcu_read_lock();
psock = smap_psock_sk(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
return sk->sk_prot->close(sk, timeout);
}

/* The psock may be destroyed anytime after exiting the RCU critial
* section so by the time we use close_fun the psock may no longer
* be valid. However, bpf_tcp_close is called with the sock lock
* held so the close hook and sk are still valid.
*/
close_fun = psock->save_close;

write_lock_bh(&sk->sk_callback_lock);
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
osk = cmpxchg(e->entry, sk, NULL);
if (osk == sk) {
list_del(&e->list);
smap_release_sock(psock, sk);
}
}
write_unlock_bh(&sk->sk_callback_lock);
rcu_read_unlock();
close_fun(sk, timeout);
}

enum __sk_action {
__SK_DROP = 0,
__SK_PASS,
__SK_REDIRECT,
};

static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
.name = "bpf_tcp",
.uid = TCP_ULP_BPF,
.user_visible = false,
.owner = NULL,
.init = bpf_tcp_init,
.release = bpf_tcp_release,
};

static int bpf_tcp_ulp_register(void)
{
tcp_bpf_proto = tcp_prot;
tcp_bpf_proto.close = bpf_tcp_close;
return tcp_register_ulp(&bpf_tcp_ulp_ops);
}

static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
{
struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict);
Expand Down Expand Up @@ -166,68 +257,6 @@ static void smap_report_sk_error(struct smap_psock *psock, int err)
sk->sk_error_report(sk);
}

static void smap_release_sock(struct smap_psock *psock, struct sock *sock);

/* Called with lock_sock(sk) held */
static void smap_state_change(struct sock *sk)
{
struct smap_psock_map_entry *e, *tmp;
struct smap_psock *psock;
struct socket_wq *wq;
struct sock *osk;

rcu_read_lock();

/* Allowing transitions into an established syn_recv states allows
* for early binding sockets to a smap object before the connection
* is established.
*/
switch (sk->sk_state) {
case TCP_SYN_SENT:
case TCP_SYN_RECV:
case TCP_ESTABLISHED:
break;
case TCP_CLOSE_WAIT:
case TCP_CLOSING:
case TCP_LAST_ACK:
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
case TCP_LISTEN:
break;
case TCP_CLOSE:
/* Only release if the map entry is in fact the sock in
* question. There is a case where the operator deletes
* the sock from the map, but the TCP sock is closed before
* the psock is detached. Use cmpxchg to verify correct
* sock is removed.
*/
psock = smap_psock_sk(sk);
if (unlikely(!psock))
break;
write_lock_bh(&sk->sk_callback_lock);
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
osk = cmpxchg(e->entry, sk, NULL);
if (osk == sk) {
list_del(&e->list);
smap_release_sock(psock, sk);
}
}
write_unlock_bh(&sk->sk_callback_lock);
break;
default:
psock = smap_psock_sk(sk);
if (unlikely(!psock))
break;
smap_report_sk_error(psock, EPIPE);
break;
}

wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_all(&wq->wait);
rcu_read_unlock();
}

static void smap_read_sock_strparser(struct strparser *strp,
struct sk_buff *skb)
{
Expand Down Expand Up @@ -322,10 +351,8 @@ static void smap_stop_sock(struct smap_psock *psock, struct sock *sk)
return;
sk->sk_data_ready = psock->save_data_ready;
sk->sk_write_space = psock->save_write_space;
sk->sk_state_change = psock->save_state_change;
psock->save_data_ready = NULL;
psock->save_write_space = NULL;
psock->save_state_change = NULL;
strp_stop(&psock->strp);
psock->strp_enabled = false;
}
Expand All @@ -350,6 +377,7 @@ static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
if (psock->refcnt)
return;

tcp_cleanup_ulp(sock);
smap_stop_sock(psock, sock);
clear_bit(SMAP_TX_RUNNING, &psock->state);
rcu_assign_sk_user_data(sock, NULL);
Expand Down Expand Up @@ -427,10 +455,8 @@ static void smap_start_sock(struct smap_psock *psock, struct sock *sk)
return;
psock->save_data_ready = sk->sk_data_ready;
psock->save_write_space = sk->sk_write_space;
psock->save_state_change = sk->sk_state_change;
sk->sk_data_ready = smap_data_ready;
sk->sk_write_space = smap_write_space;
sk->sk_state_change = smap_state_change;
psock->strp_enabled = true;
}

Expand Down Expand Up @@ -509,6 +535,10 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
if (attr->value_size > KMALLOC_MAX_SIZE)
return ERR_PTR(-E2BIG);

err = bpf_tcp_ulp_register();
if (err && err != -EEXIST)
return ERR_PTR(err);

stab = kzalloc(sizeof(*stab), GFP_USER);
if (!stab)
return ERR_PTR(-ENOMEM);
Expand Down Expand Up @@ -590,11 +620,6 @@ static void sock_map_free(struct bpf_map *map)
}
rcu_read_unlock();

if (stab->bpf_verdict)
bpf_prog_put(stab->bpf_verdict);
if (stab->bpf_parse)
bpf_prog_put(stab->bpf_parse);

sock_map_remove_complete(stab);
}

Expand Down Expand Up @@ -754,6 +779,10 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
goto out_progs;
}

err = tcp_set_ulp_id(sock, TCP_ULP_BPF);
if (err)
goto out_progs;

set_bit(SMAP_TX_RUNNING, &psock->state);
}

Expand Down Expand Up @@ -866,13 +895,27 @@ static int sock_map_update_elem(struct bpf_map *map,
return err;
}

static void sock_map_release(struct bpf_map *map, struct file *map_file)
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
struct bpf_prog *orig;

orig = xchg(&stab->bpf_parse, NULL);
if (orig)
bpf_prog_put(orig);
orig = xchg(&stab->bpf_verdict, NULL);
if (orig)
bpf_prog_put(orig);
}

const struct bpf_map_ops sock_map_ops = {
.map_alloc = sock_map_alloc,
.map_free = sock_map_free,
.map_lookup_elem = sock_map_lookup,
.map_get_next_key = sock_map_get_next_key,
.map_update_elem = sock_map_update_elem,
.map_delete_elem = sock_map_delete_elem,
.map_release = sock_map_release,
};

BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
Expand Down
Loading

0 comments on commit 437a4db

Please sign in to comment.