Skip to content

Commit

Permalink
bpf: sockmap, decrement copied count correctly in redirect error case
Browse files Browse the repository at this point in the history
Currently, when a redirect occurs in sockmap and an error occurs in
the redirect call, we unwind the scatterlist once in the error path
of bpf_tcp_sendmsg_do_redirect() and then again in sendmsg(). Then,
in the error path of sendmsg(), we decrement the copied count by the
send size.

However, it's possible that we partially sent data before the error was
generated. This can happen if do_tcp_sendpages() partially sends the
scatterlist before encountering a memory pressure error. If this
happens, we need to decrement the copied value (the value tracking
how many bytes were actually sent to the TCP stack) by the number of
remaining bytes, _not_ the entire send size. Otherwise we risk
confusing userspace.

Also, we don't need two calls to free the scatterlist; one is
good enough. So remove the one in bpf_tcp_sendmsg_do_redirect() and
then properly reduce copied by the number of remaining bytes, which
may in fact be the entire send size if no bytes were sent.

To do this, use a bool to indicate whether free_start_sg() should do
memory accounting or not.

Signed-off-by: John Fastabend <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
  • Loading branch information
jrfastab authored and borkmann committed Aug 28, 2018
1 parent 3f6e138 commit 501ca81
Showing 1 changed file with 22 additions and 23 deletions.
45 changes: 22 additions & 23 deletions kernel/bpf/sockmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ static int bpf_tcp_init(struct sock *sk)
}

static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
static int free_start_sg(struct sock *sk, struct sk_msg_buff *md);
static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge);

static void bpf_tcp_release(struct sock *sk)
{
Expand All @@ -248,7 +248,7 @@ static void bpf_tcp_release(struct sock *sk)
goto out;

if (psock->cork) {
free_start_sg(psock->sock, psock->cork);
free_start_sg(psock->sock, psock->cork, true);
kfree(psock->cork);
psock->cork = NULL;
}
Expand Down Expand Up @@ -330,14 +330,14 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
close_fun = psock->save_close;

if (psock->cork) {
free_start_sg(psock->sock, psock->cork);
free_start_sg(psock->sock, psock->cork, true);
kfree(psock->cork);
psock->cork = NULL;
}

list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
list_del(&md->list);
free_start_sg(psock->sock, md);
free_start_sg(psock->sock, md, true);
kfree(md);
}

Expand Down Expand Up @@ -570,14 +570,16 @@ static void free_bytes_sg(struct sock *sk, int bytes,
md->sg_start = i;
}

static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
static int free_sg(struct sock *sk, int start,
struct sk_msg_buff *md, bool charge)
{
struct scatterlist *sg = md->sg_data;
int i = start, free = 0;

while (sg[i].length) {
free += sg[i].length;
sk_mem_uncharge(sk, sg[i].length);
if (charge)
sk_mem_uncharge(sk, sg[i].length);
if (!md->skb)
put_page(sg_page(&sg[i]));
sg[i].length = 0;
Expand All @@ -594,17 +596,17 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
return free;
}

static int free_start_sg(struct sock *sk, struct sk_msg_buff *md)
static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge)
{
int free = free_sg(sk, md->sg_start, md);
int free = free_sg(sk, md->sg_start, md, charge);

md->sg_start = md->sg_end;
return free;
}

static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
{
return free_sg(sk, md->sg_curr, md);
return free_sg(sk, md->sg_curr, md, true);
}

static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
Expand Down Expand Up @@ -718,7 +720,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
list_add_tail(&r->list, &psock->ingress);
sk->sk_data_ready(sk);
} else {
free_start_sg(sk, r);
free_start_sg(sk, r, true);
kfree(r);
}

Expand Down Expand Up @@ -752,14 +754,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
release_sock(sk);
}
smap_release_sock(psock, sk);
if (unlikely(err))
goto out;
return 0;
return err;
out_rcu:
rcu_read_unlock();
out:
free_bytes_sg(NULL, send, md, false);
return err;
return 0;
}

static inline void bpf_md_init(struct smap_psock *psock)
Expand Down Expand Up @@ -822,7 +820,7 @@ static int bpf_exec_tx_verdict(struct smap_psock *psock,
case __SK_PASS:
err = bpf_tcp_push(sk, send, m, flags, true);
if (unlikely(err)) {
*copied -= free_start_sg(sk, m);
*copied -= free_start_sg(sk, m, true);
break;
}

Expand All @@ -845,16 +843,17 @@ static int bpf_exec_tx_verdict(struct smap_psock *psock,
lock_sock(sk);

if (unlikely(err < 0)) {
free_start_sg(sk, m);
int free = free_start_sg(sk, m, false);

psock->sg_size = 0;
if (!cork)
*copied -= send;
*copied -= free;
} else {
psock->sg_size -= send;
}

if (cork) {
free_start_sg(sk, m);
free_start_sg(sk, m, true);
psock->sg_size = 0;
kfree(m);
m = NULL;
Expand Down Expand Up @@ -1121,7 +1120,7 @@ static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
err = sk_stream_wait_memory(sk, &timeo);
if (err) {
if (m && m != psock->cork)
free_start_sg(sk, m);
free_start_sg(sk, m, true);
goto out_err;
}
}
Expand Down Expand Up @@ -1580,13 +1579,13 @@ static void smap_gc_work(struct work_struct *w)
bpf_prog_put(psock->bpf_tx_msg);

if (psock->cork) {
free_start_sg(psock->sock, psock->cork);
free_start_sg(psock->sock, psock->cork, true);
kfree(psock->cork);
}

list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
list_del(&md->list);
free_start_sg(psock->sock, md);
free_start_sg(psock->sock, md, true);
kfree(md);
}

Expand Down

0 comments on commit 501ca81

Please sign in to comment.