Commit e6ea60b

Alexei Starovoitov says:

====================
1) libbpf should not attempt to load unused subprogs, from Andrii.

2) Make strncpy_from_user() mask out bytes after NUL terminator, from Daniel.

3) Relax return code check for subprograms in the BPF verifier, from Dmitrii.

4) Fix several sockmap issues, from John.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  fail_function: Remove a redundant mutex unlock
  selftest/bpf: Test bpf_probe_read_user_str() strips trailing bytes after NUL
  lib/strncpy_from_user.c: Mask out bytes after NUL terminator.
  libbpf: Fix VERSIONED_SYM_COUNT number parsing
  bpf, sockmap: Avoid failures from skb_to_sgvec when skb has frag_list
  bpf, sockmap: Handle memory acct if skb_verdict prog redirects to self
  bpf, sockmap: Avoid returning unneeded EAGAIN when redirecting to self
  bpf, sockmap: Use truesize with sk_rmem_schedule()
  bpf, sockmap: Ensure SO_RCVBUF memory is observed on ingress redirect
  bpf, sockmap: Fix partial copy_page_to_iter so progress can still be made
  selftests/bpf: Fix error return code in run_getsockopt_test()
  bpf: Relax return code check for subprograms
  tools, bpftool: Add missing close before bpftool net attach exit
  MAINTAINERS/bpf: Update Andrii's entry.
  selftests/bpf: Fix unused attribute usage in subprogs_unused test
  bpf: Fix unsigned 'datasec_id' compared with zero in check_pseudo_btf_id
  bpf: Fix passing zero to PTR_ERR() in bpf_btf_printf_prepare
  libbpf: Don't attempt to load unused subprog as an entry-point BPF program
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
Committed by Jakub Kicinski (kuba-moo) on Nov 19, 2020
2 parents 90b4978 + 2801a5d commit e6ea60b
Showing 17 changed files with 301 additions and 49 deletions.
2 changes: 1 addition & 1 deletion MAINTAINERS
@@ -3243,10 +3243,10 @@ F: drivers/iio/accel/bma400*
BPF (Safe dynamic programs and tools)
M: Alexei Starovoitov <[email protected]>
M: Daniel Borkmann <[email protected]>
M: Andrii Nakryiko <[email protected]>
R: Martin KaFai Lau <[email protected]>
R: Song Liu <[email protected]>
R: Yonghong Song <[email protected]>
R: Andrii Nakryiko <[email protected]>
R: John Fastabend <[email protected]>
R: KP Singh <[email protected]>
L: [email protected]
18 changes: 15 additions & 3 deletions kernel/bpf/verifier.c
@@ -7786,9 +7786,11 @@ static int check_return_code(struct bpf_verifier_env *env)
struct tnum range = tnum_range(0, 1);
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
int err;
const bool is_subprog = env->cur_state->frame[0]->subprogno;

/* LSM and struct_ops func-ptr's return type could be "void" */
if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
if (!is_subprog &&
(prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
prog_type == BPF_PROG_TYPE_LSM) &&
!prog->aux->attach_func_proto->type)
return 0;
@@ -7808,6 +7810,16 @@ static int check_return_code(struct bpf_verifier_env *env)
return -EACCES;
}

reg = cur_regs(env) + BPF_REG_0;
if (is_subprog) {
if (reg->type != SCALAR_VALUE) {
verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
reg_type_str[reg->type]);
return -EINVAL;
}
return 0;
}

switch (prog_type) {
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
@@ -7861,7 +7873,6 @@ static int check_return_code(struct bpf_verifier_env *env)
return 0;
}

reg = cur_regs(env) + BPF_REG_0;
if (reg->type != SCALAR_VALUE) {
verbose(env, "At program exit the register R0 is not a known value (%s)\n",
reg_type_str[reg->type]);
@@ -9572,12 +9583,13 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
struct bpf_insn *insn,
struct bpf_insn_aux_data *aux)
{
u32 datasec_id, type, id = insn->imm;
const struct btf_var_secinfo *vsi;
const struct btf_type *datasec;
const struct btf_type *t;
const char *sym_name;
bool percpu = false;
u32 type, id = insn->imm;
s32 datasec_id;
u64 addr;
int i;

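For context, a minimal and entirely hypothetical example of what the relaxed check permits (function names and section are illustrative, not from this series): the global subprogram below returns an arbitrary packet length, which previously tripped the [0, 1] range check applied to cgroup/skb programs; with this change only the main program's R0 has to stay in range.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Global subprogram, verified as its own entry point: its exit now only
 * needs to leave a scalar in R0, not a value in the program type's range. */
__noinline int pkt_len(struct __sk_buff *skb)
{
	return skb->len;	/* arbitrary scalar, e.g. 1514 */
}

SEC("cgroup_skb/egress")
int main_prog(struct __sk_buff *skb)
{
	/* The main program's return value must still be 0 or 1. */
	return pkt_len(skb) > 1500 ? 0 : 1;
}

char _license[] SEC("license") = "GPL";
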
5 changes: 3 additions & 2 deletions kernel/fail_function.c
@@ -253,7 +253,7 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,

if (copy_from_user(buf, buffer, count)) {
ret = -EFAULT;
goto out;
goto out_free;
}
buf[count] = '\0';
sym = strstrip(buf);
@@ -307,8 +307,9 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
ret = count;
}
out:
kfree(buf);
mutex_unlock(&fei_lock);
out_free:
kfree(buf);
return ret;
}

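The shape of the fix, sketched below on a made-up writer (demo_write(), demo_process() and demo_lock are hypothetical, not part of this series): an error taken before the mutex is acquired must jump past the unlock, so that each error label undoes only what has actually been set up.

#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

static DEFINE_MUTEX(demo_lock);
static int demo_process(const char *cmd);	/* hypothetical helper */

static ssize_t demo_write(const char __user *ubuf, size_t count)
{
	ssize_t ret = count;
	char *buf;

	buf = kmalloc(count + 1, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, ubuf, count)) {
		ret = -EFAULT;
		goto out_free;		/* lock not held yet: free only */
	}
	buf[count] = '\0';

	mutex_lock(&demo_lock);
	if (demo_process(buf) < 0)
		ret = -EINVAL;
	mutex_unlock(&demo_lock);
out_free:
	kfree(buf);
	return ret;
}
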
12 changes: 11 additions & 1 deletion kernel/trace/bpf_trace.c
@@ -181,6 +181,16 @@ bpf_probe_read_user_str_common(void *dst, u32 size,
{
int ret;

/*
* NB: We rely on strncpy_from_user() not copying junk past the NUL
* terminator into `dst`.
*
* strncpy_from_user() does long-sized strides in the fast path. If the
* strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
* then there could be junk after the NUL in `dst`. If user takes `dst`
* and keys a hash map with it, then semantically identical strings can
* occupy multiple entries in the map.
*/
ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
memset(dst, 0, size);
@@ -1198,7 +1208,7 @@ static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
*btf = bpf_get_btf_vmlinux();

if (IS_ERR_OR_NULL(*btf))
return PTR_ERR(*btf);
return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;

if (ptr->type_id > 0)
*btf_id = ptr->type_id;
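
A hypothetical BPF snippet showing why the guarantee matters to callers (map layout, attach point and names are illustrative): the string filled in by bpf_probe_read_user_str() is used directly as a hash map key, so any junk bytes left after the NUL would make semantically identical strings land in different map entries.

#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__uint(key_size, 64);
	__uint(value_size, sizeof(__u64));
} counts SEC(".maps");

SEC("kprobe/do_sys_openat2")
int count_filenames(struct pt_regs *ctx)
{
	char key[64] = {};	/* zero-initialized, and stays zero past the NUL */
	__u64 one = 1, *val;

	bpf_probe_read_user_str(key, sizeof(key),
				(const char *)PT_REGS_PARM2(ctx));

	val = bpf_map_lookup_elem(&counts, key);
	if (val)
		__sync_fetch_and_add(val, 1);
	else
		bpf_map_update_elem(&counts, key, &one, BPF_ANY);
	return 0;
}

char _license[] SEC("license") = "GPL";
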
19 changes: 17 additions & 2 deletions lib/strncpy_from_user.c
@@ -35,17 +35,32 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src,
goto byte_at_a_time;

while (max >= sizeof(unsigned long)) {
unsigned long c, data;
unsigned long c, data, mask;

/* Fall back to byte-at-a-time if we get a page fault */
unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time);

*(unsigned long *)(dst+res) = c;
/*
* Note that we mask out the bytes following the NUL. This is
* important to do because string oblivious code may read past
* the NUL. For those routines, we don't want to give them
* potentially random bytes after the NUL in `src`.
*
* One example of such code is BPF map keys. BPF treats map keys
* as an opaque set of bytes. Without the post-NUL mask, any BPF
* maps keyed by strings returned from strncpy_from_user() may
* have multiple entries for semantically identical strings.
*/
if (has_zero(c, &data, &constants)) {
data = prep_zero_mask(c, data, &constants);
data = create_zero_mask(data);
mask = zero_bytemask(data);
*(unsigned long *)(dst+res) = c & mask;
return res + find_zero(data);
}

*(unsigned long *)(dst+res) = c;

res += sizeof(unsigned long);
max -= sizeof(unsigned long);
}
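
A standalone userspace sketch of the masking idea (little-endian assumed; the kernel uses the word-at-a-time helpers has_zero()/zero_bytemask() instead of a byte loop):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Zero the byte holding the first NUL and every byte after it. */
static uint64_t mask_after_nul(uint64_t word)
{
	for (unsigned int i = 0; i < sizeof(word); i++) {
		if (((word >> (i * 8)) & 0xff) == 0)
			return i ? word & ((1ULL << (i * 8)) - 1) : 0;
	}
	return word;	/* no NUL in this word, keep it whole */
}

int main(void)
{
	char src[8] = { 'h', 'i', '\0', 'X', 'Y', 'Z', 'W', 'Q' };	/* junk after NUL */
	char dst[8];
	uint64_t word;

	memcpy(&word, src, sizeof(word));
	word = mask_after_nul(word);	/* clears 'X'..'Q' before the store */
	memcpy(dst, &word, sizeof(word));
	printf("%d %d %d\n", dst[2], dst[3], dst[7]);	/* prints: 0 0 0 */
	return 0;
}
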
87 changes: 74 additions & 13 deletions net/core/skmsg.c
@@ -170,10 +170,12 @@ static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
struct scatterlist *sge = sk_msg_elem(msg, i);
u32 len = sge->length;

if (charge)
sk_mem_uncharge(sk, len);
if (!msg->skb)
/* When the skb owns the memory we free it from consume_skb path. */
if (!msg->skb) {
if (charge)
sk_mem_uncharge(sk, len);
put_page(sg_page(sge));
}
memset(sge, 0, sizeof(*sge));
return len;
}
@@ -397,28 +399,45 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
}
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);

static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
struct sk_buff *skb)
{
struct sock *sk = psock->sk;
int copied = 0, num_sge;
struct sk_msg *msg;

if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
return NULL;

if (!sk_rmem_schedule(sk, skb, skb->truesize))
return NULL;

msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
if (unlikely(!msg))
return -EAGAIN;
if (!sk_rmem_schedule(sk, skb, skb->len)) {
kfree(msg);
return -EAGAIN;
}
return NULL;

sk_msg_init(msg);
return msg;
}

static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
struct sk_psock *psock,
struct sock *sk,
struct sk_msg *msg)
{
int num_sge, copied;

/* skb linearize may fail with ENOMEM, but lets simply try again
* later if this happens. Under memory pressure we don't want to
* drop the skb. We need to linearize the skb so that the mapping
* in skb_to_sgvec can not error.
*/
if (skb_linearize(skb))
return -EAGAIN;
num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
if (unlikely(num_sge < 0)) {
kfree(msg);
return num_sge;
}

sk_mem_charge(sk, skb->len);
copied = skb->len;
msg->sg.start = 0;
msg->sg.size = copied;
@@ -430,6 +449,48 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
return copied;
}

static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);

static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
{
struct sock *sk = psock->sk;
struct sk_msg *msg;

/* If we are receiving on the same sock skb->sk is already assigned,
* skip memory accounting and owner transition seeing it already set
* correctly.
*/
if (unlikely(skb->sk == sk))
return sk_psock_skb_ingress_self(psock, skb);
msg = sk_psock_create_ingress_msg(sk, skb);
if (!msg)
return -EAGAIN;

/* This will transition ownership of the data from the socket where
* the BPF program was run initiating the redirect to the socket
* we will eventually receive this data on. The data will be released
* from skb_consume found in __tcp_bpf_recvmsg() after its been copied
* into user buffers.
*/
skb_set_owner_r(skb, sk);
return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
}

/* Puts an skb on the ingress queue of the socket already assigned to the
* skb. In this case we do not need to check memory limits or skb_set_owner_r
* because the skb is already accounted for here.
*/
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
{
struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
struct sock *sk = psock->sk;

if (unlikely(!msg))
return -EAGAIN;
sk_msg_init(msg);
return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
}

static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
@@ -789,7 +850,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
* retrying later from workqueue.
*/
if (skb_queue_empty(&psock->ingress_skb)) {
err = sk_psock_skb_ingress(psock, skb);
err = sk_psock_skb_ingress_self(psock, skb);
}
if (err < 0) {
skb_queue_tail(&psock->ingress_skb, skb);
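
For orientation, a hypothetical sk_skb verdict program that can trigger the redirect-to-self path handled above (map and section names are illustrative): if the map slot holds the same socket the skb was received on, the redirect takes the sk_psock_skb_ingress_self() path and the skb keeps its existing owner and memory accounting.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 2);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u64));
} sock_map SEC(".maps");

SEC("sk_skb/stream_verdict")
int verdict_prog(struct __sk_buff *skb)
{
	__u32 idx = 0;

	/* Redirect to the ingress queue of the socket stored at index 0;
	 * if that is the receiving socket itself, this is a self-redirect. */
	return bpf_sk_redirect_map(skb, &sock_map, idx, BPF_F_INGRESS);
}

char _license[] SEC("license") = "GPL";
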
18 changes: 11 additions & 7 deletions net/ipv4/tcp_bpf.c
@@ -15,8 +15,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
{
struct iov_iter *iter = &msg->msg_iter;
int peek = flags & MSG_PEEK;
int i, ret, copied = 0;
struct sk_msg *msg_rx;
int i, copied = 0;

msg_rx = list_first_entry_or_null(&psock->ingress_msg,
struct sk_msg, list);
@@ -37,17 +37,16 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
page = sg_page(sge);
if (copied + copy > len)
copy = len - copied;
ret = copy_page_to_iter(page, sge->offset, copy, iter);
if (ret != copy) {
msg_rx->sg.start = i;
return -EFAULT;
}
copy = copy_page_to_iter(page, sge->offset, copy, iter);
if (!copy)
return copied ? copied : -EFAULT;

copied += copy;
if (likely(!peek)) {
sge->offset += copy;
sge->length -= copy;
sk_mem_uncharge(sk, copy);
if (!msg_rx->skb)
sk_mem_uncharge(sk, copy);
msg_rx->sg.size -= copy;

if (!sge->length) {
@@ -56,6 +55,11 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
put_page(page);
}
} else {
/* Lets not optimize peek case if copy_page_to_iter
* didn't copy the entire length lets just break.
*/
if (copy != sge->length)
return copied;
sk_msg_iter_var_next(i);
}

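The pattern the recvmsg fix adopts, as a hypothetical standalone sketch (struct demo_buf and demo_recv() are made up): when the copy helper makes only partial progress, report the bytes already delivered instead of failing outright, so the caller can come back for the rest.

#include <linux/kernel.h>
#include <linux/uio.h>

struct demo_buf {		/* hypothetical stash of pending bytes */
	char *data;
	int off;
	int avail;
};

static int demo_recv(struct demo_buf *b, struct iov_iter *iter, int len)
{
	int copied = 0;

	while (copied < len && b->avail) {
		int chunk = min(len - copied, b->avail);

		chunk = copy_to_iter(b->data + b->off, chunk, iter);
		if (!chunk)	/* no progress at all on this chunk */
			return copied ? copied : -EFAULT;

		b->off += chunk;
		b->avail -= chunk;
		copied += chunk;
	}
	return copied;
}
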
18 changes: 9 additions & 9 deletions tools/bpf/bpftool/net.c
@@ -578,35 +578,35 @@ static int do_attach(int argc, char **argv)

ifindex = net_parse_dev(&argc, &argv);
if (ifindex < 1) {
close(progfd);
return -EINVAL;
err = -EINVAL;
goto cleanup;
}

if (argc) {
if (is_prefix(*argv, "overwrite")) {
overwrite = true;
} else {
p_err("expected 'overwrite', got: '%s'?", *argv);
close(progfd);
return -EINVAL;
err = -EINVAL;
goto cleanup;
}
}

/* attach xdp prog */
if (is_prefix("xdp", attach_type_strings[attach_type]))
err = do_attach_detach_xdp(progfd, attach_type, ifindex,
overwrite);

if (err < 0) {
if (err) {
p_err("interface %s attach failed: %s",
attach_type_strings[attach_type], strerror(-err));
return err;
goto cleanup;
}

if (json_output)
jsonw_null(json_wtr);

return 0;
cleanup:
close(progfd);
return err;
}

static int do_detach(int argc, char **argv)
2 changes: 2 additions & 0 deletions tools/lib/bpf/Makefile
@@ -146,6 +146,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
sort -u | wc -l)
VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
sed 's/\[.*\]//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)

@@ -214,6 +215,7 @@ check_abi: $(OUTPUT)libbpf.so
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
sed 's/\[.*\]//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \
sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \