forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathverifier.c
21809 lines (19381 loc) · 651 KB
/
verifier.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
* Copyright (c) 2016 Facebook
* Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
*/
#include <uapi/linux/btf.h>
#include <linux/bpf-cgroup.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/ctype.h>
#include <linux/error-injection.h>
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>
#include <linux/poison.h>
#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/bpf_mem_alloc.h>
#include <net/xdp.h>
#include "disasm.h"
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};
struct bpf_mem_alloc bpf_global_percpu_ma;
static bool bpf_global_percpu_ma_set;
/* bpf_check() is a static code analyzer that walks eBPF program
* instruction by instruction and updates register/stack state.
* All paths of conditional branches are analyzed until 'bpf_exit' insn.
*
* The first pass is depth-first-search to check that the program is a DAG.
* It rejects the following programs:
* - larger than BPF_MAXINSNS insns
* - if loop is present (detected via back-edge)
* - unreachable insns exist (shouldn't be a forest. program = one function)
* - out of bounds or malformed jumps
* The second pass is all possible path descent from the 1st insn.
* Since it's analyzing all paths through the program, the length of the
* analysis is limited to 64k insn, which may be hit even if total number of
* insn is less then 4K, but there are too many branches that change stack/regs.
* Number of 'branches to be analyzed' is limited to 1k
*
* On entry to each instruction, each register has a type, and the instruction
* changes the types of the registers depending on instruction semantics.
* If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
* copied to R1.
*
* All registers are 64-bit.
* R0 - return register
* R1-R5 argument passing registers
* R6-R9 callee saved registers
* R10 - frame pointer read-only
*
* At the start of BPF program the register R1 contains a pointer to bpf_context
* and has type PTR_TO_CTX.
*
* Verifier tracks arithmetic operations on pointers in case:
* BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
* BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
* 1st insn copies R10 (which has FRAME_PTR) type into R1
* and 2nd arithmetic instruction is pattern matched to recognize
* that it wants to construct a pointer to some element within stack.
* So after 2nd insn, the register R1 has type PTR_TO_STACK
* (and -20 constant is saved for further stack bounds checking).
* Meaning that this reg is a pointer to stack plus known immediate constant.
*
* Most of the time the registers have SCALAR_VALUE type, which
* means the register has some value, but it's not a valid pointer.
* (like pointer plus pointer becomes SCALAR_VALUE type)
*
* When verifier sees load or store instructions the type of base register
* can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
* four pointer types recognized by check_mem_access() function.
*
* PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
* and the range of [ptr, ptr + map's value_size) is accessible.
*
* registers used to pass values to function calls are checked against
* function argument constraints.
*
* ARG_PTR_TO_MAP_KEY is one of such argument constraints.
* It means that the register type passed to this function must be
* PTR_TO_STACK and it will be used inside the function as
* 'pointer to map element key'
*
* For example the argument constraints for bpf_map_lookup_elem():
* .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
* .arg1_type = ARG_CONST_MAP_PTR,
* .arg2_type = ARG_PTR_TO_MAP_KEY,
*
* ret_type says that this function returns 'pointer to map elem value or null'
* function expects 1st argument to be a const pointer to 'struct bpf_map' and
* 2nd argument should be a pointer to stack, which will be used inside
* the helper function as a pointer to map element key.
*
* On the kernel side the helper function looks like:
* u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
* {
* struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
* void *key = (void *) (unsigned long) r2;
* void *value;
*
* here kernel can access 'key' and 'map' pointers safely, knowing that
* [key, key + map->key_size) bytes are valid and were initialized on
* the stack of eBPF program.
* }
*
* Corresponding eBPF program may look like:
* BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
* BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
* BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
* BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
* here verifier looks at prototype of map_lookup_elem() and sees:
* .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
* Now verifier knows that this map has key of R1->map_ptr->key_size bytes
*
* Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
* Now verifier checks that [R2, R2 + map's key_size) are within stack limits
* and were initialized prior to this call.
* If it's ok, then verifier allows this BPF_CALL insn and looks at
* .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
* R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
* returns either pointer to map value or NULL.
*
* When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
* insn, the register holding that pointer in the true branch changes state to
* PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
* branch. See check_cond_jmp_op().
*
* After the call R0 is set to return type of the function and registers R1-R5
* are set to NOT_INIT to indicate that they are no longer readable.
*
* The following reference types represent a potential reference to a kernel
* resource which, after first being allocated, must be checked and freed by
* the BPF program:
* - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
*
* When the verifier sees a helper call return a reference type, it allocates a
* pointer id for the reference and stores it in the current function state.
* Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
* PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
* passes through a NULL-check conditional. For the branch wherein the state is
* changed to CONST_IMM, the verifier releases the reference.
*
* For each helper function that allocates a reference, such as
* bpf_sk_lookup_tcp(), there is a corresponding release function, such as
* bpf_sk_release(). When a reference type passes into the release function,
* the verifier also releases the reference. If any unchecked or unreleased
* reference remains at the end of the program, the verifier rejects it.
*/
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
/* verifier state is 'st'
* before processing instruction 'insn_idx'
* and after processing instruction 'prev_insn_idx'
*/
struct bpf_verifier_state st;
int insn_idx;
int prev_insn_idx;
struct bpf_verifier_stack_elem *next;
/* length of verifier log at the time this state was pushed on stack */
u32 log_pos;
};
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
#define BPF_COMPLEXITY_LIMIT_STATES 64
#define BPF_MAP_KEY_POISON (1ULL << 63)
#define BPF_MAP_KEY_SEEN (1ULL << 62)
#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE 512
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
static int ref_set_non_owning(struct bpf_verifier_env *env,
struct bpf_reg_state *reg);
static void specialize_kfunc(struct bpf_verifier_env *env,
u32 func_id, u16 offset, unsigned long *addr);
static bool is_trusted_reg(const struct bpf_reg_state *reg);
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
return aux->map_ptr_state.poison;
}
static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
return aux->map_ptr_state.unpriv;
}
static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
struct bpf_map *map,
bool unpriv, bool poison)
{
unpriv |= bpf_map_ptr_unpriv(aux);
aux->map_ptr_state.unpriv = unpriv;
aux->map_ptr_state.poison = poison;
aux->map_ptr_state.map_ptr = map;
}
static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
{
return aux->map_key_state & BPF_MAP_KEY_POISON;
}
static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
{
return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
}
static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
{
return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
}
static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
{
bool poisoned = bpf_map_key_poisoned(aux);
aux->map_key_state = state | BPF_MAP_KEY_SEEN |
(poisoned ? BPF_MAP_KEY_POISON : 0ULL);
}
static bool bpf_helper_call(const struct bpf_insn *insn)
{
return insn->code == (BPF_JMP | BPF_CALL) &&
insn->src_reg == 0;
}
static bool bpf_pseudo_call(const struct bpf_insn *insn)
{
return insn->code == (BPF_JMP | BPF_CALL) &&
insn->src_reg == BPF_PSEUDO_CALL;
}
static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
{
return insn->code == (BPF_JMP | BPF_CALL) &&
insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
}
struct bpf_call_arg_meta {
struct bpf_map *map_ptr;
bool raw_mode;
bool pkt_access;
u8 release_regno;
int regno;
int access_size;
int mem_size;
u64 msize_max_value;
int ref_obj_id;
int dynptr_id;
int map_uid;
int func_id;
struct btf *btf;
u32 btf_id;
struct btf *ret_btf;
u32 ret_btf_id;
u32 subprogno;
struct btf_field *kptr_field;
};
struct bpf_kfunc_call_arg_meta {
/* In parameters */
struct btf *btf;
u32 func_id;
u32 kfunc_flags;
const struct btf_type *func_proto;
const char *func_name;
/* Out parameters */
u32 ref_obj_id;
u8 release_regno;
bool r0_rdonly;
u32 ret_btf_id;
u64 r0_size;
u32 subprogno;
struct {
u64 value;
bool found;
} arg_constant;
/* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
* generally to pass info about user-defined local kptr types to later
* verification logic
* bpf_obj_drop/bpf_percpu_obj_drop
* Record the local kptr type to be drop'd
* bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
* Record the local kptr type to be refcount_incr'd and use
* arg_owning_ref to determine whether refcount_acquire should be
* fallible
*/
struct btf *arg_btf;
u32 arg_btf_id;
bool arg_owning_ref;
struct {
struct btf_field *field;
} arg_list_head;
struct {
struct btf_field *field;
} arg_rbtree_root;
struct {
enum bpf_dynptr_type type;
u32 id;
u32 ref_obj_id;
} initialized_dynptr;
struct {
u8 spi;
u8 frameno;
} iter;
struct {
struct bpf_map *ptr;
int uid;
} map;
u64 mem_size;
};
struct btf *btf_vmlinux;
static const char *btf_type_name(const struct btf *btf, u32 id)
{
return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
}
static DEFINE_MUTEX(bpf_verifier_lock);
static DEFINE_MUTEX(bpf_percpu_ma_lock);
__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
struct bpf_verifier_env *env = private_data;
va_list args;
if (!bpf_verifier_log_needed(&env->log))
return;
va_start(args, fmt);
bpf_verifier_vlog(&env->log, fmt, args);
va_end(args);
}
static void verbose_invalid_scalar(struct bpf_verifier_env *env,
struct bpf_reg_state *reg,
struct bpf_retval_range range, const char *ctx,
const char *reg_name)
{
bool unknown = true;
verbose(env, "%s the register %s has", ctx, reg_name);
if (reg->smin_value > S64_MIN) {
verbose(env, " smin=%lld", reg->smin_value);
unknown = false;
}
if (reg->smax_value < S64_MAX) {
verbose(env, " smax=%lld", reg->smax_value);
unknown = false;
}
if (unknown)
verbose(env, " unknown scalar value");
verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
}
static bool type_may_be_null(u32 type)
{
return type & PTR_MAYBE_NULL;
}
static bool reg_not_null(const struct bpf_reg_state *reg)
{
enum bpf_reg_type type;
type = reg->type;
if (type_may_be_null(type))
return false;
type = base_type(type);
return type == PTR_TO_SOCKET ||
type == PTR_TO_TCP_SOCK ||
type == PTR_TO_MAP_VALUE ||
type == PTR_TO_MAP_KEY ||
type == PTR_TO_SOCK_COMMON ||
(type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
type == PTR_TO_MEM;
}
static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
{
struct btf_record *rec = NULL;
struct btf_struct_meta *meta;
if (reg->type == PTR_TO_MAP_VALUE) {
rec = reg->map_ptr->record;
} else if (type_is_ptr_alloc_obj(reg->type)) {
meta = btf_find_struct_meta(reg->btf, reg->btf_id);
if (meta)
rec = meta->record;
}
return rec;
}
static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog)
{
struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
}
static const char *subprog_name(const struct bpf_verifier_env *env, int subprog)
{
struct bpf_func_info *info;
if (!env->prog->aux->func_info)
return "";
info = &env->prog->aux->func_info[subprog];
return btf_type_name(env->prog->aux->btf, info->type_id);
}
static void mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
{
struct bpf_subprog_info *info = subprog_info(env, subprog);
info->is_cb = true;
info->is_async_cb = true;
info->is_exception_cb = true;
}
static bool subprog_is_exc_cb(struct bpf_verifier_env *env, int subprog)
{
return subprog_info(env, subprog)->is_exception_cb;
}
static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
}
static bool type_is_rdonly_mem(u32 type)
{
return type & MEM_RDONLY;
}
static bool is_acquire_function(enum bpf_func_id func_id,
const struct bpf_map *map)
{
enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
if (func_id == BPF_FUNC_sk_lookup_tcp ||
func_id == BPF_FUNC_sk_lookup_udp ||
func_id == BPF_FUNC_skc_lookup_tcp ||
func_id == BPF_FUNC_ringbuf_reserve ||
func_id == BPF_FUNC_kptr_xchg)
return true;
if (func_id == BPF_FUNC_map_lookup_elem &&
(map_type == BPF_MAP_TYPE_SOCKMAP ||
map_type == BPF_MAP_TYPE_SOCKHASH))
return true;
return false;
}
static bool is_ptr_cast_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_tcp_sock ||
func_id == BPF_FUNC_sk_fullsock ||
func_id == BPF_FUNC_skc_to_tcp_sock ||
func_id == BPF_FUNC_skc_to_tcp6_sock ||
func_id == BPF_FUNC_skc_to_udp6_sock ||
func_id == BPF_FUNC_skc_to_mptcp_sock ||
func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
func_id == BPF_FUNC_skc_to_tcp_request_sock;
}
static bool is_dynptr_ref_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_dynptr_data;
}
static bool is_sync_callback_calling_kfunc(u32 btf_id);
static bool is_async_callback_calling_kfunc(u32 btf_id);
static bool is_callback_calling_kfunc(u32 btf_id);
static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
static bool is_bpf_wq_set_callback_impl_kfunc(u32 btf_id);
static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_for_each_map_elem ||
func_id == BPF_FUNC_find_vma ||
func_id == BPF_FUNC_loop ||
func_id == BPF_FUNC_user_ringbuf_drain;
}
static bool is_async_callback_calling_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_timer_set_callback;
}
static bool is_callback_calling_function(enum bpf_func_id func_id)
{
return is_sync_callback_calling_function(func_id) ||
is_async_callback_calling_function(func_id);
}
static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
{
return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
(bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
}
static bool is_async_callback_calling_insn(struct bpf_insn *insn)
{
return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
(bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
}
static bool is_may_goto_insn(struct bpf_insn *insn)
{
return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
}
static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
{
return is_may_goto_insn(&env->prog->insnsi[insn_idx]);
}
static bool is_storage_get_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_sk_storage_get ||
func_id == BPF_FUNC_inode_storage_get ||
func_id == BPF_FUNC_task_storage_get ||
func_id == BPF_FUNC_cgrp_storage_get;
}
static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
const struct bpf_map *map)
{
int ref_obj_uses = 0;
if (is_ptr_cast_function(func_id))
ref_obj_uses++;
if (is_acquire_function(func_id, map))
ref_obj_uses++;
if (is_dynptr_ref_function(func_id))
ref_obj_uses++;
return ref_obj_uses > 1;
}
static bool is_cmpxchg_insn(const struct bpf_insn *insn)
{
return BPF_CLASS(insn->code) == BPF_STX &&
BPF_MODE(insn->code) == BPF_ATOMIC &&
insn->imm == BPF_CMPXCHG;
}
static int __get_spi(s32 off)
{
return (-off - 1) / BPF_REG_SIZE;
}
static struct bpf_func_state *func(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg)
{
struct bpf_verifier_state *cur = env->cur_state;
return cur->frame[reg->frameno];
}
static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
{
int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
/* We need to check that slots between [spi - nr_slots + 1, spi] are
* within [0, allocated_stack).
*
* Please note that the spi grows downwards. For example, a dynptr
* takes the size of two stack slots; the first slot will be at
* spi and the second slot will be at spi - 1.
*/
return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
}
static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
const char *obj_kind, int nr_slots)
{
int off, spi;
if (!tnum_is_const(reg->var_off)) {
verbose(env, "%s has to be at a constant offset\n", obj_kind);
return -EINVAL;
}
off = reg->off + reg->var_off.value;
if (off % BPF_REG_SIZE) {
verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
return -EINVAL;
}
spi = __get_spi(off);
if (spi + 1 < nr_slots) {
verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
return -EINVAL;
}
if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots))
return -ERANGE;
return spi;
}
static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
}
static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
{
return stack_slot_obj_get_spi(env, reg, "iter", nr_slots);
}
static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
{
switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
case DYNPTR_TYPE_LOCAL:
return BPF_DYNPTR_TYPE_LOCAL;
case DYNPTR_TYPE_RINGBUF:
return BPF_DYNPTR_TYPE_RINGBUF;
case DYNPTR_TYPE_SKB:
return BPF_DYNPTR_TYPE_SKB;
case DYNPTR_TYPE_XDP:
return BPF_DYNPTR_TYPE_XDP;
default:
return BPF_DYNPTR_TYPE_INVALID;
}
}
static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
{
switch (type) {
case BPF_DYNPTR_TYPE_LOCAL:
return DYNPTR_TYPE_LOCAL;
case BPF_DYNPTR_TYPE_RINGBUF:
return DYNPTR_TYPE_RINGBUF;
case BPF_DYNPTR_TYPE_SKB:
return DYNPTR_TYPE_SKB;
case BPF_DYNPTR_TYPE_XDP:
return DYNPTR_TYPE_XDP;
default:
return 0;
}
}
static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
{
return type == BPF_DYNPTR_TYPE_RINGBUF;
}
static void __mark_dynptr_reg(struct bpf_reg_state *reg,
enum bpf_dynptr_type type,
bool first_slot, int dynptr_id);
static void __mark_reg_not_init(const struct bpf_verifier_env *env,
struct bpf_reg_state *reg);
static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
struct bpf_reg_state *sreg1,
struct bpf_reg_state *sreg2,
enum bpf_dynptr_type type)
{
int id = ++env->id_gen;
__mark_dynptr_reg(sreg1, type, true, id);
__mark_dynptr_reg(sreg2, type, false, id);
}
static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
struct bpf_reg_state *reg,
enum bpf_dynptr_type type)
{
__mark_dynptr_reg(reg, type, true, ++env->id_gen);
}
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
struct bpf_func_state *state, int spi);
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
{
struct bpf_func_state *state = func(env, reg);
enum bpf_dynptr_type type;
int spi, i, err;
spi = dynptr_get_spi(env, reg);
if (spi < 0)
return spi;
/* We cannot assume both spi and spi - 1 belong to the same dynptr,
* hence we need to call destroy_if_dynptr_stack_slot twice for both,
* to ensure that for the following example:
* [d1][d1][d2][d2]
* spi 3 2 1 0
* So marking spi = 2 should lead to destruction of both d1 and d2. In
* case they do belong to same dynptr, second call won't see slot_type
* as STACK_DYNPTR and will simply skip destruction.
*/
err = destroy_if_dynptr_stack_slot(env, state, spi);
if (err)
return err;
err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
if (err)
return err;
for (i = 0; i < BPF_REG_SIZE; i++) {
state->stack[spi].slot_type[i] = STACK_DYNPTR;
state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
}
type = arg_to_dynptr_type(arg_type);
if (type == BPF_DYNPTR_TYPE_INVALID)
return -EINVAL;
mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
&state->stack[spi - 1].spilled_ptr, type);
if (dynptr_type_refcounted(type)) {
/* The id is used to track proper releasing */
int id;
if (clone_ref_obj_id)
id = clone_ref_obj_id;
else
id = acquire_reference_state(env, insn_idx);
if (id < 0)
return id;
state->stack[spi].spilled_ptr.ref_obj_id = id;
state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
}
state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
return 0;
}
static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi)
{
int i;
for (i = 0; i < BPF_REG_SIZE; i++) {
state->stack[spi].slot_type[i] = STACK_INVALID;
state->stack[spi - 1].slot_type[i] = STACK_INVALID;
}
__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
/* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
*
* While we don't allow reading STACK_INVALID, it is still possible to
* do <8 byte writes marking some but not all slots as STACK_MISC. Then,
* helpers or insns can do partial read of that part without failing,
* but check_stack_range_initialized, check_stack_read_var_off, and
* check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
* the slot conservatively. Hence we need to prevent those liveness
* marking walks.
*
* This was not a problem before because STACK_INVALID is only set by
* default (where the default reg state has its reg->parent as NULL), or
* in clean_live_states after REG_LIVE_DONE (at which point
* mark_reg_read won't walk reg->parent chain), but not randomly during
* verifier state exploration (like we did above). Hence, for our case
* parentage chain will still be live (i.e. reg->parent may be
* non-NULL), while earlier reg->parent was NULL, so we need
* REG_LIVE_WRITTEN to screen off read marker propagation when it is
* done later on reads or by mark_dynptr_read as well to unnecessary
* mark registers in verifier state.
*/
state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
}
static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_func_state *state = func(env, reg);
int spi, ref_obj_id, i;
spi = dynptr_get_spi(env, reg);
if (spi < 0)
return spi;
if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
invalidate_dynptr(env, state, spi);
return 0;
}
ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;
/* If the dynptr has a ref_obj_id, then we need to invalidate
* two things:
*
* 1) Any dynptrs with a matching ref_obj_id (clones)
* 2) Any slices derived from this dynptr.
*/
/* Invalidate any slices associated with this dynptr */
WARN_ON_ONCE(release_reference(env, ref_obj_id));
/* Invalidate any dynptr clones */
for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) {
if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id)
continue;
/* it should always be the case that if the ref obj id
* matches then the stack slot also belongs to a
* dynptr
*/
if (state->stack[i].slot_type[0] != STACK_DYNPTR) {
verbose(env, "verifier internal error: misconfigured ref_obj_id\n");
return -EFAULT;
}
if (state->stack[i].spilled_ptr.dynptr.first_slot)
invalidate_dynptr(env, state, i);
}
return 0;
}
static void __mark_reg_unknown(const struct bpf_verifier_env *env,
struct bpf_reg_state *reg);
static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
if (!env->allow_ptr_leaks)
__mark_reg_not_init(env, reg);
else
__mark_reg_unknown(env, reg);
}
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
struct bpf_func_state *state, int spi)
{
struct bpf_func_state *fstate;
struct bpf_reg_state *dreg;
int i, dynptr_id;
/* We always ensure that STACK_DYNPTR is never set partially,
* hence just checking for slot_type[0] is enough. This is
* different for STACK_SPILL, where it may be only set for
* 1 byte, so code has to use is_spilled_reg.
*/
if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
return 0;
/* Reposition spi to first slot */
if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
spi = spi + 1;
if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
verbose(env, "cannot overwrite referenced dynptr\n");
return -EINVAL;
}
mark_stack_slot_scratched(env, spi);
mark_stack_slot_scratched(env, spi - 1);
/* Writing partially to one dynptr stack slot destroys both. */
for (i = 0; i < BPF_REG_SIZE; i++) {
state->stack[spi].slot_type[i] = STACK_INVALID;
state->stack[spi - 1].slot_type[i] = STACK_INVALID;
}
dynptr_id = state->stack[spi].spilled_ptr.id;
/* Invalidate any slices associated with this dynptr */
bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
/* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
continue;
if (dreg->dynptr_id == dynptr_id)
mark_reg_invalid(env, dreg);
}));
/* Do not release reference state, we are destroying dynptr on stack,
* not using some helper to release it. Just reset register.
*/
__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
/* Same reason as unmark_stack_slots_dynptr above */
state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
return 0;
}
static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
int spi;
if (reg->type == CONST_PTR_TO_DYNPTR)
return false;
spi = dynptr_get_spi(env, reg);
/* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
* error because this just means the stack state hasn't been updated yet.
* We will do check_mem_access to check and update stack bounds later.
*/
if (spi < 0 && spi != -ERANGE)
return false;
/* We don't need to check if the stack slots are marked by previous
* dynptr initializations because we allow overwriting existing unreferenced
* STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
* destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
* touching are completely destructed before we reinitialize them for a new
* one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
* instead of delaying it until the end where the user will get "Unreleased
* reference" error.
*/
return true;
}
static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_func_state *state = func(env, reg);
int i, spi;
/* This already represents first slot of initialized bpf_dynptr.
*
* CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
* check_func_arg_reg_off's logic, so we don't need to check its
* offset and alignment.
*/
if (reg->type == CONST_PTR_TO_DYNPTR)
return true;
spi = dynptr_get_spi(env, reg);
if (spi < 0)
return false;
if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
return false;
for (i = 0; i < BPF_REG_SIZE; i++) {
if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
return false;
}
return true;
}
static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
enum bpf_arg_type arg_type)
{
struct bpf_func_state *state = func(env, reg);
enum bpf_dynptr_type dynptr_type;
int spi;
/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
if (arg_type == ARG_PTR_TO_DYNPTR)
return true;
dynptr_type = arg_to_dynptr_type(arg_type);
if (reg->type == CONST_PTR_TO_DYNPTR) {
return reg->dynptr.type == dynptr_type;
} else {
spi = dynptr_get_spi(env, reg);
if (spi < 0)
return false;
return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
}
}
static void __mark_reg_known_zero(struct bpf_reg_state *reg);