forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 1
/
verifier.c
1924 lines (1699 loc) · 52.6 KB
/
verifier.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
/* bpf_check() is a static code analyzer that walks eBPF program
* instruction by instruction and updates register/stack state.
* All paths of conditional branches are analyzed until 'bpf_exit' insn.
*
* The first pass is depth-first-search to check that the program is a DAG.
* It rejects the following programs:
* - larger than BPF_MAXINSNS insns
* - if loop is present (detected via back-edge)
* - unreachable insns exist (shouldn't be a forest. program = one function)
* - out of bounds or malformed jumps
* The second pass is all possible path descent from the 1st insn.
* Since it's analyzing all pathes through the program, the length of the
* analysis is limited to 32k insn, which may be hit even if total number of
* insn is less then 4K, but there are too many branches that change stack/regs.
* Number of 'branches to be analyzed' is limited to 1k
*
* On entry to each instruction, each register has a type, and the instruction
* changes the types of the registers depending on instruction semantics.
* If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
* copied to R1.
*
* All registers are 64-bit.
* R0 - return register
* R1-R5 argument passing registers
* R6-R9 callee saved registers
* R10 - frame pointer read-only
*
* At the start of BPF program the register R1 contains a pointer to bpf_context
* and has type PTR_TO_CTX.
*
* Verifier tracks arithmetic operations on pointers in case:
* BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
* BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
* 1st insn copies R10 (which has FRAME_PTR) type into R1
* and 2nd arithmetic instruction is pattern matched to recognize
* that it wants to construct a pointer to some element within stack.
* So after 2nd insn, the register R1 has type PTR_TO_STACK
* (and -20 constant is saved for further stack bounds checking).
* Meaning that this reg is a pointer to stack plus known immediate constant.
*
* Most of the time the registers have UNKNOWN_VALUE type, which
* means the register has some value, but it's not a valid pointer.
* (like pointer plus pointer becomes UNKNOWN_VALUE type)
*
* When verifier sees load or store instructions the type of base register
* can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, FRAME_PTR. These are three pointer
* types recognized by check_mem_access() function.
*
* PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
* and the range of [ptr, ptr + map's value_size) is accessible.
*
* registers used to pass values to function calls are checked against
* function argument constraints.
*
* ARG_PTR_TO_MAP_KEY is one of such argument constraints.
* It means that the register type passed to this function must be
* PTR_TO_STACK and it will be used inside the function as
* 'pointer to map element key'
*
* For example the argument constraints for bpf_map_lookup_elem():
* .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
* .arg1_type = ARG_CONST_MAP_PTR,
* .arg2_type = ARG_PTR_TO_MAP_KEY,
*
* ret_type says that this function returns 'pointer to map elem value or null'
* function expects 1st argument to be a const pointer to 'struct bpf_map' and
* 2nd argument should be a pointer to stack, which will be used inside
* the helper function as a pointer to map element key.
*
* On the kernel side the helper function looks like:
* u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
* {
* struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
* void *key = (void *) (unsigned long) r2;
* void *value;
*
* here kernel can access 'key' and 'map' pointers safely, knowing that
* [key, key + map->key_size) bytes are valid and were initialized on
* the stack of eBPF program.
* }
*
* Corresponding eBPF program may look like:
* BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
* BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
* BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
* BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
* here verifier looks at prototype of map_lookup_elem() and sees:
* .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
* Now verifier knows that this map has key of R1->map_ptr->key_size bytes
*
* Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
* Now verifier checks that [R2, R2 + map's key_size) are within stack limits
* and were initialized prior to this call.
* If it's ok, then verifier allows this BPF_CALL insn and looks at
* .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
* R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
* returns ether pointer to map value or NULL.
*
* When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
* insn, the register holding that pointer in the true branch changes state to
* PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
* branch. See check_cond_jmp_op().
*
* After the call R0 is set to return type of the function and registers R1-R5
* are set to NOT_INIT to indicate that they are no longer readable.
*/
/* types of values stored in eBPF registers */
enum bpf_reg_type {
NOT_INIT = 0, /* nothing was written into register */
UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */
PTR_TO_CTX, /* reg points to bpf_context */
CONST_PTR_TO_MAP, /* reg points to struct bpf_map */
PTR_TO_MAP_VALUE, /* reg points to map element value */
PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
FRAME_PTR, /* reg == frame_pointer */
PTR_TO_STACK, /* reg == frame_pointer + imm */
CONST_IMM, /* constant integer value */
};
struct reg_state {
enum bpf_reg_type type;
union {
/* valid when type == CONST_IMM | PTR_TO_STACK */
int imm;
/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
* PTR_TO_MAP_VALUE_OR_NULL
*/
struct bpf_map *map_ptr;
};
};
enum bpf_stack_slot_type {
STACK_INVALID, /* nothing was stored in this stack slot */
STACK_SPILL, /* 1st byte of register spilled into stack */
STACK_SPILL_PART, /* other 7 bytes of register spill */
STACK_MISC /* BPF program wrote some data into this slot */
};
struct bpf_stack_slot {
enum bpf_stack_slot_type stype;
struct reg_state reg_st;
};
/* state of the program:
* type of all registers and stack info
*/
struct verifier_state {
struct reg_state regs[MAX_BPF_REG];
struct bpf_stack_slot stack[MAX_BPF_STACK];
};
/* linked list of verifier states used to prune search */
struct verifier_state_list {
struct verifier_state state;
struct verifier_state_list *next;
};
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct verifier_stack_elem {
/* verifer state is 'st'
* before processing instruction 'insn_idx'
* and after processing instruction 'prev_insn_idx'
*/
struct verifier_state st;
int insn_idx;
int prev_insn_idx;
struct verifier_stack_elem *next;
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
/* single container for all structs
* one verifier_env per bpf_check() call
*/
struct verifier_env {
struct bpf_prog *prog; /* eBPF program being verified */
struct verifier_stack_elem *head; /* stack of verifier states to be processed */
int stack_size; /* number of states to be processed */
struct verifier_state cur_state; /* current verifier state */
struct verifier_state_list **explored_states; /* search pruning optimization */
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
u32 used_map_cnt; /* number of used maps */
};
/* verbose verifier prints what it's seeing
* bpf_check() is called under lock, so no race to access these global vars
*/
static u32 log_level, log_size, log_len;
static char *log_buf;
static DEFINE_MUTEX(bpf_verifier_lock);
/* log_level controls verbosity level of eBPF verifier.
* verbose() is used to dump the verification trace to the log, so the user
* can figure out what's wrong with the program
*/
static void verbose(const char *fmt, ...)
{
va_list args;
if (log_level == 0 || log_len >= log_size - 1)
return;
va_start(args, fmt);
log_len += vscnprintf(log_buf + log_len, log_size - log_len, fmt, args);
va_end(args);
}
/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
[NOT_INIT] = "?",
[UNKNOWN_VALUE] = "inv",
[PTR_TO_CTX] = "ctx",
[CONST_PTR_TO_MAP] = "map_ptr",
[PTR_TO_MAP_VALUE] = "map_value",
[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
[FRAME_PTR] = "fp",
[PTR_TO_STACK] = "fp",
[CONST_IMM] = "imm",
};
static void print_verifier_state(struct verifier_env *env)
{
enum bpf_reg_type t;
int i;
for (i = 0; i < MAX_BPF_REG; i++) {
t = env->cur_state.regs[i].type;
if (t == NOT_INIT)
continue;
verbose(" R%d=%s", i, reg_type_str[t]);
if (t == CONST_IMM || t == PTR_TO_STACK)
verbose("%d", env->cur_state.regs[i].imm);
else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
t == PTR_TO_MAP_VALUE_OR_NULL)
verbose("(ks=%d,vs=%d)",
env->cur_state.regs[i].map_ptr->key_size,
env->cur_state.regs[i].map_ptr->value_size);
}
for (i = 0; i < MAX_BPF_STACK; i++) {
if (env->cur_state.stack[i].stype == STACK_SPILL)
verbose(" fp%d=%s", -MAX_BPF_STACK + i,
reg_type_str[env->cur_state.stack[i].reg_st.type]);
}
verbose("\n");
}
static const char *const bpf_class_string[] = {
[BPF_LD] = "ld",
[BPF_LDX] = "ldx",
[BPF_ST] = "st",
[BPF_STX] = "stx",
[BPF_ALU] = "alu",
[BPF_JMP] = "jmp",
[BPF_RET] = "BUG",
[BPF_ALU64] = "alu64",
};
static const char *const bpf_alu_string[] = {
[BPF_ADD >> 4] = "+=",
[BPF_SUB >> 4] = "-=",
[BPF_MUL >> 4] = "*=",
[BPF_DIV >> 4] = "/=",
[BPF_OR >> 4] = "|=",
[BPF_AND >> 4] = "&=",
[BPF_LSH >> 4] = "<<=",
[BPF_RSH >> 4] = ">>=",
[BPF_NEG >> 4] = "neg",
[BPF_MOD >> 4] = "%=",
[BPF_XOR >> 4] = "^=",
[BPF_MOV >> 4] = "=",
[BPF_ARSH >> 4] = "s>>=",
[BPF_END >> 4] = "endian",
};
static const char *const bpf_ldst_string[] = {
[BPF_W >> 3] = "u32",
[BPF_H >> 3] = "u16",
[BPF_B >> 3] = "u8",
[BPF_DW >> 3] = "u64",
};
static const char *const bpf_jmp_string[] = {
[BPF_JA >> 4] = "jmp",
[BPF_JEQ >> 4] = "==",
[BPF_JGT >> 4] = ">",
[BPF_JGE >> 4] = ">=",
[BPF_JSET >> 4] = "&",
[BPF_JNE >> 4] = "!=",
[BPF_JSGT >> 4] = "s>",
[BPF_JSGE >> 4] = "s>=",
[BPF_CALL >> 4] = "call",
[BPF_EXIT >> 4] = "exit",
};
static void print_bpf_insn(struct bpf_insn *insn)
{
u8 class = BPF_CLASS(insn->code);
if (class == BPF_ALU || class == BPF_ALU64) {
if (BPF_SRC(insn->code) == BPF_X)
verbose("(%02x) %sr%d %s %sr%d\n",
insn->code, class == BPF_ALU ? "(u32) " : "",
insn->dst_reg,
bpf_alu_string[BPF_OP(insn->code) >> 4],
class == BPF_ALU ? "(u32) " : "",
insn->src_reg);
else
verbose("(%02x) %sr%d %s %s%d\n",
insn->code, class == BPF_ALU ? "(u32) " : "",
insn->dst_reg,
bpf_alu_string[BPF_OP(insn->code) >> 4],
class == BPF_ALU ? "(u32) " : "",
insn->imm);
} else if (class == BPF_STX) {
if (BPF_MODE(insn->code) == BPF_MEM)
verbose("(%02x) *(%s *)(r%d %+d) = r%d\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg,
insn->off, insn->src_reg);
else if (BPF_MODE(insn->code) == BPF_XADD)
verbose("(%02x) lock *(%s *)(r%d %+d) += r%d\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg, insn->off,
insn->src_reg);
else
verbose("BUG_%02x\n", insn->code);
} else if (class == BPF_ST) {
if (BPF_MODE(insn->code) != BPF_MEM) {
verbose("BUG_st_%02x\n", insn->code);
return;
}
verbose("(%02x) *(%s *)(r%d %+d) = %d\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg,
insn->off, insn->imm);
} else if (class == BPF_LDX) {
if (BPF_MODE(insn->code) != BPF_MEM) {
verbose("BUG_ldx_%02x\n", insn->code);
return;
}
verbose("(%02x) r%d = *(%s *)(r%d %+d)\n",
insn->code, insn->dst_reg,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->src_reg, insn->off);
} else if (class == BPF_LD) {
if (BPF_MODE(insn->code) == BPF_ABS) {
verbose("(%02x) r0 = *(%s *)skb[%d]\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->imm);
} else if (BPF_MODE(insn->code) == BPF_IND) {
verbose("(%02x) r0 = *(%s *)skb[r%d + %d]\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->src_reg, insn->imm);
} else if (BPF_MODE(insn->code) == BPF_IMM) {
verbose("(%02x) r%d = 0x%x\n",
insn->code, insn->dst_reg, insn->imm);
} else {
verbose("BUG_ld_%02x\n", insn->code);
return;
}
} else if (class == BPF_JMP) {
u8 opcode = BPF_OP(insn->code);
if (opcode == BPF_CALL) {
verbose("(%02x) call %d\n", insn->code, insn->imm);
} else if (insn->code == (BPF_JMP | BPF_JA)) {
verbose("(%02x) goto pc%+d\n",
insn->code, insn->off);
} else if (insn->code == (BPF_JMP | BPF_EXIT)) {
verbose("(%02x) exit\n", insn->code);
} else if (BPF_SRC(insn->code) == BPF_X) {
verbose("(%02x) if r%d %s r%d goto pc%+d\n",
insn->code, insn->dst_reg,
bpf_jmp_string[BPF_OP(insn->code) >> 4],
insn->src_reg, insn->off);
} else {
verbose("(%02x) if r%d %s 0x%x goto pc%+d\n",
insn->code, insn->dst_reg,
bpf_jmp_string[BPF_OP(insn->code) >> 4],
insn->imm, insn->off);
}
} else {
verbose("(%02x) %s\n", insn->code, bpf_class_string[class]);
}
}
static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
{
struct verifier_stack_elem *elem;
int insn_idx;
if (env->head == NULL)
return -1;
memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state));
insn_idx = env->head->insn_idx;
if (prev_insn_idx)
*prev_insn_idx = env->head->prev_insn_idx;
elem = env->head->next;
kfree(env->head);
env->head = elem;
env->stack_size--;
return insn_idx;
}
static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx,
int prev_insn_idx)
{
struct verifier_stack_elem *elem;
elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL);
if (!elem)
goto err;
memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state));
elem->insn_idx = insn_idx;
elem->prev_insn_idx = prev_insn_idx;
elem->next = env->head;
env->head = elem;
env->stack_size++;
if (env->stack_size > 1024) {
verbose("BPF program is too complex\n");
goto err;
}
return &elem->st;
err:
/* pop all elements and return */
while (pop_stack(env, NULL) >= 0);
return NULL;
}
#define CALLER_SAVED_REGS 6
static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
static void init_reg_state(struct reg_state *regs)
{
int i;
for (i = 0; i < MAX_BPF_REG; i++) {
regs[i].type = NOT_INIT;
regs[i].imm = 0;
regs[i].map_ptr = NULL;
}
/* frame pointer */
regs[BPF_REG_FP].type = FRAME_PTR;
/* 1st arg to a function */
regs[BPF_REG_1].type = PTR_TO_CTX;
}
static void mark_reg_unknown_value(struct reg_state *regs, u32 regno)
{
BUG_ON(regno >= MAX_BPF_REG);
regs[regno].type = UNKNOWN_VALUE;
regs[regno].imm = 0;
regs[regno].map_ptr = NULL;
}
enum reg_arg_type {
SRC_OP, /* register is used as source operand */
DST_OP, /* register is used as destination operand */
DST_OP_NO_MARK /* same as above, check only, don't mark */
};
static int check_reg_arg(struct reg_state *regs, u32 regno,
enum reg_arg_type t)
{
if (regno >= MAX_BPF_REG) {
verbose("R%d is invalid\n", regno);
return -EINVAL;
}
if (t == SRC_OP) {
/* check whether register used as source operand can be read */
if (regs[regno].type == NOT_INIT) {
verbose("R%d !read_ok\n", regno);
return -EACCES;
}
} else {
/* check whether register used as dest operand can be written to */
if (regno == BPF_REG_FP) {
verbose("frame pointer is read only\n");
return -EACCES;
}
if (t == DST_OP)
mark_reg_unknown_value(regs, regno);
}
return 0;
}
static int bpf_size_to_bytes(int bpf_size)
{
if (bpf_size == BPF_W)
return 4;
else if (bpf_size == BPF_H)
return 2;
else if (bpf_size == BPF_B)
return 1;
else if (bpf_size == BPF_DW)
return 8;
else
return -EINVAL;
}
/* check_stack_read/write functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
static int check_stack_write(struct verifier_state *state, int off, int size,
int value_regno)
{
struct bpf_stack_slot *slot;
int i;
if (value_regno >= 0 &&
(state->regs[value_regno].type == PTR_TO_MAP_VALUE ||
state->regs[value_regno].type == PTR_TO_STACK ||
state->regs[value_regno].type == PTR_TO_CTX)) {
/* register containing pointer is being spilled into stack */
if (size != 8) {
verbose("invalid size of register spill\n");
return -EACCES;
}
slot = &state->stack[MAX_BPF_STACK + off];
slot->stype = STACK_SPILL;
/* save register state */
slot->reg_st = state->regs[value_regno];
for (i = 1; i < 8; i++) {
slot = &state->stack[MAX_BPF_STACK + off + i];
slot->stype = STACK_SPILL_PART;
slot->reg_st.type = UNKNOWN_VALUE;
slot->reg_st.map_ptr = NULL;
}
} else {
/* regular write of data into stack */
for (i = 0; i < size; i++) {
slot = &state->stack[MAX_BPF_STACK + off + i];
slot->stype = STACK_MISC;
slot->reg_st.type = UNKNOWN_VALUE;
slot->reg_st.map_ptr = NULL;
}
}
return 0;
}
static int check_stack_read(struct verifier_state *state, int off, int size,
int value_regno)
{
int i;
struct bpf_stack_slot *slot;
slot = &state->stack[MAX_BPF_STACK + off];
if (slot->stype == STACK_SPILL) {
if (size != 8) {
verbose("invalid size of register spill\n");
return -EACCES;
}
for (i = 1; i < 8; i++) {
if (state->stack[MAX_BPF_STACK + off + i].stype !=
STACK_SPILL_PART) {
verbose("corrupted spill memory\n");
return -EACCES;
}
}
if (value_regno >= 0)
/* restore register state from stack */
state->regs[value_regno] = slot->reg_st;
return 0;
} else {
for (i = 0; i < size; i++) {
if (state->stack[MAX_BPF_STACK + off + i].stype !=
STACK_MISC) {
verbose("invalid read from stack off %d+%d size %d\n",
off, i, size);
return -EACCES;
}
}
if (value_regno >= 0)
/* have read misc data from the stack */
mark_reg_unknown_value(state->regs, value_regno);
return 0;
}
}
/* check read/write into map element returned by bpf_map_lookup_elem() */
static int check_map_access(struct verifier_env *env, u32 regno, int off,
int size)
{
struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
if (off < 0 || off + size > map->value_size) {
verbose("invalid access to map value, value_size=%d off=%d size=%d\n",
map->value_size, off, size);
return -EACCES;
}
return 0;
}
/* check access to 'struct bpf_context' fields */
static int check_ctx_access(struct verifier_env *env, int off, int size,
enum bpf_access_type t)
{
if (env->prog->aux->ops->is_valid_access &&
env->prog->aux->ops->is_valid_access(off, size, t))
return 0;
verbose("invalid bpf_context access off=%d size=%d\n", off, size);
return -EACCES;
}
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
* if t==write && value_regno==-1, some unknown value is stored into memory
* if t==read && value_regno==-1, don't care what we read from memory
*/
static int check_mem_access(struct verifier_env *env, u32 regno, int off,
int bpf_size, enum bpf_access_type t,
int value_regno)
{
struct verifier_state *state = &env->cur_state;
int size, err = 0;
size = bpf_size_to_bytes(bpf_size);
if (size < 0)
return size;
if (off % size != 0) {
verbose("misaligned access off %d size %d\n", off, size);
return -EACCES;
}
if (state->regs[regno].type == PTR_TO_MAP_VALUE) {
err = check_map_access(env, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown_value(state->regs, value_regno);
} else if (state->regs[regno].type == PTR_TO_CTX) {
err = check_ctx_access(env, off, size, t);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown_value(state->regs, value_regno);
} else if (state->regs[regno].type == FRAME_PTR) {
if (off >= 0 || off < -MAX_BPF_STACK) {
verbose("invalid stack off=%d size=%d\n", off, size);
return -EACCES;
}
if (t == BPF_WRITE)
err = check_stack_write(state, off, size, value_regno);
else
err = check_stack_read(state, off, size, value_regno);
} else {
verbose("R%d invalid mem access '%s'\n",
regno, reg_type_str[state->regs[regno].type]);
return -EACCES;
}
return err;
}
static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
{
struct reg_state *regs = env->cur_state.regs;
int err;
if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
insn->imm != 0) {
verbose("BPF_XADD uses reserved fields\n");
return -EINVAL;
}
/* check src1 operand */
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
if (err)
return err;
/* check src2 operand */
err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
if (err)
return err;
/* check whether atomic_add can read the memory */
err = check_mem_access(env, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_READ, -1);
if (err)
return err;
/* check whether atomic_add can write into the same memory */
return check_mem_access(env, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE, -1);
}
/* when register 'regno' is passed into function that will read 'access_size'
* bytes from that pointer, make sure that it's within stack boundary
* and all elements of stack are initialized
*/
static int check_stack_boundary(struct verifier_env *env,
int regno, int access_size)
{
struct verifier_state *state = &env->cur_state;
struct reg_state *regs = state->regs;
int off, i;
if (regs[regno].type != PTR_TO_STACK)
return -EACCES;
off = regs[regno].imm;
if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
access_size <= 0) {
verbose("invalid stack type R%d off=%d access_size=%d\n",
regno, off, access_size);
return -EACCES;
}
for (i = 0; i < access_size; i++) {
if (state->stack[MAX_BPF_STACK + off + i].stype != STACK_MISC) {
verbose("invalid indirect read from stack off %d+%d size %d\n",
off, i, access_size);
return -EACCES;
}
}
return 0;
}
static int check_func_arg(struct verifier_env *env, u32 regno,
enum bpf_arg_type arg_type, struct bpf_map **mapp)
{
struct reg_state *reg = env->cur_state.regs + regno;
enum bpf_reg_type expected_type;
int err = 0;
if (arg_type == ARG_ANYTHING)
return 0;
if (reg->type == NOT_INIT) {
verbose("R%d !read_ok\n", regno);
return -EACCES;
}
if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY ||
arg_type == ARG_PTR_TO_MAP_VALUE) {
expected_type = PTR_TO_STACK;
} else if (arg_type == ARG_CONST_STACK_SIZE) {
expected_type = CONST_IMM;
} else if (arg_type == ARG_CONST_MAP_PTR) {
expected_type = CONST_PTR_TO_MAP;
} else {
verbose("unsupported arg_type %d\n", arg_type);
return -EFAULT;
}
if (reg->type != expected_type) {
verbose("R%d type=%s expected=%s\n", regno,
reg_type_str[reg->type], reg_type_str[expected_type]);
return -EACCES;
}
if (arg_type == ARG_CONST_MAP_PTR) {
/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
*mapp = reg->map_ptr;
} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
/* bpf_map_xxx(..., map_ptr, ..., key) call:
* check that [key, key + map->key_size) are within
* stack limits and initialized
*/
if (!*mapp) {
/* in function declaration map_ptr must come before
* map_key, so that it's verified and known before
* we have to check map_key here. Otherwise it means
* that kernel subsystem misconfigured verifier
*/
verbose("invalid map_ptr to access map->key\n");
return -EACCES;
}
err = check_stack_boundary(env, regno, (*mapp)->key_size);
} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
/* bpf_map_xxx(..., map_ptr, ..., value) call:
* check [value, value + map->value_size) validity
*/
if (!*mapp) {
/* kernel subsystem misconfigured verifier */
verbose("invalid map_ptr to access map->value\n");
return -EACCES;
}
err = check_stack_boundary(env, regno, (*mapp)->value_size);
} else if (arg_type == ARG_CONST_STACK_SIZE) {
/* bpf_xxx(..., buf, len) call will access 'len' bytes
* from stack pointer 'buf'. Check it
* note: regno == len, regno - 1 == buf
*/
if (regno == 0) {
/* kernel subsystem misconfigured verifier */
verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
return -EACCES;
}
err = check_stack_boundary(env, regno - 1, reg->imm);
}
return err;
}
static int check_call(struct verifier_env *env, int func_id)
{
struct verifier_state *state = &env->cur_state;
const struct bpf_func_proto *fn = NULL;
struct reg_state *regs = state->regs;
struct bpf_map *map = NULL;
struct reg_state *reg;
int i, err;
/* find function prototype */
if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
verbose("invalid func %d\n", func_id);
return -EINVAL;
}
if (env->prog->aux->ops->get_func_proto)
fn = env->prog->aux->ops->get_func_proto(func_id);
if (!fn) {
verbose("unknown func %d\n", func_id);
return -EINVAL;
}
/* eBPF programs must be GPL compatible to use GPL-ed functions */
if (!env->prog->aux->is_gpl_compatible && fn->gpl_only) {
verbose("cannot call GPL only function from proprietary program\n");
return -EINVAL;
}
/* check args */
err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &map);
if (err)
return err;
err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &map);
if (err)
return err;
err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &map);
if (err)
return err;
err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &map);
if (err)
return err;
err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &map);
if (err)
return err;
/* reset caller saved regs */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
reg = regs + caller_saved[i];
reg->type = NOT_INIT;
reg->imm = 0;
}
/* update return register */
if (fn->ret_type == RET_INTEGER) {
regs[BPF_REG_0].type = UNKNOWN_VALUE;
} else if (fn->ret_type == RET_VOID) {
regs[BPF_REG_0].type = NOT_INIT;
} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
/* remember map_ptr, so that check_map_access()
* can check 'value_size' boundary of memory access
* to map element returned from bpf_map_lookup_elem()
*/
if (map == NULL) {
verbose("kernel subsystem misconfigured verifier\n");
return -EINVAL;
}
regs[BPF_REG_0].map_ptr = map;
} else {
verbose("unknown return type %d of func %d\n",
fn->ret_type, func_id);
return -EINVAL;
}
return 0;
}
/* check validity of 32-bit and 64-bit arithmetic operations */
static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
{
u8 opcode = BPF_OP(insn->code);
int err;
if (opcode == BPF_END || opcode == BPF_NEG) {
if (opcode == BPF_NEG) {
if (BPF_SRC(insn->code) != 0 ||
insn->src_reg != BPF_REG_0 ||
insn->off != 0 || insn->imm != 0) {
verbose("BPF_NEG uses reserved fields\n");
return -EINVAL;
}
} else {
if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
(insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) {
verbose("BPF_END uses reserved fields\n");
return -EINVAL;
}
}
/* check src operand */
err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
if (err)
return err;
/* check dest operand */
err = check_reg_arg(regs, insn->dst_reg, DST_OP);
if (err)
return err;
} else if (opcode == BPF_MOV) {
if (BPF_SRC(insn->code) == BPF_X) {
if (insn->imm != 0 || insn->off != 0) {
verbose("BPF_MOV uses reserved fields\n");
return -EINVAL;
}
/* check src operand */
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
if (err)
return err;
} else {
if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
verbose("BPF_MOV uses reserved fields\n");
return -EINVAL;
}
}
/* check dest operand */
err = check_reg_arg(regs, insn->dst_reg, DST_OP);
if (err)
return err;
if (BPF_SRC(insn->code) == BPF_X) {
if (BPF_CLASS(insn->code) == BPF_ALU64) {
/* case: R1 = R2
* copy register state to dest reg
*/
regs[insn->dst_reg] = regs[insn->src_reg];
} else {
regs[insn->dst_reg].type = UNKNOWN_VALUE;
regs[insn->dst_reg].map_ptr = NULL;
}
} else {
/* case: R = imm
* remember the value we stored into this reg
*/
regs[insn->dst_reg].type = CONST_IMM;
regs[insn->dst_reg].imm = insn->imm;
}
} else if (opcode > BPF_END) {
verbose("invalid BPF_ALU opcode %x\n", opcode);
return -EINVAL;
} else { /* all other ALU ops: and, sub, xor, add, ... */
bool stack_relative = false;
if (BPF_SRC(insn->code) == BPF_X) {
if (insn->imm != 0 || insn->off != 0) {
verbose("BPF_ALU uses reserved fields\n");