forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ctree.h
2654 lines (2295 loc) · 85.2 KB
/
ctree.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_CTREE__
#define __BTRFS_CTREE__
#include <linux/version.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/fs.h>
#include <linux/rwsem.h>
#include <linux/completion.h>
#include <linux/backing-dev.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/kobject.h>
#include <trace/events/btrfs.h>
#include <asm/kmap_types.h>
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
#include "ioctl.h"
struct btrfs_trans_handle;
struct btrfs_transaction;
struct btrfs_pending_snapshot;
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
extern struct kmem_cache *btrfs_path_cachep;
extern struct kmem_cache *btrfs_free_space_cachep;
struct btrfs_ordered_sum;
#define BTRFS_MAGIC "_BHRfS_M"
#define BTRFS_MAX_LEVEL 8
#define BTRFS_COMPAT_EXTENT_TREE_V0
/*
* files bigger than this get some pre-flushing when they are added
* to the ordered operations list. That way we limit the total
* work done by the commit
*/
#define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024)
/* holds pointers to all of the tree roots */
#define BTRFS_ROOT_TREE_OBJECTID 1ULL
/* stores information about which extents are in use, and reference counts */
#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
/*
* chunk tree stores translations from logical -> physical block numbering
* the super block points to the chunk tree
*/
#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
/*
* stores information about which areas of a given device are in use.
* one per device. The tree of tree roots points to the device tree
*/
#define BTRFS_DEV_TREE_OBJECTID 4ULL
/* one per subvolume, storing files and directories */
#define BTRFS_FS_TREE_OBJECTID 5ULL
/* directory objectid inside the root tree */
#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
/* holds checksums of all the data extents */
#define BTRFS_CSUM_TREE_OBJECTID 7ULL
/* orhpan objectid for tracking unlinked/truncated files */
#define BTRFS_ORPHAN_OBJECTID -5ULL
/* does write ahead logging to speed up fsyncs */
#define BTRFS_TREE_LOG_OBJECTID -6ULL
#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
/* for space balancing */
#define BTRFS_TREE_RELOC_OBJECTID -8ULL
#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
/*
* extent checksums all have this objectid
* this allows them to share the logging tree
* for fsyncs
*/
#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
/* For storing free space cache */
#define BTRFS_FREE_SPACE_OBJECTID -11ULL
/*
* The inode number assigned to the special inode for sotring
* free ino cache
*/
#define BTRFS_FREE_INO_OBJECTID -12ULL
/* dummy objectid represents multiple objectids */
#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
/*
* All files have objectids in this range.
*/
#define BTRFS_FIRST_FREE_OBJECTID 256ULL
#define BTRFS_LAST_FREE_OBJECTID -256ULL
#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
/*
* the device items go into the chunk tree. The key is in the form
* [ 1 BTRFS_DEV_ITEM_KEY device_id ]
*/
#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
#define BTRFS_BTREE_INODE_OBJECTID 1
#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
/*
* we can actually store much bigger names, but lets not confuse the rest
* of linux
*/
#define BTRFS_NAME_LEN 255
/* 32 bytes in various csum fields */
#define BTRFS_CSUM_SIZE 32
/* csum types */
#define BTRFS_CSUM_TYPE_CRC32 0
static int btrfs_csum_sizes[] = { 4, 0 };
/* four bytes for CRC32 */
#define BTRFS_EMPTY_DIR_SIZE 0
#define BTRFS_FT_UNKNOWN 0
#define BTRFS_FT_REG_FILE 1
#define BTRFS_FT_DIR 2
#define BTRFS_FT_CHRDEV 3
#define BTRFS_FT_BLKDEV 4
#define BTRFS_FT_FIFO 5
#define BTRFS_FT_SOCK 6
#define BTRFS_FT_SYMLINK 7
#define BTRFS_FT_XATTR 8
#define BTRFS_FT_MAX 9
/*
* The key defines the order in the tree, and so it also defines (optimal)
* block layout.
*
* objectid corresponds to the inode number.
*
* type tells us things about the object, and is a kind of stream selector.
* so for a given inode, keys with type of 1 might refer to the inode data,
* type of 2 may point to file data in the btree and type == 3 may point to
* extents.
*
* offset is the starting byte offset for this key in the stream.
*
* btrfs_disk_key is in disk byte order. struct btrfs_key is always
* in cpu native order. Otherwise they are identical and their sizes
* should be the same (ie both packed)
*/
struct btrfs_disk_key {
__le64 objectid;
u8 type;
__le64 offset;
} __attribute__ ((__packed__));
struct btrfs_key {
u64 objectid;
u8 type;
u64 offset;
} __attribute__ ((__packed__));
struct btrfs_mapping_tree {
struct extent_map_tree map_tree;
};
struct btrfs_dev_item {
/* the internal btrfs device id */
__le64 devid;
/* size of the device */
__le64 total_bytes;
/* bytes used */
__le64 bytes_used;
/* optimal io alignment for this device */
__le32 io_align;
/* optimal io width for this device */
__le32 io_width;
/* minimal io size for this device */
__le32 sector_size;
/* type and info about this device */
__le64 type;
/* expected generation for this device */
__le64 generation;
/*
* starting byte of this partition on the device,
* to allow for stripe alignment in the future
*/
__le64 start_offset;
/* grouping information for allocation decisions */
__le32 dev_group;
/* seek speed 0-100 where 100 is fastest */
u8 seek_speed;
/* bandwidth 0-100 where 100 is fastest */
u8 bandwidth;
/* btrfs generated uuid for this device */
u8 uuid[BTRFS_UUID_SIZE];
/* uuid of FS who owns this device */
u8 fsid[BTRFS_UUID_SIZE];
} __attribute__ ((__packed__));
struct btrfs_stripe {
__le64 devid;
__le64 offset;
u8 dev_uuid[BTRFS_UUID_SIZE];
} __attribute__ ((__packed__));
struct btrfs_chunk {
/* size of this chunk in bytes */
__le64 length;
/* objectid of the root referencing this chunk */
__le64 owner;
__le64 stripe_len;
__le64 type;
/* optimal io alignment for this chunk */
__le32 io_align;
/* optimal io width for this chunk */
__le32 io_width;
/* minimal io size for this chunk */
__le32 sector_size;
/* 2^16 stripes is quite a lot, a second limit is the size of a single
* item in the btree
*/
__le16 num_stripes;
/* sub stripes only matter for raid10 */
__le16 sub_stripes;
struct btrfs_stripe stripe;
/* additional stripes go here */
} __attribute__ ((__packed__));
#define BTRFS_FREE_SPACE_EXTENT 1
#define BTRFS_FREE_SPACE_BITMAP 2
struct btrfs_free_space_entry {
__le64 offset;
__le64 bytes;
u8 type;
} __attribute__ ((__packed__));
struct btrfs_free_space_header {
struct btrfs_disk_key location;
__le64 generation;
__le64 num_entries;
__le64 num_bitmaps;
} __attribute__ ((__packed__));
static inline unsigned long btrfs_chunk_item_size(int num_stripes)
{
BUG_ON(num_stripes == 0);
return sizeof(struct btrfs_chunk) +
sizeof(struct btrfs_stripe) * (num_stripes - 1);
}
#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
/*
* File system states
*/
/* Errors detected */
#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2)
#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
#define BTRFS_BACKREF_REV_MAX 256
#define BTRFS_BACKREF_REV_SHIFT 56
#define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \
BTRFS_BACKREF_REV_SHIFT)
#define BTRFS_OLD_BACKREF_REV 0
#define BTRFS_MIXED_BACKREF_REV 1
/*
* every tree block (leaf or node) starts with this header.
*/
struct btrfs_header {
/* these first four must match the super block */
u8 csum[BTRFS_CSUM_SIZE];
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
__le64 bytenr; /* which block this node is supposed to live in */
__le64 flags;
/* allowed to be different from the super from here on down */
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
__le64 generation;
__le64 owner;
__le32 nritems;
u8 level;
} __attribute__ ((__packed__));
#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \
sizeof(struct btrfs_header)) / \
sizeof(struct btrfs_key_ptr))
#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header))
#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize))
#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
sizeof(struct btrfs_item) - \
sizeof(struct btrfs_file_extent_item))
#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
sizeof(struct btrfs_item) -\
sizeof(struct btrfs_dir_item))
/*
* this is a very generous portion of the super block, giving us
* room to translate 14 chunks with 3 stripes each.
*/
#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
#define BTRFS_LABEL_SIZE 256
/*
* the super block basically lists the main trees of the FS
* it currently lacks any block count etc etc
*/
struct btrfs_super_block {
u8 csum[BTRFS_CSUM_SIZE];
/* the first 4 fields must match struct btrfs_header */
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
__le64 bytenr; /* this block number */
__le64 flags;
/* allowed to be different from the btrfs_header from here own down */
__le64 magic;
__le64 generation;
__le64 root;
__le64 chunk_root;
__le64 log_root;
/* this will help find the new super based on the log root */
__le64 log_root_transid;
__le64 total_bytes;
__le64 bytes_used;
__le64 root_dir_objectid;
__le64 num_devices;
__le32 sectorsize;
__le32 nodesize;
__le32 leafsize;
__le32 stripesize;
__le32 sys_chunk_array_size;
__le64 chunk_root_generation;
__le64 compat_flags;
__le64 compat_ro_flags;
__le64 incompat_flags;
__le16 csum_type;
u8 root_level;
u8 chunk_root_level;
u8 log_root_level;
struct btrfs_dev_item dev_item;
char label[BTRFS_LABEL_SIZE];
__le64 cache_generation;
/* future expansion */
__le64 reserved[31];
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
} __attribute__ ((__packed__));
/*
* Compat flags that we support. If any incompat flags are set other than the
* ones specified below then we will fail to mount
*/
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
#define BTRFS_FEATURE_INCOMPAT_SUPP \
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
/*
* A leaf is full of items. offset and size tell us where to find
* the item in the leaf (relative to the start of the data area)
*/
struct btrfs_item {
struct btrfs_disk_key key;
__le32 offset;
__le32 size;
} __attribute__ ((__packed__));
/*
* leaves have an item area and a data area:
* [item0, item1....itemN] [free space] [dataN...data1, data0]
*
* The data is separate from the items to get the keys closer together
* during searches.
*/
struct btrfs_leaf {
struct btrfs_header header;
struct btrfs_item items[];
} __attribute__ ((__packed__));
/*
* all non-leaf blocks are nodes, they hold only keys and pointers to
* other blocks
*/
struct btrfs_key_ptr {
struct btrfs_disk_key key;
__le64 blockptr;
__le64 generation;
} __attribute__ ((__packed__));
struct btrfs_node {
struct btrfs_header header;
struct btrfs_key_ptr ptrs[];
} __attribute__ ((__packed__));
/*
* btrfs_paths remember the path taken from the root down to the leaf.
* level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point
* to any other levels that are present.
*
* The slots array records the index of the item or block pointer
* used while walking the tree.
*/
struct btrfs_path {
struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
int slots[BTRFS_MAX_LEVEL];
/* if there is real range locking, this locks field will change */
int locks[BTRFS_MAX_LEVEL];
int reada;
/* keep some upper locks as we walk down */
int lowest_level;
/*
* set by btrfs_split_item, tells search_slot to keep all locks
* and to force calls to keep space in the nodes
*/
unsigned int search_for_split:1;
unsigned int keep_locks:1;
unsigned int skip_locking:1;
unsigned int leave_spinning:1;
unsigned int search_commit_root:1;
};
/*
* items in the extent btree are used to record the objectid of the
* owner of the block and the number of references
*/
struct btrfs_extent_item {
__le64 refs;
__le64 generation;
__le64 flags;
} __attribute__ ((__packed__));
struct btrfs_extent_item_v0 {
__le32 refs;
} __attribute__ ((__packed__));
#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r) >> 4) - \
sizeof(struct btrfs_item))
#define BTRFS_EXTENT_FLAG_DATA (1ULL << 0)
#define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1)
/* following flags only apply to tree blocks */
/* use full backrefs for extent pointers in the block */
#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8)
/*
* this flag is only used internally by scrub and may be changed at any time
* it is only declared here to avoid collisions
*/
#define BTRFS_EXTENT_FLAG_SUPER (1ULL << 48)
struct btrfs_tree_block_info {
struct btrfs_disk_key key;
u8 level;
} __attribute__ ((__packed__));
struct btrfs_extent_data_ref {
__le64 root;
__le64 objectid;
__le64 offset;
__le32 count;
} __attribute__ ((__packed__));
struct btrfs_shared_data_ref {
__le32 count;
} __attribute__ ((__packed__));
struct btrfs_extent_inline_ref {
u8 type;
__le64 offset;
} __attribute__ ((__packed__));
/* old style backrefs item */
struct btrfs_extent_ref_v0 {
__le64 root;
__le64 generation;
__le64 objectid;
__le32 count;
} __attribute__ ((__packed__));
/* dev extents record free space on individual devices. The owner
* field points back to the chunk allocation mapping tree that allocated
* the extent. The chunk tree uuid field is a way to double check the owner
*/
struct btrfs_dev_extent {
__le64 chunk_tree;
__le64 chunk_objectid;
__le64 chunk_offset;
__le64 length;
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
} __attribute__ ((__packed__));
struct btrfs_inode_ref {
__le64 index;
__le16 name_len;
/* name goes here */
} __attribute__ ((__packed__));
struct btrfs_timespec {
__le64 sec;
__le32 nsec;
} __attribute__ ((__packed__));
enum btrfs_compression_type {
BTRFS_COMPRESS_NONE = 0,
BTRFS_COMPRESS_ZLIB = 1,
BTRFS_COMPRESS_LZO = 2,
BTRFS_COMPRESS_TYPES = 2,
BTRFS_COMPRESS_LAST = 3,
};
struct btrfs_inode_item {
/* nfs style generation number */
__le64 generation;
/* transid that last touched this inode */
__le64 transid;
__le64 size;
__le64 nbytes;
__le64 block_group;
__le32 nlink;
__le32 uid;
__le32 gid;
__le32 mode;
__le64 rdev;
__le64 flags;
/* modification sequence number for NFS */
__le64 sequence;
/*
* a little future expansion, for more than this we can
* just grow the inode item and version it
*/
__le64 reserved[4];
struct btrfs_timespec atime;
struct btrfs_timespec ctime;
struct btrfs_timespec mtime;
struct btrfs_timespec otime;
} __attribute__ ((__packed__));
struct btrfs_dir_log_item {
__le64 end;
} __attribute__ ((__packed__));
struct btrfs_dir_item {
struct btrfs_disk_key location;
__le64 transid;
__le16 data_len;
__le16 name_len;
u8 type;
} __attribute__ ((__packed__));
#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
struct btrfs_root_item {
struct btrfs_inode_item inode;
__le64 generation;
__le64 root_dirid;
__le64 bytenr;
__le64 byte_limit;
__le64 bytes_used;
__le64 last_snapshot;
__le64 flags;
__le32 refs;
struct btrfs_disk_key drop_progress;
u8 drop_level;
u8 level;
} __attribute__ ((__packed__));
/*
* this is used for both forward and backward root refs
*/
struct btrfs_root_ref {
__le64 dirid;
__le64 sequence;
__le16 name_len;
} __attribute__ ((__packed__));
#define BTRFS_FILE_EXTENT_INLINE 0
#define BTRFS_FILE_EXTENT_REG 1
#define BTRFS_FILE_EXTENT_PREALLOC 2
struct btrfs_file_extent_item {
/*
* transaction id that created this extent
*/
__le64 generation;
/*
* max number of bytes to hold this extent in ram
* when we split a compressed extent we can't know how big
* each of the resulting pieces will be. So, this is
* an upper limit on the size of the extent in ram instead of
* an exact limit.
*/
__le64 ram_bytes;
/*
* 32 bits for the various ways we might encode the data,
* including compression and encryption. If any of these
* are set to something a given disk format doesn't understand
* it is treated like an incompat flag for reading and writing,
* but not for stat.
*/
u8 compression;
u8 encryption;
__le16 other_encoding; /* spare for later use */
/* are we inline data or a real extent? */
u8 type;
/*
* disk space consumed by the extent, checksum blocks are included
* in these numbers
*/
__le64 disk_bytenr;
__le64 disk_num_bytes;
/*
* the logical offset in file blocks (no csums)
* this extent record is for. This allows a file extent to point
* into the middle of an existing extent on disk, sharing it
* between two snapshots (useful if some bytes in the middle of the
* extent have changed
*/
__le64 offset;
/*
* the logical number of file blocks (no csums included). This
* always reflects the size uncompressed and without encoding.
*/
__le64 num_bytes;
} __attribute__ ((__packed__));
struct btrfs_csum_item {
u8 csum;
} __attribute__ ((__packed__));
/* different types of block groups (and chunks) */
#define BTRFS_BLOCK_GROUP_DATA (1 << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1)
#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
#define BTRFS_BLOCK_GROUP_RAID0 (1 << 3)
#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4)
#define BTRFS_BLOCK_GROUP_DUP (1 << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6)
#define BTRFS_NR_RAID_TYPES 5
struct btrfs_block_group_item {
__le64 used;
__le64 chunk_objectid;
__le64 flags;
} __attribute__ ((__packed__));
struct btrfs_space_info {
u64 flags;
u64 total_bytes; /* total bytes in the space,
this doesn't take mirrors into account */
u64 bytes_used; /* total bytes used,
this doesn't take mirrors into account */
u64 bytes_pinned; /* total bytes pinned, will be freed when the
transaction finishes */
u64 bytes_reserved; /* total bytes the allocator has reserved for
current allocations */
u64 bytes_readonly; /* total bytes that are read only */
u64 bytes_may_use; /* number of bytes that may be used for
delalloc/allocations */
u64 disk_used; /* total bytes used on disk */
u64 disk_total; /* total bytes on disk, takes mirrors into
account */
/*
* we bump reservation progress every time we decrement
* bytes_reserved. This way people waiting for reservations
* know something good has happened and they can check
* for progress. The number here isn't to be trusted, it
* just shows reclaim activity
*/
unsigned long reservation_progress;
unsigned int full:1; /* indicates that we cannot allocate any more
chunks for this space */
unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
unsigned int force_alloc; /* set if we need to force a chunk
alloc for this space */
struct list_head list;
/* for block groups in our same type */
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
spinlock_t lock;
struct rw_semaphore groups_sem;
atomic_t caching_threads;
};
struct btrfs_block_rsv {
u64 size;
u64 reserved;
u64 freed[2];
struct btrfs_space_info *space_info;
struct list_head list;
spinlock_t lock;
atomic_t usage;
unsigned int priority:8;
unsigned int durable:1;
unsigned int refill_used:1;
unsigned int full:1;
};
/*
* free clusters are used to claim free space in relatively large chunks,
* allowing us to do less seeky writes. They are used for all metadata
* allocations and data allocations in ssd mode.
*/
struct btrfs_free_cluster {
spinlock_t lock;
spinlock_t refill_lock;
struct rb_root root;
/* largest extent in this cluster */
u64 max_size;
/* first extent starting offset */
u64 window_start;
struct btrfs_block_group_cache *block_group;
/*
* when a cluster is allocated from a block group, we put the
* cluster onto a list in the block group so that it can
* be freed before the block group is freed.
*/
struct list_head block_group_list;
};
enum btrfs_caching_type {
BTRFS_CACHE_NO = 0,
BTRFS_CACHE_STARTED = 1,
BTRFS_CACHE_FINISHED = 2,
};
enum btrfs_disk_cache_state {
BTRFS_DC_WRITTEN = 0,
BTRFS_DC_ERROR = 1,
BTRFS_DC_CLEAR = 2,
BTRFS_DC_SETUP = 3,
BTRFS_DC_NEED_WRITE = 4,
};
struct btrfs_caching_control {
struct list_head list;
struct mutex mutex;
wait_queue_head_t wait;
struct btrfs_block_group_cache *block_group;
u64 progress;
atomic_t count;
};
struct btrfs_block_group_cache {
struct btrfs_key key;
struct btrfs_block_group_item item;
struct btrfs_fs_info *fs_info;
struct inode *inode;
spinlock_t lock;
u64 pinned;
u64 reserved;
u64 reserved_pinned;
u64 bytes_super;
u64 flags;
u64 sectorsize;
unsigned int ro:1;
unsigned int dirty:1;
unsigned int iref:1;
int disk_cache_state;
/* cache tracking stuff */
int cached;
struct btrfs_caching_control *caching_ctl;
u64 last_byte_to_unpin;
struct btrfs_space_info *space_info;
/* free space cache stuff */
struct btrfs_free_space_ctl *free_space_ctl;
/* block group cache stuff */
struct rb_node cache_node;
/* for block groups in the same raid type */
struct list_head list;
/* usage count */
atomic_t count;
/* List of struct btrfs_free_clusters for this block group.
* Today it will only have one thing on it, but that may change
*/
struct list_head cluster_list;
};
struct reloc_control;
struct btrfs_device;
struct btrfs_fs_devices;
struct btrfs_delayed_root;
struct btrfs_fs_info {
u8 fsid[BTRFS_FSID_SIZE];
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
struct btrfs_root *extent_root;
struct btrfs_root *tree_root;
struct btrfs_root *chunk_root;
struct btrfs_root *dev_root;
struct btrfs_root *fs_root;
struct btrfs_root *csum_root;
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
spinlock_t fs_roots_radix_lock;
struct radix_tree_root fs_roots_radix;
/* block group cache stuff */
spinlock_t block_group_cache_lock;
struct rb_root block_group_cache_tree;
struct extent_io_tree freed_extents[2];
struct extent_io_tree *pinned_extents;
/* logical->physical extent mapping */
struct btrfs_mapping_tree mapping_tree;
/*
* block reservation for extent, checksum, root tree and
* delayed dir index item
*/
struct btrfs_block_rsv global_block_rsv;
/* block reservation for delay allocation */
struct btrfs_block_rsv delalloc_block_rsv;
/* block reservation for metadata operations */
struct btrfs_block_rsv trans_block_rsv;
/* block reservation for chunk tree */
struct btrfs_block_rsv chunk_block_rsv;
struct btrfs_block_rsv empty_block_rsv;
/* list of block reservations that cross multiple transactions */
struct list_head durable_block_rsv_list;
struct mutex durable_block_rsv_mutex;
u64 generation;
u64 last_trans_committed;
/*
* this is updated to the current trans every time a full commit
* is required instead of the faster short fsync log commits
*/
u64 last_trans_log_full_commit;
u64 open_ioctl_trans;
unsigned long mount_opt:20;
unsigned long compress_type:4;
u64 max_inline;
u64 alloc_start;
struct btrfs_transaction *running_transaction;
wait_queue_head_t transaction_throttle;
wait_queue_head_t transaction_wait;
wait_queue_head_t transaction_blocked_wait;
wait_queue_head_t async_submit_wait;
struct btrfs_super_block super_copy;
struct btrfs_super_block super_for_commit;
struct block_device *__bdev;
struct super_block *sb;
struct inode *btree_inode;
struct backing_dev_info bdi;
struct mutex trans_mutex;
struct mutex tree_log_mutex;
struct mutex transaction_kthread_mutex;
struct mutex cleaner_mutex;
struct mutex chunk_mutex;
struct mutex volume_mutex;
/*
* this protects the ordered operations list only while we are
* processing all of the entries on it. This way we make
* sure the commit code doesn't find the list temporarily empty
* because another function happens to be doing non-waiting preflush
* before jumping into the main commit.
*/
struct mutex ordered_operations_mutex;
struct rw_semaphore extent_commit_sem;
struct rw_semaphore cleanup_work_sem;
struct rw_semaphore subvol_sem;
struct srcu_struct subvol_srcu;
struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
struct list_head caching_block_groups;
spinlock_t delayed_iput_lock;
struct list_head delayed_iputs;
atomic_t nr_async_submits;
atomic_t async_submit_draining;
atomic_t nr_async_bios;
atomic_t async_delalloc_pages;
/*
* this is used by the balancing code to wait for all the pending
* ordered extents
*/
spinlock_t ordered_extent_lock;
/*
* all of the data=ordered extents pending writeback
* these can span multiple transactions and basically include
* every dirty data page that isn't from nodatacow
*/
struct list_head ordered_extents;
/*
* all of the inodes that have delalloc bytes. It is possible for
* this list to be empty even when there is still dirty data=ordered
* extents waiting to finish IO.