forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 0
/
f2fs.h
4140 lines (3613 loc) · 132 KB
/
f2fs.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/f2fs.h
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
*/
#ifndef _LINUX_F2FS_H
#define _LINUX_F2FS_H
#include <linux/uio.h>
#include <linux/types.h>
#include <linux/page-flags.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/crc32.h>
#include <linux/magic.h>
#include <linux/kobject.h>
#include <linux/sched.h>
#include <linux/cred.h>
#include <linux/vmalloc.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/quotaops.h>
#include <linux/part_stat.h>
#include <crypto/hash.h>
#include <linux/fscrypt.h>
#include <linux/fsverity.h>
#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
#else
#define f2fs_bug_on(sbi, condition) \
do { \
if (WARN_ON(condition)) \
set_sbi_flag(sbi, SBI_NEED_FSCK); \
} while (0)
#endif
enum {
FAULT_KMALLOC,
FAULT_KVMALLOC,
FAULT_PAGE_ALLOC,
FAULT_PAGE_GET,
FAULT_ALLOC_BIO,
FAULT_ALLOC_NID,
FAULT_ORPHAN,
FAULT_BLOCK,
FAULT_DIR_DEPTH,
FAULT_EVICT_INODE,
FAULT_TRUNCATE,
FAULT_READ_IO,
FAULT_CHECKPOINT,
FAULT_DISCARD,
FAULT_WRITE_IO,
FAULT_MAX,
};
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1)
struct f2fs_fault_info {
atomic_t inject_ops;
unsigned int inject_rate;
unsigned int inject_type;
};
extern const char *f2fs_fault_name[FAULT_MAX];
#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
#endif
/*
* For mount options
*/
#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002
#define F2FS_MOUNT_DISCARD 0x00000004
#define F2FS_MOUNT_NOHEAP 0x00000008
#define F2FS_MOUNT_XATTR_USER 0x00000010
#define F2FS_MOUNT_POSIX_ACL 0x00000020
#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
#define F2FS_MOUNT_INLINE_XATTR 0x00000080
#define F2FS_MOUNT_INLINE_DATA 0x00000100
#define F2FS_MOUNT_INLINE_DENTRY 0x00000200
#define F2FS_MOUNT_FLUSH_MERGE 0x00000400
#define F2FS_MOUNT_NOBARRIER 0x00000800
#define F2FS_MOUNT_FASTBOOT 0x00001000
#define F2FS_MOUNT_EXTENT_CACHE 0x00002000
#define F2FS_MOUNT_DATA_FLUSH 0x00008000
#define F2FS_MOUNT_FAULT_INJECTION 0x00010000
#define F2FS_MOUNT_USRQUOTA 0x00080000
#define F2FS_MOUNT_GRPQUOTA 0x00100000
#define F2FS_MOUNT_PRJQUOTA 0x00200000
#define F2FS_MOUNT_QUOTA 0x00400000
#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000
#define F2FS_MOUNT_RESERVE_ROOT 0x01000000
#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000
#define F2FS_MOUNT_NORECOVERY 0x04000000
#define F2FS_MOUNT_ATGC 0x08000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (F2FS_OPTION(sbi).opt |= F2FS_MOUNT_##option)
#define test_opt(sbi, option) (F2FS_OPTION(sbi).opt & F2FS_MOUNT_##option)
#define ver_after(a, b) (typecheck(unsigned long long, a) && \
typecheck(unsigned long long, b) && \
((long long)((a) - (b)) > 0))
typedef u32 block_t; /*
* should not change u32, since it is the on-disk block
* address format, __le32.
*/
typedef u32 nid_t;
#define COMPRESS_EXT_NUM 16
struct f2fs_mount_info {
unsigned int opt;
int write_io_size_bits; /* Write IO size bits */
block_t root_reserved_blocks; /* root reserved blocks */
kuid_t s_resuid; /* reserved blocks for uid */
kgid_t s_resgid; /* reserved blocks for gid */
int active_logs; /* # of active logs */
int inline_xattr_size; /* inline xattr size */
#ifdef CONFIG_F2FS_FAULT_INJECTION
struct f2fs_fault_info fault_info; /* For fault injection */
#endif
#ifdef CONFIG_QUOTA
/* Names of quota files with journalled quota */
char *s_qf_names[MAXQUOTAS];
int s_jquota_fmt; /* Format of quota to use */
#endif
/* For which write hints are passed down to block layer */
int whint_mode;
int alloc_mode; /* segment allocation policy */
int fsync_mode; /* fsync policy */
int fs_mode; /* fs mode: LFS or ADAPTIVE */
int bggc_mode; /* bggc mode: off, on or sync */
struct fscrypt_dummy_policy dummy_enc_policy; /* test dummy encryption */
block_t unusable_cap_perc; /* percentage for cap */
block_t unusable_cap; /* Amount of space allowed to be
* unusable when disabling checkpoint
*/
/* For compression */
unsigned char compress_algorithm; /* algorithm type */
unsigned char compress_log_size; /* cluster log size */
bool compress_chksum; /* compressed data chksum */
unsigned char compress_ext_cnt; /* extension count */
int compress_mode; /* compression mode */
unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */
};
#define F2FS_FEATURE_ENCRYPT 0x0001
#define F2FS_FEATURE_BLKZONED 0x0002
#define F2FS_FEATURE_ATOMIC_WRITE 0x0004
#define F2FS_FEATURE_EXTRA_ATTR 0x0008
#define F2FS_FEATURE_PRJQUOTA 0x0010
#define F2FS_FEATURE_INODE_CHKSUM 0x0020
#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040
#define F2FS_FEATURE_QUOTA_INO 0x0080
#define F2FS_FEATURE_INODE_CRTIME 0x0100
#define F2FS_FEATURE_LOST_FOUND 0x0200
#define F2FS_FEATURE_VERITY 0x0400
#define F2FS_FEATURE_SB_CHKSUM 0x0800
#define F2FS_FEATURE_CASEFOLD 0x1000
#define F2FS_FEATURE_COMPRESSION 0x2000
#define __F2FS_HAS_FEATURE(raw_super, mask) \
((raw_super->feature & cpu_to_le32(mask)) != 0)
#define F2FS_HAS_FEATURE(sbi, mask) __F2FS_HAS_FEATURE(sbi->raw_super, mask)
#define F2FS_SET_FEATURE(sbi, mask) \
(sbi->raw_super->feature |= cpu_to_le32(mask))
#define F2FS_CLEAR_FEATURE(sbi, mask) \
(sbi->raw_super->feature &= ~cpu_to_le32(mask))
/*
* Default values for user and/or group using reserved blocks
*/
#define F2FS_DEF_RESUID 0
#define F2FS_DEF_RESGID 0
/*
* For checkpoint manager
*/
enum {
NAT_BITMAP,
SIT_BITMAP
};
#define CP_UMOUNT 0x00000001
#define CP_FASTBOOT 0x00000002
#define CP_SYNC 0x00000004
#define CP_RECOVERY 0x00000008
#define CP_DISCARD 0x00000010
#define CP_TRIMMED 0x00000020
#define CP_PAUSE 0x00000040
#define CP_RESIZE 0x00000080
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */
#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
#define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over 80% */
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
#define DEF_DISABLE_INTERVAL 5 /* 5 secs */
#define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 secs */
#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */
struct cp_control {
int reason;
__u64 trim_start;
__u64 trim_end;
__u64 trim_minlen;
};
/*
* indicate meta/data type
*/
enum {
META_CP,
META_NAT,
META_SIT,
META_SSA,
META_MAX,
META_POR,
DATA_GENERIC, /* check range only */
DATA_GENERIC_ENHANCE, /* strong check on range and segment bitmap */
DATA_GENERIC_ENHANCE_READ, /*
* strong check on range and segment
* bitmap but no warning due to race
* condition of read on truncated area
* by extent_cache
*/
META_GENERIC,
};
/* for the list of ino */
enum {
ORPHAN_INO, /* for orphan ino list */
APPEND_INO, /* for append ino list */
UPDATE_INO, /* for update ino list */
TRANS_DIR_INO, /* for trasactions dir ino list */
FLUSH_INO, /* for multiple device flushing */
MAX_INO_ENTRY, /* max. list */
};
struct ino_entry {
struct list_head list; /* list head */
nid_t ino; /* inode number */
unsigned int dirty_device; /* dirty device bitmap */
};
/* for the list of inodes to be GCed */
struct inode_entry {
struct list_head list; /* list head */
struct inode *inode; /* vfs inode pointer */
};
struct fsync_node_entry {
struct list_head list; /* list head */
struct page *page; /* warm node page pointer */
unsigned int seq_id; /* sequence id */
};
/* for the bitmap indicate blocks to be discarded */
struct discard_entry {
struct list_head list; /* list head */
block_t start_blkaddr; /* start blockaddr of current segment */
unsigned char discard_map[SIT_VBLOCK_MAP_SIZE]; /* segment discard bitmap */
};
/* default discard granularity of inner discard thread, unit: block count */
#define DEFAULT_DISCARD_GRANULARITY 16
/* max discard pend list number */
#define MAX_PLIST_NUM 512
#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \
(MAX_PLIST_NUM - 1) : ((blk_num) - 1))
enum {
D_PREP, /* initial */
D_PARTIAL, /* partially submitted */
D_SUBMIT, /* all submitted */
D_DONE, /* finished */
};
struct discard_info {
block_t lstart; /* logical start address */
block_t len; /* length */
block_t start; /* actual start address in dev */
};
struct discard_cmd {
struct rb_node rb_node; /* rb node located in rb-tree */
union {
struct {
block_t lstart; /* logical start address */
block_t len; /* length */
block_t start; /* actual start address in dev */
};
struct discard_info di; /* discard info */
};
struct list_head list; /* command list */
struct completion wait; /* compleation */
struct block_device *bdev; /* bdev */
unsigned short ref; /* reference count */
unsigned char state; /* state */
unsigned char queued; /* queued discard */
int error; /* bio error */
spinlock_t lock; /* for state/bio_ref updating */
unsigned short bio_ref; /* bio reference count */
};
enum {
DPOLICY_BG,
DPOLICY_FORCE,
DPOLICY_FSTRIM,
DPOLICY_UMOUNT,
MAX_DPOLICY,
};
struct discard_policy {
int type; /* type of discard */
unsigned int min_interval; /* used for candidates exist */
unsigned int mid_interval; /* used for device busy */
unsigned int max_interval; /* used for candidates not exist */
unsigned int max_requests; /* # of discards issued per round */
unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */
bool io_aware; /* issue discard in idle time */
bool sync; /* submit discard with REQ_SYNC flag */
bool ordered; /* issue discard by lba order */
bool timeout; /* discard timeout for put_super */
unsigned int granularity; /* discard granularity */
};
struct discard_cmd_control {
struct task_struct *f2fs_issue_discard; /* discard thread */
struct list_head entry_list; /* 4KB discard entry list */
struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */
struct list_head wait_list; /* store on-flushing entries */
struct list_head fstrim_list; /* in-flight discard from fstrim */
wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */
unsigned int discard_wake; /* to wake up discard thread */
struct mutex cmd_lock;
unsigned int nr_discards; /* # of discards in the list */
unsigned int max_discards; /* max. discards to be issued */
unsigned int discard_granularity; /* discard granularity */
unsigned int undiscard_blks; /* # of undiscard blocks */
unsigned int next_pos; /* next discard position */
atomic_t issued_discard; /* # of issued discard */
atomic_t queued_discard; /* # of queued discard */
atomic_t discard_cmd_cnt; /* # of cached cmd count */
struct rb_root_cached root; /* root of discard rb-tree */
bool rbtree_check; /* config for consistence check */
};
/* for the list of fsync inodes, used only during recovery */
struct fsync_inode_entry {
struct list_head list; /* list head */
struct inode *inode; /* vfs inode pointer */
block_t blkaddr; /* block address locating the last fsync */
block_t last_dentry; /* block address locating the last dentry */
};
#define nats_in_cursum(jnl) (le16_to_cpu((jnl)->n_nats))
#define sits_in_cursum(jnl) (le16_to_cpu((jnl)->n_sits))
#define nat_in_journal(jnl, i) ((jnl)->nat_j.entries[i].ne)
#define nid_in_journal(jnl, i) ((jnl)->nat_j.entries[i].nid)
#define sit_in_journal(jnl, i) ((jnl)->sit_j.entries[i].se)
#define segno_in_journal(jnl, i) ((jnl)->sit_j.entries[i].segno)
#define MAX_NAT_JENTRIES(jnl) (NAT_JOURNAL_ENTRIES - nats_in_cursum(jnl))
#define MAX_SIT_JENTRIES(jnl) (SIT_JOURNAL_ENTRIES - sits_in_cursum(jnl))
static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i)
{
int before = nats_in_cursum(journal);
journal->n_nats = cpu_to_le16(before + i);
return before;
}
static inline int update_sits_in_cursum(struct f2fs_journal *journal, int i)
{
int before = sits_in_cursum(journal);
journal->n_sits = cpu_to_le16(before + i);
return before;
}
static inline bool __has_cursum_space(struct f2fs_journal *journal,
int size, int type)
{
if (type == NAT_JOURNAL)
return size <= MAX_NAT_JENTRIES(journal);
return size <= MAX_SIT_JENTRIES(journal);
}
/* for inline stuff */
#define DEF_INLINE_RESERVED_SIZE 1
static inline int get_extra_isize(struct inode *inode);
static inline int get_inline_xattr_addrs(struct inode *inode);
#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \
(CUR_ADDRS_PER_INODE(inode) - \
get_inline_xattr_addrs(inode) - \
DEF_INLINE_RESERVED_SIZE))
/* for inline dir */
#define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \
((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
BITS_PER_BYTE + 1))
#define INLINE_DENTRY_BITMAP_SIZE(inode) \
DIV_ROUND_UP(NR_INLINE_DENTRY(inode), BITS_PER_BYTE)
#define INLINE_RESERVED_SIZE(inode) (MAX_INLINE_DATA(inode) - \
((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
NR_INLINE_DENTRY(inode) + \
INLINE_DENTRY_BITMAP_SIZE(inode)))
/*
* For INODE and NODE manager
*/
/* for directory operations */
struct f2fs_filename {
/*
* The filename the user specified. This is NULL for some
* filesystem-internal operations, e.g. converting an inline directory
* to a non-inline one, or roll-forward recovering an encrypted dentry.
*/
const struct qstr *usr_fname;
/*
* The on-disk filename. For encrypted directories, this is encrypted.
* This may be NULL for lookups in an encrypted dir without the key.
*/
struct fscrypt_str disk_name;
/* The dirhash of this filename */
f2fs_hash_t hash;
#ifdef CONFIG_FS_ENCRYPTION
/*
* For lookups in encrypted directories: either the buffer backing
* disk_name, or a buffer that holds the decoded no-key name.
*/
struct fscrypt_str crypto_buf;
#endif
#ifdef CONFIG_UNICODE
/*
* For casefolded directories: the casefolded name, but it's left NULL
* if the original name is not valid Unicode, if the directory is both
* casefolded and encrypted and its encryption key is unavailable, or if
* the filesystem is doing an internal operation where usr_fname is also
* NULL. In all these cases we fall back to treating the name as an
* opaque byte sequence.
*/
struct fscrypt_str cf_name;
#endif
};
struct f2fs_dentry_ptr {
struct inode *inode;
void *bitmap;
struct f2fs_dir_entry *dentry;
__u8 (*filename)[F2FS_SLOT_LEN];
int max;
int nr_bitmap;
};
static inline void make_dentry_ptr_block(struct inode *inode,
struct f2fs_dentry_ptr *d, struct f2fs_dentry_block *t)
{
d->inode = inode;
d->max = NR_DENTRY_IN_BLOCK;
d->nr_bitmap = SIZE_OF_DENTRY_BITMAP;
d->bitmap = t->dentry_bitmap;
d->dentry = t->dentry;
d->filename = t->filename;
}
static inline void make_dentry_ptr_inline(struct inode *inode,
struct f2fs_dentry_ptr *d, void *t)
{
int entry_cnt = NR_INLINE_DENTRY(inode);
int bitmap_size = INLINE_DENTRY_BITMAP_SIZE(inode);
int reserved_size = INLINE_RESERVED_SIZE(inode);
d->inode = inode;
d->max = entry_cnt;
d->nr_bitmap = bitmap_size;
d->bitmap = t;
d->dentry = t + bitmap_size + reserved_size;
d->filename = t + bitmap_size + reserved_size +
SIZE_OF_DIR_ENTRY * entry_cnt;
}
/*
* XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1
* as its node offset to distinguish from index node blocks.
* But some bits are used to mark the node block.
*/
#define XATTR_NODE_OFFSET ((((unsigned int)-1) << OFFSET_BIT_SHIFT) \
>> OFFSET_BIT_SHIFT)
enum {
ALLOC_NODE, /* allocate a new node page if needed */
LOOKUP_NODE, /* look up a node without readahead */
LOOKUP_NODE_RA, /*
* look up a node with readahead called
* by get_data_block.
*/
};
#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */
/* congestion wait timeout value, default: 20ms */
#define DEFAULT_IO_TIMEOUT (msecs_to_jiffies(20))
/* maximum retry quota flush count */
#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8
#define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */
#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
/* for in-memory extent cache entry */
#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */
/* number of extent info in extent cache we try to shrink */
#define EXTENT_CACHE_SHRINK_NUMBER 128
struct rb_entry {
struct rb_node rb_node; /* rb node located in rb-tree */
union {
struct {
unsigned int ofs; /* start offset of the entry */
unsigned int len; /* length of the entry */
};
unsigned long long key; /* 64-bits key */
} __packed;
};
struct extent_info {
unsigned int fofs; /* start offset in a file */
unsigned int len; /* length of the extent */
u32 blk; /* start block address of the extent */
};
struct extent_node {
struct rb_node rb_node; /* rb node located in rb-tree */
struct extent_info ei; /* extent info */
struct list_head list; /* node in global extent list of sbi */
struct extent_tree *et; /* extent tree pointer */
};
struct extent_tree {
nid_t ino; /* inode number */
struct rb_root_cached root; /* root of extent info rb-tree */
struct extent_node *cached_en; /* recently accessed extent node */
struct extent_info largest; /* largested extent info */
struct list_head list; /* to be used by sbi->zombie_list */
rwlock_t lock; /* protect extent info rb-tree */
atomic_t node_cnt; /* # of extent node in rb-tree*/
bool largest_updated; /* largest extent updated */
};
/*
* This structure is taken from ext4_map_blocks.
*
* Note that, however, f2fs uses NEW and MAPPED flags for f2fs_map_blocks().
*/
#define F2FS_MAP_NEW (1 << BH_New)
#define F2FS_MAP_MAPPED (1 << BH_Mapped)
#define F2FS_MAP_UNWRITTEN (1 << BH_Unwritten)
#define F2FS_MAP_FLAGS (F2FS_MAP_NEW | F2FS_MAP_MAPPED |\
F2FS_MAP_UNWRITTEN)
struct f2fs_map_blocks {
block_t m_pblk;
block_t m_lblk;
unsigned int m_len;
unsigned int m_flags;
pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */
pgoff_t *m_next_extent; /* point to next possible extent */
int m_seg_type;
bool m_may_create; /* indicate it is from write path */
};
/* for flag in get_data_block */
enum {
F2FS_GET_BLOCK_DEFAULT,
F2FS_GET_BLOCK_FIEMAP,
F2FS_GET_BLOCK_BMAP,
F2FS_GET_BLOCK_DIO,
F2FS_GET_BLOCK_PRE_DIO,
F2FS_GET_BLOCK_PRE_AIO,
F2FS_GET_BLOCK_PRECACHE,
};
/*
* i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
*/
#define FADVISE_COLD_BIT 0x01
#define FADVISE_LOST_PINO_BIT 0x02
#define FADVISE_ENCRYPT_BIT 0x04
#define FADVISE_ENC_NAME_BIT 0x08
#define FADVISE_KEEP_SIZE_BIT 0x10
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40
#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT)
#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT)
#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT)
#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT)
#define file_is_encrypt(inode) is_file(inode, FADVISE_ENCRYPT_BIT)
#define file_set_encrypt(inode) set_file(inode, FADVISE_ENCRYPT_BIT)
#define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT)
#define file_enc_name(inode) is_file(inode, FADVISE_ENC_NAME_BIT)
#define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT)
#define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT)
#define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT)
#define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT)
#define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT)
#define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT)
#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
#define DEF_DIR_LEVEL 0
enum {
GC_FAILURE_PIN,
GC_FAILURE_ATOMIC,
MAX_GC_FAILURE
};
/* used for f2fs_inode_info->flags */
enum {
FI_NEW_INODE, /* indicate newly allocated inode */
FI_DIRTY_INODE, /* indicate inode is dirty or not */
FI_AUTO_RECOVER, /* indicate inode is recoverable */
FI_DIRTY_DIR, /* indicate directory has dirty pages */
FI_INC_LINK, /* need to increment i_nlink */
FI_ACL_MODE, /* indicate acl mode */
FI_NO_ALLOC, /* should not allocate any blocks */
FI_FREE_NID, /* free allocated nide */
FI_NO_EXTENT, /* not to use the extent cache */
FI_INLINE_XATTR, /* used for inline xattr */
FI_INLINE_DATA, /* used for inline data*/
FI_INLINE_DENTRY, /* used for inline dentry */
FI_APPEND_WRITE, /* inode has appended data */
FI_UPDATE_WRITE, /* inode has in-place-update data */
FI_NEED_IPU, /* used for ipu per file */
FI_ATOMIC_FILE, /* indicate atomic file */
FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */
FI_VOLATILE_FILE, /* indicate volatile file */
FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
FI_DROP_CACHE, /* drop dirty page cache */
FI_DATA_EXIST, /* indicate data exists */
FI_INLINE_DOTS, /* indicate inline dot dentries */
FI_DO_DEFRAG, /* indicate defragment is running */
FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
FI_HOT_DATA, /* indicate file is hot */
FI_EXTRA_ATTR, /* indicate file has extra attribute */
FI_PROJ_INHERIT, /* indicate file inherits projectid */
FI_PIN_FILE, /* indicate file should not be gced */
FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
FI_COMPRESSED_FILE, /* indicate file's data can be compressed */
FI_COMPRESS_CORRUPT, /* indicate compressed cluster is corrupted */
FI_MMAP_FILE, /* indicate file was mmapped */
FI_ENABLE_COMPRESS, /* enable compression in "user" compression mode */
FI_MAX, /* max flag, never be used */
};
struct f2fs_inode_info {
struct inode vfs_inode; /* serve a vfs inode */
unsigned long i_flags; /* keep an inode flags for ioctl */
unsigned char i_advise; /* use to give file attribute hints */
unsigned char i_dir_level; /* use for dentry level for large dir */
unsigned int i_current_depth; /* only for directory depth */
/* for gc failure statistic */
unsigned int i_gc_failures[MAX_GC_FAILURE];
unsigned int i_pino; /* parent inode number */
umode_t i_acl_mode; /* keep file acl mode temporarily */
/* Use below internally in f2fs*/
unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */
struct rw_semaphore i_sem; /* protect fi info */
atomic_t dirty_pages; /* # of dirty pages */
f2fs_hash_t chash; /* hash value of given file name */
unsigned int clevel; /* maximum level of given file name */
struct task_struct *task; /* lookup and create consistency */
struct task_struct *cp_task; /* separate cp/wb IO stats*/
nid_t i_xattr_nid; /* node id that contains xattrs */
loff_t last_disk_size; /* lastly written file size */
spinlock_t i_size_lock; /* protect last_disk_size */
#ifdef CONFIG_QUOTA
struct dquot *i_dquot[MAXQUOTAS];
/* quota space reservation, managed internally by quota code */
qsize_t i_reserved_quota;
#endif
struct list_head dirty_list; /* dirty list for dirs and files */
struct list_head gdirty_list; /* linked in global dirty list */
struct list_head inmem_ilist; /* list for inmem inodes */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct task_struct *inmem_task; /* store inmemory task */
struct mutex inmem_lock; /* lock for inmemory pages */
pgoff_t ra_offset; /* ongoing readahead offset */
struct extent_tree *extent_tree; /* cached extent_tree entry */
/* avoid racing between foreground op and gc */
struct rw_semaphore i_gc_rwsem[2];
struct rw_semaphore i_mmap_sem;
struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */
int i_extra_isize; /* size of extra space located in i_addr */
kprojid_t i_projid; /* id for project quota */
int i_inline_xattr_size; /* inline xattr size */
struct timespec64 i_crtime; /* inode creation time */
struct timespec64 i_disk_time[4];/* inode disk times */
/* for file compress */
atomic_t i_compr_blocks; /* # of compressed blocks */
unsigned char i_compress_algorithm; /* algorithm type */
unsigned char i_log_cluster_size; /* log of cluster size */
unsigned short i_compress_flag; /* compress flag */
unsigned int i_cluster_size; /* cluster size */
};
static inline void get_extent_info(struct extent_info *ext,
struct f2fs_extent *i_ext)
{
ext->fofs = le32_to_cpu(i_ext->fofs);
ext->blk = le32_to_cpu(i_ext->blk);
ext->len = le32_to_cpu(i_ext->len);
}
static inline void set_raw_extent(struct extent_info *ext,
struct f2fs_extent *i_ext)
{
i_ext->fofs = cpu_to_le32(ext->fofs);
i_ext->blk = cpu_to_le32(ext->blk);
i_ext->len = cpu_to_le32(ext->len);
}
static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
u32 blk, unsigned int len)
{
ei->fofs = fofs;
ei->blk = blk;
ei->len = len;
}
static inline bool __is_discard_mergeable(struct discard_info *back,
struct discard_info *front, unsigned int max_len)
{
return (back->lstart + back->len == front->lstart) &&
(back->len + front->len <= max_len);
}
static inline bool __is_discard_back_mergeable(struct discard_info *cur,
struct discard_info *back, unsigned int max_len)
{
return __is_discard_mergeable(back, cur, max_len);
}
static inline bool __is_discard_front_mergeable(struct discard_info *cur,
struct discard_info *front, unsigned int max_len)
{
return __is_discard_mergeable(cur, front, max_len);
}
static inline bool __is_extent_mergeable(struct extent_info *back,
struct extent_info *front)
{
return (back->fofs + back->len == front->fofs &&
back->blk + back->len == front->blk);
}
static inline bool __is_back_mergeable(struct extent_info *cur,
struct extent_info *back)
{
return __is_extent_mergeable(back, cur);
}
static inline bool __is_front_mergeable(struct extent_info *cur,
struct extent_info *front)
{
return __is_extent_mergeable(cur, front);
}
extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync);
static inline void __try_update_largest_extent(struct extent_tree *et,
struct extent_node *en)
{
if (en->ei.len > et->largest.len) {
et->largest = en->ei;
et->largest_updated = true;
}
}
/*
* For free nid management
*/
enum nid_state {
FREE_NID, /* newly added to free nid list */
PREALLOC_NID, /* it is preallocated */
MAX_NID_STATE,
};
enum nat_state {
TOTAL_NAT,
DIRTY_NAT,
RECLAIMABLE_NAT,
MAX_NAT_STATE,
};
struct f2fs_nm_info {
block_t nat_blkaddr; /* base disk address of NAT */
nid_t max_nid; /* maximum possible node ids */
nid_t available_nids; /* # of available node ids */
nid_t next_scan_nid; /* the next nid to be scanned */
unsigned int ram_thresh; /* control the memory footprint */
unsigned int ra_nid_pages; /* # of nid pages to be readaheaded */
unsigned int dirty_nats_ratio; /* control dirty nats ratio threshold */
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
struct radix_tree_root nat_set_root;/* root of the nat set cache */
struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
struct list_head nat_entries; /* cached nat entry list (clean) */
spinlock_t nat_list_lock; /* protect clean nat entry list */
unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
unsigned int nat_blocks; /* # of nat blocks */
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
struct list_head free_nid_list; /* list for free nids excluding preallocated nids */
unsigned int nid_cnt[MAX_NID_STATE]; /* the number of free node id */
spinlock_t nid_list_lock; /* protect nid lists ops */
struct mutex build_lock; /* lock for build free nids */
unsigned char **free_nid_bitmap;
unsigned char *nat_block_bitmap;
unsigned short *free_nid_count; /* free nid count of NAT block */
/* for checkpoint */
char *nat_bitmap; /* NAT bitmap pointer */
unsigned int nat_bits_blocks; /* # of nat bits blocks */
unsigned char *nat_bits; /* NAT bits blocks */
unsigned char *full_nat_bits; /* full NAT pages */
unsigned char *empty_nat_bits; /* empty NAT pages */
#ifdef CONFIG_F2FS_CHECK_FS
char *nat_bitmap_mir; /* NAT bitmap mirror */
#endif
int bitmap_size; /* bitmap size */
};
/*
* this structure is used as one of function parameters.
* all the information are dedicated to a given direct node block determined
* by the data offset in a file.
*/
struct dnode_of_data {
struct inode *inode; /* vfs inode pointer */
struct page *inode_page; /* its inode page, NULL is possible */
struct page *node_page; /* cached direct node page */
nid_t nid; /* node id of the direct node block */
unsigned int ofs_in_node; /* data offset in the node page */
bool inode_page_locked; /* inode page is locked or not */
bool node_changed; /* is node block changed */
char cur_level; /* level of hole node page */
char max_level; /* level of current page located */
block_t data_blkaddr; /* block address of the node block */
};
static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
struct page *ipage, struct page *npage, nid_t nid)
{
memset(dn, 0, sizeof(*dn));
dn->inode = inode;
dn->inode_page = ipage;
dn->node_page = npage;
dn->nid = nid;
}
/*
* For SIT manager
*
* By default, there are 6 active log areas across the whole main area.
* When considering hot and cold data separation to reduce cleaning overhead,
* we split 3 for data logs and 3 for node logs as hot, warm, and cold types,
* respectively.
* In the current design, you should not change the numbers intentionally.
* Instead, as a mount option such as active_logs=x, you can use 2, 4, and 6
* logs individually according to the underlying devices. (default: 6)
* Just in case, on-disk layout covers maximum 16 logs that consist of 8 for
* data and 8 for node logs.
*/
#define NR_CURSEG_DATA_TYPE (3)
#define NR_CURSEG_NODE_TYPE (3)
#define NR_CURSEG_INMEM_TYPE (2)
#define NR_CURSEG_PERSIST_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
#define NR_CURSEG_TYPE (NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
enum {
CURSEG_HOT_DATA = 0, /* directory entry blocks */
CURSEG_WARM_DATA, /* data blocks */
CURSEG_COLD_DATA, /* multimedia or GCed data blocks */
CURSEG_HOT_NODE, /* direct node blocks of directory files */
CURSEG_WARM_NODE, /* direct node blocks of normal files */
CURSEG_COLD_NODE, /* indirect node blocks */
NR_PERSISTENT_LOG, /* number of persistent log */
CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
/* pinned file that needs consecutive block address */
CURSEG_ALL_DATA_ATGC, /* SSR alloctor in hot/warm/cold data area */
NO_CHECK_TYPE, /* number of persistent & inmem log */
};
struct flush_cmd {
struct completion wait;
struct llist_node llnode;
nid_t ino;
int ret;
};
struct flush_cmd_control {
struct task_struct *f2fs_issue_flush; /* flush thread */
wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
atomic_t issued_flush; /* # of issued flushes */
atomic_t queued_flush; /* # of queued flushes */
struct llist_head issue_list; /* list for command issue */
struct llist_node *dispatch_list; /* list for command dispatch */
};
struct f2fs_sm_info {
struct sit_info *sit_info; /* whole segment information */
struct free_segmap_info *free_info; /* free segment information */
struct dirty_seglist_info *dirty_info; /* dirty segment information */
struct curseg_info *curseg_array; /* active segment information */
struct rw_semaphore curseg_lock; /* for preventing curseg change */
block_t seg0_blkaddr; /* block address of 0'th segment */
block_t main_blkaddr; /* start block address of main area */
block_t ssa_blkaddr; /* start block address of SSA area */
unsigned int segment_count; /* total # of segments */
unsigned int main_segments; /* # of segments in main area */
unsigned int reserved_segments; /* # of reserved segments */
unsigned int ovp_segments; /* # of overprovision segments */
/* a threshold to reclaim prefree segments */
unsigned int rec_prefree_segments;
/* for batched trimming */
unsigned int trim_sections; /* # of sections to trim */
struct list_head sit_entry_set; /* sit entry set list */
unsigned int ipu_policy; /* in-place-update policy */
unsigned int min_ipu_util; /* in-place-update threshold */
unsigned int min_fsync_blocks; /* threshold for fsync */
unsigned int min_seq_blocks; /* threshold for sequential blocks */
unsigned int min_hot_blocks; /* threshold for hot block allocation */
unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */
/* for flush command control */
struct flush_cmd_control *fcc_info;
/* for discard command control */
struct discard_cmd_control *dcc_info;
};
/*
* For superblock
*/
/*
* COUNT_TYPE for monitoring
*
* f2fs monitors the number of several block types such as on-writeback,
* dirty dentry blocks, dirty node blocks, and dirty meta blocks.
*/
#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
enum count_type {
F2FS_DIRTY_DENTS,
F2FS_DIRTY_DATA,
F2FS_DIRTY_QDATA,