From 5d799881391277e52787df06bafe865e91ae8296 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Apr 2015 16:07:38 -0700 Subject: [PATCH 01/83] f2fs: export more enums for tracepoint This patch exports newly added enums to userspace. Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index e202dec22e1dd1..7a61ccc90a9cbd 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -13,6 +13,10 @@ TRACE_DEFINE_ENUM(NODE); TRACE_DEFINE_ENUM(DATA); TRACE_DEFINE_ENUM(META); TRACE_DEFINE_ENUM(META_FLUSH); +TRACE_DEFINE_ENUM(INMEM); +TRACE_DEFINE_ENUM(INMEM_DROP); +TRACE_DEFINE_ENUM(IPU); +TRACE_DEFINE_ENUM(OPU); TRACE_DEFINE_ENUM(CURSEG_HOT_DATA); TRACE_DEFINE_ENUM(CURSEG_WARM_DATA); TRACE_DEFINE_ENUM(CURSEG_COLD_DATA); @@ -37,6 +41,7 @@ TRACE_DEFINE_ENUM(__REQ_META); TRACE_DEFINE_ENUM(CP_UMOUNT); TRACE_DEFINE_ENUM(CP_FASTBOOT); TRACE_DEFINE_ENUM(CP_SYNC); +TRACE_DEFINE_ENUM(CP_RECOVERY); TRACE_DEFINE_ENUM(CP_DISCARD); #define show_block_type(type) \ From 272e083f7a968bc81dd01e4f16860a1e9617d454 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 18 Apr 2015 18:03:58 +0800 Subject: [PATCH 02/83] f2fs: make posix_acl_create() safer and cleaner Our f2fs_acl_create is copied from posix_acl_create in ./fs/posix_acl.c and modified to avoid deadlock bug when inline_dentry feature is enabled. Dan Carpenter rewrites posix_acl_create in commit 2799563b281f ("fs/posix_acl.c: make posix_acl_create() safer and cleaner") to make this function more safer, so that we can avoid potential bug in its caller, especially for ocfs2. Let's back port the patch to f2fs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 46 ++++++++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 4320ffab3495bb..c8f25f7241f06a 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -334,51 +334,45 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode, struct page *dpage) { struct posix_acl *p; + struct posix_acl *clone; int ret; + *acl = NULL; + *default_acl = NULL; + if (S_ISLNK(*mode) || !IS_POSIXACL(dir)) - goto no_acl; + return 0; p = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage); - if (IS_ERR(p)) { - if (p == ERR_PTR(-EOPNOTSUPP)) - goto apply_umask; - return PTR_ERR(p); + if (!p || p == ERR_PTR(-EOPNOTSUPP)) { + *mode &= ~current_umask(); + return 0; } + if (IS_ERR(p)) + return PTR_ERR(p); - if (!p) - goto apply_umask; - - *acl = f2fs_acl_clone(p, GFP_NOFS); - if (!*acl) + clone = f2fs_acl_clone(p, GFP_NOFS); + if (!clone) goto no_mem; - ret = f2fs_acl_create_masq(*acl, mode); + ret = f2fs_acl_create_masq(clone, mode); if (ret < 0) goto no_mem_clone; - if (ret == 0) { - posix_acl_release(*acl); - *acl = NULL; - } + if (ret == 0) + posix_acl_release(clone); + else + *acl = clone; - if (!S_ISDIR(*mode)) { + if (!S_ISDIR(*mode)) posix_acl_release(p); - *default_acl = NULL; - } else { + else *default_acl = p; - } - return 0; -apply_umask: - *mode &= ~current_umask(); -no_acl: - *default_acl = NULL; - *acl = NULL; return 0; no_mem_clone: - posix_acl_release(*acl); + posix_acl_release(clone); no_mem: posix_acl_release(p); return -ENOMEM; From f0c9cadae6706b6ce00ef724121fbf0f34187e22 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 18 Apr 2015 18:05:36 +0800 Subject: [PATCH 03/83] f2fs: use is_valid_blkaddr to verify blkaddr for readability Export is_valid_blkaddr() and use it to replace some codes for readability. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +-- fs/f2fs/f2fs.h | 1 + fs/f2fs/recovery.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a5e17a2a078147..72f64b314d2eac 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -77,8 +77,7 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) return page; } -static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi, - block_t blkaddr, int type) +bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { switch (type) { case META_NAT: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8de34ab6d5b1c5..da5bc7dd25ba46 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1581,6 +1581,7 @@ void destroy_segment_manager_caches(void); */ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); +bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int); int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int); void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 8d8ea99f215627..b80d9d48f12582 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -174,7 +174,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) while (1) { struct fsync_inode_entry *entry; - if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) + if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) return 0; page = get_meta_page(sbi, blkaddr); @@ -396,7 +396,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, dest = datablock_addr(page, dn.ofs_in_node); if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR && - dest >= MAIN_BLKADDR(sbi) && dest < MAX_BLKADDR(sbi)) { + is_valid_blkaddr(sbi, dest, META_POR)) { if (src == NULL_ADDR) { err = reserve_new_block(&dn); @@ -454,7 +454,7 @@ static int recover_data(struct f2fs_sb_info *sbi, while (1) { struct fsync_inode_entry *entry; - if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) + if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) break; ra_meta_pages_cond(sbi, blkaddr); From 587c0a42552a69a58e7ccd363d30fd48d809fdcb Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 21 Apr 2015 15:59:12 +0900 Subject: [PATCH 04/83] f2fs: add offset check routine before punch_hole() in f2fs_fallocate() In the punch_hole(), if offset bigger than inode size, it returns SUCCESS. Then f2fs_fallocate() will update time and dirty mark. In that case, inode has not been modified actually. So I have added offset check routine that prevent to call the punch_hole(). Signed-off-by: Taehee Yoo Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2b52e48d74824d..3d6de54fead08b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -718,10 +718,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) if (!S_ISREG(inode->i_mode)) return -EOPNOTSUPP; - /* skip punching hole beyond i_size */ - if (offset >= inode->i_size) - return ret; - if (f2fs_has_inline_data(inode)) { ret = f2fs_convert_inline_inode(inode); if (ret) @@ -830,15 +826,19 @@ static long f2fs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); - long ret; + long ret = 0; if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; mutex_lock(&inode->i_mutex); - if (mode & FALLOC_FL_PUNCH_HOLE) + if (mode & FALLOC_FL_PUNCH_HOLE) { + if (offset >= inode->i_size) + goto out; + ret = punch_hole(inode, offset, len); + } else ret = expand_inode_data(inode, offset, len, mode); @@ -847,6 +847,7 @@ static long f2fs_fallocate(struct file *file, int mode, mark_inode_dirty(inode); } +out: mutex_unlock(&inode->i_mutex); trace_f2fs_fallocate(inode, mode, offset, len, ret); From 2aa7c51a453fbd5b0a58156623ea950bcb9db526 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 18 Apr 2015 18:06:49 +0800 Subject: [PATCH 05/83] f2fs: make has_fsynced_inode static has_fsynced_inode() has no other caller out of node.c, make it static. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - fs/f2fs/node.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index da5bc7dd25ba46..8be6cab3cbe452 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1507,7 +1507,6 @@ struct node_info; bool available_free_memory(struct f2fs_sb_info *, int); bool is_checkpointed_node(struct f2fs_sb_info *, nid_t); -bool has_fsynced_inode(struct f2fs_sb_info *, nid_t); bool need_inode_block_update(struct f2fs_sb_info *, nid_t); void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8ab0cf1930bd2f..1676c7a454b214 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -209,7 +209,7 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) return is_cp; } -bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) +static bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; From 0040b933187b11f78e83dd162a31d64a46be4e37 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Apr 2015 11:52:23 -0700 Subject: [PATCH 06/83] f2fs: add missing version info in superblock The mkfs.f2fs remains kernel version in superblock, but f2fs module has not added that so far. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 591f8c3ef41091..8d345c24bcf7e3 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -50,6 +50,8 @@ #define MAX_ACTIVE_NODE_LOGS 8 #define MAX_ACTIVE_DATA_LOGS 8 +#define VERSION_LEN 256 + /* * For superblock */ @@ -86,6 +88,9 @@ struct f2fs_super_block { __le32 extension_count; /* # of extensions below */ __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ __le32 cp_payload; + __u8 version[VERSION_LEN]; /* the kernel version */ + __u8 init_version[VERSION_LEN]; /* the initial kernel version */ + __u8 reserved[892]; /* valid reserved region */ } __packed; /* From b5492af78c89b52a8e7273445b2248b397a15333 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Apr 2015 13:44:41 -0700 Subject: [PATCH 07/83] f2fs: move existing definitions into f2fs.h This patch moves some inode-related definitions from node.h to f2fs.h to add new features. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 22 ++++++++++++++++++++++ fs/f2fs/node.h | 22 ---------------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8be6cab3cbe452..cd9748af1b6386 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -320,6 +320,13 @@ struct extent_tree { #define FADVISE_COLD_BIT 0x01 #define FADVISE_LOST_PINO_BIT 0x02 +#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) +#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) +#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) +#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) +#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) +#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) + #define DEF_DIR_LEVEL 0 struct f2fs_inode_info { @@ -1391,6 +1398,21 @@ static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) kunmap(page); } +static inline int is_file(struct inode *inode, int type) +{ + return F2FS_I(inode)->i_advise & type; +} + +static inline void set_file(struct inode *inode, int type) +{ + F2FS_I(inode)->i_advise |= type; +} + +static inline void clear_file(struct inode *inode, int type) +{ + F2FS_I(inode)->i_advise &= ~type; +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index c56026f1725c90..7427e956ad8143 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -343,28 +343,6 @@ static inline nid_t get_nid(struct page *p, int off, bool i) * - Mark cold node blocks in their node footer * - Mark cold data pages in page cache */ -static inline int is_file(struct inode *inode, int type) -{ - return F2FS_I(inode)->i_advise & type; -} - -static inline void set_file(struct inode *inode, int type) -{ - F2FS_I(inode)->i_advise |= type; -} - -static inline void clear_file(struct inode *inode, int type) -{ - F2FS_I(inode)->i_advise &= ~type; -} - -#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) -#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) -#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) -#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) -#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) -#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) - static inline int is_cold_data(struct page *page) { return PageChecked(page); From 76f105a2dbcd47509bac6ba8d94cb3759a3e6e9d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Apr 2015 15:10:36 -0700 Subject: [PATCH 08/83] f2fs: add feature facility in superblock This patch introduces a feature in superblock, which will indicate any new features for f2fs. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 7 +++++++ include/linux/f2fs_fs.h | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cd9748af1b6386..e1dd986262cb37 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -70,6 +70,13 @@ struct f2fs_mount_info { unsigned int opt; }; +#define F2FS_HAS_FEATURE(sb, mask) \ + ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) +#define F2FS_SET_FEATURE(sb, mask) \ + F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask) +#define F2FS_CLEAR_FEATURE(sb, mask) \ + F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask) + #define CRCPOLY_LE 0xedb88320 static inline __u32 f2fs_crc32(void *buf, size_t len) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 8d345c24bcf7e3..d44e97f2b98ee8 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -90,7 +90,8 @@ struct f2fs_super_block { __le32 cp_payload; __u8 version[VERSION_LEN]; /* the kernel version */ __u8 init_version[VERSION_LEN]; /* the initial kernel version */ - __u8 reserved[892]; /* valid reserved region */ + __le32 feature; /* defined features */ + __u8 reserved[888]; /* valid reserved region */ } __packed; /* From 003a3e1d60b0bb5cfb4feffb05a2083db2346364 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 6 Apr 2015 19:55:34 -0700 Subject: [PATCH 09/83] f2fs: add f2fs_map_blocks This patch introduces f2fs_map_blocks structure likewise ext4_map_blocks. Now, f2fs uses f2fs_map_blocks when handling get_block. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 72 +++++++++++++++++++++---------------- fs/f2fs/f2fs.h | 16 +++++++++ include/trace/events/f2fs.h | 28 +++++++-------- 3 files changed, 71 insertions(+), 45 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1e1aae669fa862..aa3c079ffd94d6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -251,19 +251,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) return err; } -static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs, - struct extent_info *ei, struct buffer_head *bh_result) -{ - unsigned int blkbits = sb->s_blocksize_bits; - size_t max_size = bh_result->b_size; - size_t mapped_size; - - clear_buffer_new(bh_result); - map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs); - mapped_size = (ei->fofs + ei->len - pgofs) << blkbits; - bh_result->b_size = min(max_size, mapped_size); -} - static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { @@ -1208,18 +1195,18 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, } /* - * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh. + * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with + * f2fs_map_blocks structure. * If original data blocks are allocated, then give them to blockdev. * Otherwise, * a. preallocate requested block addresses * b. do not use extent cache for better performance * c. give the block addresses to blockdev */ -static int __get_data_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create, bool fiemap) +static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, + int create, bool fiemap) { - unsigned int blkbits = inode->i_sb->s_blocksize_bits; - unsigned maxblocks = bh_result->b_size >> blkbits; + unsigned int maxblocks = map->m_len; struct dnode_of_data dn; int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; pgoff_t pgofs, end_offset; @@ -1227,11 +1214,16 @@ static int __get_data_block(struct inode *inode, sector_t iblock, struct extent_info ei; bool allocated = false; - /* Get the page offset from the block offset(iblock) */ - pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); + map->m_len = 0; + map->m_flags = 0; + + /* it only supports block size == page size */ + pgofs = (pgoff_t)map->m_lblk; if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) { - f2fs_map_bh(inode->i_sb, pgofs, &ei, bh_result); + map->m_pblk = ei.blk + pgofs - ei.fofs; + map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); + map->m_flags = F2FS_MAP_MAPPED; goto out; } @@ -1250,21 +1242,21 @@ static int __get_data_block(struct inode *inode, sector_t iblock, goto put_out; if (dn.data_blkaddr != NULL_ADDR) { - clear_buffer_new(bh_result); - map_bh(bh_result, inode->i_sb, dn.data_blkaddr); + map->m_flags = F2FS_MAP_MAPPED; + map->m_pblk = dn.data_blkaddr; } else if (create) { err = __allocate_data_block(&dn); if (err) goto put_out; allocated = true; - set_buffer_new(bh_result); - map_bh(bh_result, inode->i_sb, dn.data_blkaddr); + map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED; + map->m_pblk = dn.data_blkaddr; } else { goto put_out; } end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); - bh_result->b_size = (((size_t)1) << blkbits); + map->m_len = 1; dn.ofs_in_node++; pgofs++; @@ -1288,22 +1280,22 @@ static int __get_data_block(struct inode *inode, sector_t iblock, end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); } - if (maxblocks > (bh_result->b_size >> blkbits)) { + if (maxblocks > map->m_len) { block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); if (blkaddr == NULL_ADDR && create) { err = __allocate_data_block(&dn); if (err) goto sync_out; allocated = true; - set_buffer_new(bh_result); + map->m_flags |= F2FS_MAP_NEW; blkaddr = dn.data_blkaddr; } /* Give more consecutive addresses for the readahead */ - if (blkaddr == (bh_result->b_blocknr + ofs)) { + if (map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs)) { ofs++; dn.ofs_in_node++; pgofs++; - bh_result->b_size += (((size_t)1) << blkbits); + map->m_len++; goto get_next; } } @@ -1316,10 +1308,28 @@ static int __get_data_block(struct inode *inode, sector_t iblock, if (create) f2fs_unlock_op(F2FS_I_SB(inode)); out: - trace_f2fs_get_data_block(inode, iblock, bh_result, err); + trace_f2fs_map_blocks(inode, map, err); return err; } +static int __get_data_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create, bool fiemap) +{ + struct f2fs_map_blocks map; + int ret; + + map.m_lblk = iblock; + map.m_len = bh->b_size >> inode->i_blkbits; + + ret = f2fs_map_blocks(inode, &map, create, fiemap); + if (!ret) { + map_bh(bh, inode->i_sb, map.m_pblk); + bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; + bh->b_size = map.m_len << inode->i_blkbits; + } + return ret; +} + static int get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e1dd986262cb37..c98454d011ff1c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -321,6 +321,22 @@ struct extent_tree { unsigned int count; /* # of extent node in rb-tree*/ }; +/* + * This structure is taken from ext4_map_blocks. + * + * Note that, however, f2fs uses NEW and MAPPED flags for f2fs_map_blocks(). + */ +#define F2FS_MAP_NEW (1 << BH_New) +#define F2FS_MAP_MAPPED (1 << BH_Mapped) +#define F2FS_MAP_FLAGS (F2FS_MAP_NEW | F2FS_MAP_MAPPED) + +struct f2fs_map_blocks { + block_t m_pblk; + block_t m_lblk; + unsigned int m_len; + unsigned int m_flags; +}; + /* * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. */ diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 7a61ccc90a9cbd..04856a2d8c82d7 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -117,6 +117,7 @@ TRACE_DEFINE_ENUM(CP_DISCARD); { CP_DISCARD, "Discard" }) struct victim_sel_policy; +struct f2fs_map_blocks; DECLARE_EVENT_CLASS(f2fs__inode, @@ -481,36 +482,35 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, __entry->err) ); -TRACE_EVENT(f2fs_get_data_block, - TP_PROTO(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int ret), +TRACE_EVENT(f2fs_map_blocks, + TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, int ret), - TP_ARGS(inode, iblock, bh, ret), + TP_ARGS(inode, map, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(sector_t, iblock) - __field(sector_t, bh_start) - __field(size_t, bh_size) + __field(block_t, m_lblk) + __field(block_t, m_pblk) + __field(unsigned int, m_len) __field(int, ret) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->iblock = iblock; - __entry->bh_start = bh->b_blocknr; - __entry->bh_size = bh->b_size; + __entry->m_lblk = map->m_lblk; + __entry->m_pblk = map->m_pblk; + __entry->m_len = map->m_len; __entry->ret = ret; ), TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " - "start blkaddr = 0x%llx, len = 0x%llx bytes, err = %d", + "start blkaddr = 0x%llx, len = 0x%llx, err = %d", show_dev_ino(__entry), - (unsigned long long)__entry->iblock, - (unsigned long long)__entry->bh_start, - (unsigned long long)__entry->bh_size, + (unsigned long long)__entry->m_lblk, + (unsigned long long)__entry->m_pblk, + (unsigned long long)__entry->m_len, __entry->ret) ); From 26d815ad75156a1cf2f6c7ba94cdfd32849879d7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Apr 2015 18:49:51 -0700 Subject: [PATCH 10/83] f2fs: introduce f2fs_commit_super This patch introduces f2fs_commit_super to write updated superblock. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c98454d011ff1c..7ff3ac7a1ce81e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1535,6 +1535,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) /* * super.c */ +int f2fs_commit_super(struct f2fs_sb_info *); int f2fs_sync_fs(struct super_block *, int); extern __printf(3, 4) void f2fs_msg(struct super_block *, const char *, const char *, ...); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b2dd1b01f07634..85841687b40a7d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -966,6 +966,30 @@ static int read_raw_super_block(struct super_block *sb, return 0; } +int f2fs_commit_super(struct f2fs_sb_info *sbi) +{ + struct buffer_head *sbh = sbi->raw_super_buf; + sector_t block = sbh->b_blocknr; + int err; + + /* write back-up superblock first */ + sbh->b_blocknr = block ? 0 : 1; + mark_buffer_dirty(sbh); + err = sync_dirty_buffer(sbh); + + sbh->b_blocknr = block; + if (err) + goto out; + + /* write current valid superblock */ + mark_buffer_dirty(sbh); + err = sync_dirty_buffer(sbh); +out: + clear_buffer_write_io_error(sbh); + set_buffer_uptodate(sbh); + return err; +} + static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; From f1e8866016b53bd0ea108ae9adbb52da1dd53dab Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Apr 2015 11:20:42 -0700 Subject: [PATCH 11/83] f2fs: expose f2fs_mpage_readpages This patch implements f2fs_mpage_readpages for further optimization on encryption support. The basic code was taken from fs/mpage.c, and changed to be simple by adjusting that block_size is equal to page_size in f2fs. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 157 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 154 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index aa3c079ffd94d6..2a3a9cd008da21 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -47,6 +48,30 @@ static void f2fs_read_end_io(struct bio *bio, int err) bio_put(bio); } +/* + * I/O completion handler for multipage BIOs. + * copied from fs/mpage.c + */ +static void mpage_end_io(struct bio *bio, int err) +{ + struct bio_vec *bv; + int i; + + bio_for_each_segment_all(bv, bio, i) { + struct page *page = bv->bv_page; + + if (!err) { + SetPageUptodate(page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + unlock_page(page); + } + + bio_put(bio); +} + static void f2fs_write_end_io(struct bio *bio, int err) { struct f2fs_sb_info *sbi = bio->bi_private; @@ -1349,6 +1374,133 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, start, len, get_data_block_fiemap); } +/* + * This function was originally taken from fs/mpage.c, and customized for f2fs. + * Major change was from block_size == page_size in f2fs by default. + */ +static int f2fs_mpage_readpages(struct address_space *mapping, + struct list_head *pages, struct page *page, + unsigned nr_pages) +{ + struct bio *bio = NULL; + unsigned page_idx; + sector_t last_block_in_bio = 0; + struct inode *inode = mapping->host; + const unsigned blkbits = inode->i_blkbits; + const unsigned blocksize = 1 << blkbits; + sector_t block_in_file; + sector_t last_block; + sector_t last_block_in_file; + sector_t block_nr; + struct block_device *bdev = inode->i_sb->s_bdev; + struct f2fs_map_blocks map; + + map.m_pblk = 0; + map.m_lblk = 0; + map.m_len = 0; + map.m_flags = 0; + + for (page_idx = 0; nr_pages; page_idx++, nr_pages--) { + + prefetchw(&page->flags); + if (pages) { + page = list_entry(pages->prev, struct page, lru); + list_del(&page->lru); + if (add_to_page_cache_lru(page, mapping, + page->index, GFP_KERNEL)) + goto next_page; + } + + block_in_file = (sector_t)page->index; + last_block = block_in_file + nr_pages; + last_block_in_file = (i_size_read(inode) + blocksize - 1) >> + blkbits; + if (last_block > last_block_in_file) + last_block = last_block_in_file; + + /* + * Map blocks using the previous result first. + */ + if ((map.m_flags & F2FS_MAP_MAPPED) && + block_in_file > map.m_lblk && + block_in_file < (map.m_lblk + map.m_len)) + goto got_it; + + /* + * Then do more f2fs_map_blocks() calls until we are + * done with this page. + */ + map.m_flags = 0; + + if (block_in_file < last_block) { + map.m_lblk = block_in_file; + map.m_len = last_block - block_in_file; + + if (f2fs_map_blocks(inode, &map, 0, false)) + goto set_error_page; + } +got_it: + if ((map.m_flags & F2FS_MAP_MAPPED)) { + block_nr = map.m_pblk + block_in_file - map.m_lblk; + SetPageMappedToDisk(page); + + if (!PageUptodate(page) && !cleancache_get_page(page)) { + SetPageUptodate(page); + goto confused; + } + } else { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + unlock_page(page); + goto next_page; + } + + /* + * This page will go to BIO. Do we need to send this + * BIO off first? + */ + if (bio && (last_block_in_bio != block_nr - 1)) { +submit_and_realloc: + submit_bio(READ, bio); + bio = NULL; + } + if (bio == NULL) { + bio = bio_alloc(GFP_KERNEL, + min_t(int, nr_pages, bio_get_nr_vecs(bdev))); + if (!bio) + goto set_error_page; + bio->bi_bdev = bdev; + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr); + bio->bi_end_io = mpage_end_io; + bio->bi_private = NULL; + } + + if (bio_add_page(bio, page, blocksize, 0) < blocksize) + goto submit_and_realloc; + + last_block_in_bio = block_nr; + goto next_page; +set_error_page: + SetPageError(page); + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + unlock_page(page); + goto next_page; +confused: + if (bio) { + submit_bio(READ, bio); + bio = NULL; + } + unlock_page(page); +next_page: + if (pages) + page_cache_release(page); + } + BUG_ON(pages && !list_empty(pages)); + if (bio) + submit_bio(READ, bio); + return 0; +} + static int f2fs_read_data_page(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; @@ -1360,8 +1512,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page) if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); if (ret == -EAGAIN) - ret = mpage_readpage(page, get_data_block); - + ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1); return ret; } @@ -1375,7 +1526,7 @@ static int f2fs_read_data_pages(struct file *file, if (f2fs_has_inline_data(inode)) return 0; - return mpage_readpages(mapping, pages, nr_pages, get_data_block); + return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages); } int do_write_data_page(struct page *page, struct f2fs_io_info *fio) From 06957e8fe6945e2d3c4ab01d36e52bf31a93a05c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Apr 2015 11:40:27 -0700 Subject: [PATCH 12/83] f2fs: clean up f2fs_lookup This patch cleans up to avoid deep indentation. Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 658e8079aaf9b9..a311c3ce391862 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -232,31 +232,32 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; struct f2fs_dir_entry *de; struct page *page; + nid_t ino; if (dentry->d_name.len > F2FS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); de = f2fs_find_entry(dir, &dentry->d_name, &page); - if (de) { - nid_t ino = le32_to_cpu(de->ino); - f2fs_dentry_kunmap(dir, page); - f2fs_put_page(page, 0); + if (!de) + return d_splice_alias(inode, dentry); - inode = f2fs_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); + ino = le32_to_cpu(de->ino); + f2fs_dentry_kunmap(dir, page); + f2fs_put_page(page, 0); - if (f2fs_has_inline_dots(inode)) { - int err; + inode = f2fs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + if (f2fs_has_inline_dots(inode)) { + int err; - err = __recover_dot_dentries(inode, dir->i_ino); - if (err) { - iget_failed(inode); - return ERR_PTR(err); - } + err = __recover_dot_dentries(inode, dir->i_ino); + if (err) { + iget_failed(inode); + return ERR_PTR(err); } } - return d_splice_alias(inode, dentry); } From 01b960e94a58d91518d5dd7001c5cd0c57335251 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 23 Apr 2015 10:27:21 -0700 Subject: [PATCH 13/83] f2fs: add f2fs_may_inline_{data, dentry} This patch adds f2fs_may_inline_data and f2fs_may_inline_dentry. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/file.c | 2 +- fs/f2fs/inline.c | 13 ++++++++++++- fs/f2fs/namei.c | 4 ++-- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7ff3ac7a1ce81e..2bb9b577b5983d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1839,7 +1839,8 @@ extern struct kmem_cache *inode_entry_slab; /* * inline.c */ -bool f2fs_may_inline(struct inode *); +bool f2fs_may_inline_data(struct inode *); +bool f2fs_may_inline_dentry(struct inode *); void read_inline_data(struct page *, struct page *); bool truncate_inline_inode(struct page *, u64); int f2fs_read_inline_data(struct inode *, struct page *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3d6de54fead08b..ffd9b7e49be7d8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -560,7 +560,7 @@ void f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); /* we should check inline_data size */ - if (f2fs_has_inline_data(inode) && !f2fs_may_inline(inode)) { + if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) { if (f2fs_convert_inline_inode(inode)) return; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 8140e4f0e538e5..99d514815af753 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -13,7 +13,7 @@ #include "f2fs.h" -bool f2fs_may_inline(struct inode *inode) +bool f2fs_may_inline_data(struct inode *inode) { if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) return false; @@ -30,6 +30,17 @@ bool f2fs_may_inline(struct inode *inode) return true; } +bool f2fs_may_inline_dentry(struct inode *inode) +{ + if (!test_opt(F2FS_I_SB(inode), INLINE_DENTRY)) + return false; + + if (!S_ISDIR(inode->i_mode)) + return false; + + return true; +} + void read_inline_data(struct page *page, struct page *ipage) { void *src_addr, *dst_addr; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a311c3ce391862..c0ba8e3bc5b2d8 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -56,9 +56,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) goto out; } - if (f2fs_may_inline(inode)) + if (f2fs_may_inline_data(inode)) set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - if (test_opt(sbi, INLINE_DENTRY) && S_ISDIR(inode->i_mode)) + if (f2fs_may_inline_dentry(inode)) set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); trace_f2fs_new_inode(inode, 0); From 05ca3632e5a73b493b27ec3e2a337885563abff0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 23 Apr 2015 14:38:15 -0700 Subject: [PATCH 14/83] f2fs: add sbi and page pointer in f2fs_io_info This patch adds f2fs_sb_info and page pointers in f2fs_io_info structure. With this change, we can reduce a lot of parameters for IO functions. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 9 +++++++-- fs/f2fs/data.c | 47 +++++++++++++++++++++++++++----------------- fs/f2fs/f2fs.h | 18 ++++++++--------- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 4 +++- fs/f2fs/inline.c | 4 +++- fs/f2fs/node.c | 8 ++++++-- fs/f2fs/segment.c | 38 ++++++++++++++++++----------------- fs/f2fs/super.c | 2 +- fs/f2fs/trace.c | 6 +++--- fs/f2fs/trace.h | 4 ++-- 11 files changed, 83 insertions(+), 59 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 72f64b314d2eac..6dbff2b0bcd8cb 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -52,6 +52,7 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) struct address_space *mapping = META_MAPPING(sbi); struct page *page; struct f2fs_io_info fio = { + .sbi = sbi, .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO, .blk_addr = index, @@ -65,7 +66,9 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) if (PageUptodate(page)) goto out; - if (f2fs_submit_page_bio(sbi, page, &fio)) + fio.page = page; + + if (f2fs_submit_page_bio(&fio)) goto repeat; lock_page(page); @@ -117,6 +120,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type struct page *page; block_t blkno = start; struct f2fs_io_info fio = { + .sbi = sbi, .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO }; @@ -160,7 +164,8 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type continue; } - f2fs_submit_page_mbio(sbi, page, &fio); + fio.page = page; + f2fs_submit_page_mbio(&fio); f2fs_put_page(page, 0); } out: diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2a3a9cd008da21..81d1fd5810786e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -158,16 +158,16 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, * Fill the locked page with data located in the block address. * Return unlocked page. */ -int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, - struct f2fs_io_info *fio) +int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; + struct page *page = fio->page; trace_f2fs_submit_page_bio(page, fio); - f2fs_trace_ios(page, fio, 0); + f2fs_trace_ios(fio, 0); /* Allocate a new bio */ - bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw)); + bio = __bio_alloc(fio->sbi, fio->blk_addr, 1, is_read_io(fio->rw)); if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { bio_put(bio); @@ -179,9 +179,9 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, return 0; } -void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, - struct f2fs_io_info *fio) +void f2fs_submit_page_mbio(struct f2fs_io_info *fio) { + struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io; bool is_read = is_read_io(fio->rw); @@ -206,17 +206,17 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, io->fio = *fio; } - if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < + if (bio_add_page(io->bio, fio->page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { __submit_merged_bio(io); goto alloc_new; } io->last_block_in_bio = fio->blk_addr; - f2fs_trace_ios(page, fio, 0); + f2fs_trace_ios(fio, 0); up_write(&io->io_rwsem); - trace_f2fs_submit_page_mbio(page, fio); + trace_f2fs_submit_page_mbio(fio->page, fio); } /* @@ -925,6 +925,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) struct extent_info ei; int err; struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), .type = DATA, .rw = sync ? READ_SYNC : READA, }; @@ -971,7 +972,8 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) } fio.blk_addr = dn.data_blkaddr; - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); + fio.page = page; + err = f2fs_submit_page_bio(&fio); if (err) return ERR_PTR(err); @@ -998,6 +1000,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) struct extent_info ei; int err; struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), .type = DATA, .rw = READ_SYNC, }; @@ -1041,7 +1044,8 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) } fio.blk_addr = dn.data_blkaddr; - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); + fio.page = page; + err = f2fs_submit_page_bio(&fio); if (err) return ERR_PTR(err); @@ -1092,11 +1096,13 @@ struct page *get_new_data_page(struct inode *inode, SetPageUptodate(page); } else { struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), .type = DATA, .rw = READ_SYNC, .blk_addr = dn.data_blkaddr, + .page = page, }; - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); + err = f2fs_submit_page_bio(&fio); if (err) goto put_err; @@ -1529,8 +1535,9 @@ static int f2fs_read_data_pages(struct file *file, return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages); } -int do_write_data_page(struct page *page, struct f2fs_io_info *fio) +int do_write_data_page(struct f2fs_io_info *fio) { + struct page *page = fio->page; struct inode *inode = page->mapping->host; struct dnode_of_data dn; int err = 0; @@ -1557,11 +1564,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) if (unlikely(fio->blk_addr != NEW_ADDR && !is_cold_data(page) && need_inplace_update(inode))) { - rewrite_data_page(page, fio); + rewrite_data_page(fio); set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); trace_f2fs_do_write_data_page(page, IPU); } else { - write_data_page(page, &dn, fio); + write_data_page(&dn, fio); set_data_blkaddr(&dn); f2fs_update_extent_cache(&dn); trace_f2fs_do_write_data_page(page, OPU); @@ -1586,8 +1593,10 @@ static int f2fs_write_data_page(struct page *page, bool need_balance_fs = false; int err = 0; struct f2fs_io_info fio = { + .sbi = sbi, .type = DATA, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, + .page = page, }; trace_f2fs_writepage(page, DATA); @@ -1617,7 +1626,7 @@ static int f2fs_write_data_page(struct page *page, if (S_ISDIR(inode->i_mode)) { if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; - err = do_write_data_page(page, &fio); + err = do_write_data_page(&fio); goto done; } @@ -1637,7 +1646,7 @@ static int f2fs_write_data_page(struct page *page, if (f2fs_has_inline_data(inode)) err = f2fs_write_inline_data(inode, page); if (err == -EAGAIN) - err = do_write_data_page(page, &fio); + err = do_write_data_page(&fio); f2fs_unlock_op(sbi); done: if (err && err != -ENOENT) @@ -1806,11 +1815,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { struct f2fs_io_info fio = { + .sbi = sbi, .type = DATA, .rw = READ_SYNC, .blk_addr = dn.data_blkaddr, + .page = page, }; - err = f2fs_submit_page_bio(sbi, page, &fio); + err = f2fs_submit_page_bio(&fio); if (err) goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2bb9b577b5983d..e99a404d97d749 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -601,9 +601,11 @@ enum page_type { }; struct f2fs_io_info { + struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */ block_t blk_addr; /* block address to be written */ + struct page *page; /* page to be written */ }; #define is_read_io(rw) (((rw) & 1) == READ) @@ -1601,11 +1603,9 @@ void allocate_new_segments(struct f2fs_sb_info *); int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); void write_meta_page(struct f2fs_sb_info *, struct page *); -void write_node_page(struct f2fs_sb_info *, struct page *, - unsigned int, struct f2fs_io_info *); -void write_data_page(struct page *, struct dnode_of_data *, - struct f2fs_io_info *); -void rewrite_data_page(struct page *, struct f2fs_io_info *); +void write_node_page(unsigned int, struct f2fs_io_info *); +void write_data_page(struct dnode_of_data *, struct f2fs_io_info *); +void rewrite_data_page(struct f2fs_io_info *); void recover_data_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, block_t, block_t); void allocate_data_block(struct f2fs_sb_info *, struct page *, @@ -1653,10 +1653,8 @@ void destroy_checkpoint_caches(void); * data.c */ void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); -int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, - struct f2fs_io_info *); -void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, - struct f2fs_io_info *); +int f2fs_submit_page_bio(struct f2fs_io_info *); +void f2fs_submit_page_mbio(struct f2fs_io_info *); void set_data_blkaddr(struct dnode_of_data *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); @@ -1668,7 +1666,7 @@ void f2fs_preserve_extent_tree(struct inode *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); -int do_write_data_page(struct page *, struct f2fs_io_info *); +int do_write_data_page(struct f2fs_io_info *); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); void init_extent_cache_info(struct f2fs_sb_info *); int __init create_extent_cache(void); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ffd9b7e49be7d8..0e58f021bf49d2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -271,7 +271,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = f2fs_issue_flush(sbi); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); - f2fs_trace_ios(NULL, NULL, 1); + f2fs_trace_ios(NULL, 1); return ret; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ed58211fe79b45..72667a54ac5fb0 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -521,8 +521,10 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, static void move_data_page(struct inode *inode, struct page *page, int gc_type) { struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), .type = DATA, .rw = WRITE_SYNC, + .page = page, }; if (gc_type == BG_GC) { @@ -536,7 +538,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) if (clear_page_dirty_for_io(page)) inode_dec_dirty_pages(inode); set_cold_data(page); - do_write_data_page(page, &fio); + do_write_data_page(&fio); clear_cold_data(page); } out: diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 99d514815af753..d9b3033bf6fd66 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -106,8 +106,10 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { void *src_addr, *dst_addr; struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(dn->inode), .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, + .page = page, }; int dirty, err; @@ -141,7 +143,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) /* write data page to try to make data consistent */ set_page_writeback(page); fio.blk_addr = dn->data_blkaddr; - write_data_page(page, dn, &fio); + write_data_page(dn, &fio); set_data_blkaddr(dn); f2fs_update_extent_cache(dn); f2fs_wait_on_page_writeback(page, DATA); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1676c7a454b214..880d5781b1b39a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -995,8 +995,10 @@ static int read_node_page(struct page *page, int rw) struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct node_info ni; struct f2fs_io_info fio = { + .sbi = sbi, .type = NODE, .rw = rw, + .page = page, }; get_node_info(sbi, page->index, &ni); @@ -1011,7 +1013,7 @@ static int read_node_page(struct page *page, int rw) return LOCKED_PAGE; fio.blk_addr = ni.blk_addr; - return f2fs_submit_page_bio(sbi, page, &fio); + return f2fs_submit_page_bio(&fio); } /* @@ -1293,8 +1295,10 @@ static int f2fs_write_node_page(struct page *page, nid_t nid; struct node_info ni; struct f2fs_io_info fio = { + .sbi = sbi, .type = NODE, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, + .page = page, }; trace_f2fs_writepage(page, NODE); @@ -1329,7 +1333,7 @@ static int f2fs_write_node_page(struct page *page, set_page_writeback(page); fio.blk_addr = ni.blk_addr; - write_node_page(sbi, page, nid, &fio); + write_node_page(nid, &fio); set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); up_read(&sbi->node_write); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f939660941bbc7..df8bce5379b38e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -216,6 +216,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) struct inmem_pages *cur, *tmp; bool submit_bio = false; struct f2fs_io_info fio = { + .sbi = sbi, .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, }; @@ -241,7 +242,8 @@ void commit_inmem_pages(struct inode *inode, bool abort) if (clear_page_dirty_for_io(cur->page)) inode_dec_dirty_pages(inode); trace_f2fs_commit_inmem_page(cur->page, INMEM); - do_write_data_page(cur->page, &fio); + fio.page = cur->page; + do_write_data_page(&fio); submit_bio = true; } f2fs_put_page(cur->page, 1); @@ -1206,56 +1208,56 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&curseg->curseg_mutex); } -static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, - struct f2fs_summary *sum, - struct f2fs_io_info *fio) +static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { - int type = __get_segment_type(page, fio->type); + int type = __get_segment_type(fio->page, fio->type); - allocate_data_block(sbi, page, fio->blk_addr, &fio->blk_addr, sum, type); + allocate_data_block(fio->sbi, fio->page, fio->blk_addr, + &fio->blk_addr, sum, type); /* writeout dirty page into bdev */ - f2fs_submit_page_mbio(sbi, page, fio); + f2fs_submit_page_mbio(fio); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) { struct f2fs_io_info fio = { + .sbi = sbi, .type = META, .rw = WRITE_SYNC | REQ_META | REQ_PRIO, .blk_addr = page->index, + .page = page, }; set_page_writeback(page); - f2fs_submit_page_mbio(sbi, page, &fio); + f2fs_submit_page_mbio(&fio); } -void write_node_page(struct f2fs_sb_info *sbi, struct page *page, - unsigned int nid, struct f2fs_io_info *fio) +void write_node_page(unsigned int nid, struct f2fs_io_info *fio) { struct f2fs_summary sum; + set_summary(&sum, nid, 0, 0); - do_write_page(sbi, page, &sum, fio); + do_write_page(&sum, fio); } -void write_data_page(struct page *page, struct dnode_of_data *dn, - struct f2fs_io_info *fio) +void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) { - struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); + struct f2fs_sb_info *sbi = fio->sbi; struct f2fs_summary sum; struct node_info ni; f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - do_write_page(sbi, page, &sum, fio); + do_write_page(&sum, fio); dn->data_blkaddr = fio->blk_addr; } -void rewrite_data_page(struct page *page, struct f2fs_io_info *fio) +void rewrite_data_page(struct f2fs_io_info *fio) { - stat_inc_inplace_blocks(F2FS_P_SB(page)); - f2fs_submit_page_mbio(F2FS_P_SB(page), page, fio); + stat_inc_inplace_blocks(fio->sbi); + f2fs_submit_page_mbio(fio); } void recover_data_page(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 85841687b40a7d..138fa938c29124 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -520,7 +520,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync) } else { f2fs_balance_fs(sbi); } - f2fs_trace_ios(NULL, NULL, 1); + f2fs_trace_ios(NULL, 1); return 0; } diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index 875aa8179bc173..145fb659ad4458 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -80,7 +80,7 @@ void f2fs_trace_pid(struct page *page) radix_tree_preload_end(); } -void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) +void f2fs_trace_ios(struct f2fs_io_info *fio, int flush) { struct inode *inode; pid_t pid; @@ -91,8 +91,8 @@ void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) return; } - inode = page->mapping->host; - pid = page_private(page); + inode = fio->page->mapping->host; + pid = page_private(fio->page); major = MAJOR(inode->i_sb->s_dev); minor = MINOR(inode->i_sb->s_dev); diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index 1041dbeb52ae4f..67db24ac1e85aa 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -33,12 +33,12 @@ struct last_io_info { }; extern void f2fs_trace_pid(struct page *); -extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int); +extern void f2fs_trace_ios(struct f2fs_io_info *, int); extern void f2fs_build_trace_ios(void); extern void f2fs_destroy_trace_ios(void); #else #define f2fs_trace_pid(p) -#define f2fs_trace_ios(p, i, n) +#define f2fs_trace_ios(i, n) #define f2fs_build_trace_ios() #define f2fs_destroy_trace_ios() From c879f90da96c6369080be868162ef96aeef4a439 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 24 Apr 2015 14:34:30 -0700 Subject: [PATCH 15/83] f2fs: move get_page for gc victims This patch moves getting victim page into move_data_page. Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 72667a54ac5fb0..1bd11f017a23ba 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -518,14 +518,13 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return 1; } -static void move_data_page(struct inode *inode, struct page *page, int gc_type) +static void move_data_page(struct inode *inode, block_t bidx, int gc_type) { - struct f2fs_io_info fio = { - .sbi = F2FS_I_SB(inode), - .type = DATA, - .rw = WRITE_SYNC, - .page = page, - }; + struct page *page; + + page = get_lock_data_page(inode, bidx); + if (IS_ERR(page)) + return; if (gc_type == BG_GC) { if (PageWriteback(page)) @@ -533,6 +532,12 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) set_page_dirty(page); set_cold_data(page); } else { + struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), + .type = DATA, + .rw = WRITE_SYNC, + .page = page, + }; f2fs_wait_on_page_writeback(page, DATA); if (clear_page_dirty_for_io(page)) @@ -618,12 +623,9 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, /* phase 3 */ inode = find_gc_inode(gc_list, dni.ino); if (inode) { - start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); - data_page = get_lock_data_page(inode, - start_bidx + ofs_in_node); - if (IS_ERR(data_page)) - continue; - move_data_page(inode, data_page, gc_type); + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)) + + ofs_in_node; + move_data_page(inode, start_bidx, gc_type); stat_inc_data_blk_count(sbi, 1, gc_type); } } From eaa693f4dcf1ca64b41b02d907e9c401c085cf59 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 26 Apr 2015 00:15:29 -0700 Subject: [PATCH 16/83] f2fs: introduce dot and dotdot name check This patch adds an inline function to check dot and dotdot names. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 11 +++++++++++ fs/f2fs/hash.c | 3 +-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e99a404d97d749..b8f99fd4fcff5d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1454,6 +1454,17 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) sbi->sb->s_flags |= MS_RDONLY; } +static inline bool is_dot_dotdot(const struct qstr *str) +{ + if (str->len == 1 && str->name[0] == '.') + return true; + + if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') + return true; + + return false; +} + #define get_inode_mode(i) \ ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index a844fcfb9a8dcc..71b7206c431ea0 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -79,8 +79,7 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) const unsigned char *name = name_info->name; size_t len = name_info->len; - if ((len <= 2) && (name[0] == '.') && - (name[1] == '.' || name[1] == '\0')) + if (is_dot_dotdot(name_info)) return 0; /* Initialize the default seed for the hash checksum functions */ From 01f28610a1691078d0f7ba62b365567f8799f07c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 29 Apr 2015 11:18:42 -0700 Subject: [PATCH 17/83] f2fs: fix race on allocating and deallocating a dentry block There are two threads: f2fs_delete_entry() get_new_data_page() f2fs_reserve_block() dn.blkaddr = XXX lock_page(dentry_block) truncate_hole() dn.blkaddr = NULL unlock_page(dentry_block) lock_page(dentry_block) fill the block from XXX address add new dentries unlock_page(dentry_block) Later, f2fs_write_data_page() will truncate the dentry_block, since its block address is NULL. The reason for this was due to the wrong lock order. In this case, we should do f2fs_reserve_block() after locking its dentry block. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 81d1fd5810786e..9ba30b435a5a43 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1076,20 +1076,22 @@ struct page *get_new_data_page(struct inode *inode, struct page *page; struct dnode_of_data dn; int err; +repeat: + page = grab_cache_page(mapping, index); + if (!page) + return ERR_PTR(-ENOMEM); set_new_dnode(&dn, inode, ipage, NULL, 0); err = f2fs_reserve_block(&dn, index); - if (err) + if (err) { + f2fs_put_page(page, 1); return ERR_PTR(err); -repeat: - page = grab_cache_page(mapping, index); - if (!page) { - err = -ENOMEM; - goto put_err; } + if (!ipage) + f2fs_put_dnode(&dn); if (PageUptodate(page)) - return page; + goto got_it; if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); @@ -1104,20 +1106,19 @@ struct page *get_new_data_page(struct inode *inode, }; err = f2fs_submit_page_bio(&fio); if (err) - goto put_err; + return ERR_PTR(err); lock_page(page); if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); - err = -EIO; - goto put_err; + return ERR_PTR(-EIO); } if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } } - +got_it: if (new_i_size && i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); @@ -1125,10 +1126,6 @@ struct page *get_new_data_page(struct inode *inode, set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); } return page; - -put_err: - f2fs_put_dnode(&dn); - return ERR_PTR(err); } static int __allocate_data_block(struct dnode_of_data *dn) From 2dcf51ab2f6ddec795371dad9a8eeca4cda4eee0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 29 Apr 2015 18:31:19 -0700 Subject: [PATCH 18/83] f2fs: add need_dentry_mark This patch introduces need_dentry_mark() to clean up and avoid redundant node locks. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 35 +++++++++++++++++------------------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b8f99fd4fcff5d..9e43ddcdba84d4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1565,6 +1565,7 @@ struct dnode_of_data; struct node_info; bool available_free_memory(struct f2fs_sb_info *, int); +int need_dentry_mark(struct f2fs_sb_info *, nid_t); bool is_checkpointed_node(struct f2fs_sb_info *, nid_t); bool need_inode_block_update(struct f2fs_sb_info *, nid_t); void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 880d5781b1b39a..62982e671d1808 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -195,32 +195,35 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, start, nr); } -bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) +int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; - bool is_cp = true; + bool need = false; down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); - if (e && !get_nat_flag(e, IS_CHECKPOINTED)) - is_cp = false; + if (e) { + if (!get_nat_flag(e, IS_CHECKPOINTED) && + !get_nat_flag(e, HAS_FSYNCED_INODE)) + need = true; + } up_read(&nm_i->nat_tree_lock); - return is_cp; + return need; } -static bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) +bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; - bool fsynced = false; + bool is_cp = true; down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, ino); - if (e && get_nat_flag(e, HAS_FSYNCED_INODE)) - fsynced = true; + e = __lookup_nat_cache(nm_i, nid); + if (e && !get_nat_flag(e, IS_CHECKPOINTED)) + is_cp = false; up_read(&nm_i->nat_tree_lock); - return fsynced; + return is_cp; } bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) @@ -1206,13 +1209,9 @@ int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, /* called by fsync() */ if (ino && IS_DNODE(page)) { set_fsync_mark(page, 1); - if (IS_INODE(page)) { - if (!is_checkpointed_node(sbi, ino) && - !has_fsynced_inode(sbi, ino)) - set_dentry_mark(page, 1); - else - set_dentry_mark(page, 0); - } + if (IS_INODE(page)) + set_dentry_mark(page, + need_dentry_mark(sbi, ino)); nwritten++; } else { set_fsync_mark(page, 0); From 2fb2c954968bedddfeb3895969fbdf2ae0679ed3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Apr 2015 18:58:22 -0700 Subject: [PATCH 19/83] f2fs: fix counting the number of inline_data inodes This patch fixes to count the missing symlink case. Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c0ba8e3bc5b2d8..90a96400fa4815 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -61,6 +61,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (f2fs_may_inline_dentry(inode)) set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); + stat_inc_inline_inode(inode); + stat_inc_inline_dir(inode); + trace_f2fs_new_inode(inode, 0); mark_inode_dirty(inode); return inode; @@ -136,7 +139,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, alloc_nid_done(sbi, ino); - stat_inc_inline_inode(inode); d_instantiate(dentry, inode); unlock_new_inode(inode); @@ -384,7 +386,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; f2fs_unlock_op(sbi); - stat_inc_inline_dir(inode); alloc_nid_done(sbi, inode->i_ino); d_instantiate(dentry, inode); @@ -770,7 +771,6 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) alloc_nid_done(sbi, inode->i_ino); - stat_inc_inline_inode(inode); d_tmpfile(dentry, inode); unlock_new_inode(inode); return 0; From 43f3eae1d3b1de6a4f7e39ef9c363ec6f8b9c8d4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Apr 2015 17:00:33 -0700 Subject: [PATCH 20/83] f2fs: split find_data_page according to specific purposes This patch splits find_data_page as follows. 1. f2fs_gc - use get_read_data_page() with read only 2. find_in_level - use find_data_page without locked page 3. truncate_partial_page - In the case cache_only mode, just drop cached page. - Ohterwise, use get_lock_data_page() and guarantee to truncate Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 127 +++++++++++++++++++------------------------------ fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 3 +- fs/f2fs/file.c | 26 +++++----- fs/f2fs/gc.c | 5 +- 5 files changed, 68 insertions(+), 95 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9ba30b435a5a43..3b762611ff6d4c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -917,7 +917,7 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn) sync_inode_page(dn); } -struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) +struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw) { struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; @@ -927,84 +927,9 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .type = DATA, - .rw = sync ? READ_SYNC : READA, + .rw = rw, }; - /* - * If sync is false, it needs to check its block allocation. - * This is need and triggered by two flows: - * gc and truncate_partial_data_page. - */ - if (!sync) - goto search; - - page = find_get_page(mapping, index); - if (page && PageUptodate(page)) - return page; - f2fs_put_page(page, 0); -search: - if (f2fs_lookup_extent_cache(inode, index, &ei)) { - dn.data_blkaddr = ei.blk + index - ei.fofs; - goto got_it; - } - - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, LOOKUP_NODE); - if (err) - return ERR_PTR(err); - f2fs_put_dnode(&dn); - - if (dn.data_blkaddr == NULL_ADDR) - return ERR_PTR(-ENOENT); - - /* By fallocate(), there is no cached page, but with NEW_ADDR */ - if (unlikely(dn.data_blkaddr == NEW_ADDR)) - return ERR_PTR(-EINVAL); - -got_it: - page = grab_cache_page(mapping, index); - if (!page) - return ERR_PTR(-ENOMEM); - - if (PageUptodate(page)) { - unlock_page(page); - return page; - } - - fio.blk_addr = dn.data_blkaddr; - fio.page = page; - err = f2fs_submit_page_bio(&fio); - if (err) - return ERR_PTR(err); - - if (sync) { - wait_on_page_locked(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 0); - return ERR_PTR(-EIO); - } - } - return page; -} - -/* - * If it tries to access a hole, return an error. - * Because, the callers, functions in dir.c and GC, should be able to know - * whether this page exists or not. - */ -struct page *get_lock_data_page(struct inode *inode, pgoff_t index) -{ - struct address_space *mapping = inode->i_mapping; - struct dnode_of_data dn; - struct page *page; - struct extent_info ei; - int err; - struct f2fs_io_info fio = { - .sbi = F2FS_I_SB(inode), - .type = DATA, - .rw = READ_SYNC, - }; -repeat: page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); @@ -1026,10 +951,11 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) f2fs_put_page(page, 1); return ERR_PTR(-ENOENT); } - got_it: - if (PageUptodate(page)) + if (PageUptodate(page)) { + unlock_page(page); return page; + } /* * A new dentry page is allocated but not able to be written, since its @@ -1040,6 +966,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); + unlock_page(page); return page; } @@ -1048,7 +975,49 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) err = f2fs_submit_page_bio(&fio); if (err) return ERR_PTR(err); + return page; +} + +struct page *find_data_page(struct inode *inode, pgoff_t index) +{ + struct address_space *mapping = inode->i_mapping; + struct page *page; + + page = find_get_page(mapping, index); + if (page && PageUptodate(page)) + return page; + f2fs_put_page(page, 0); + + page = get_read_data_page(inode, index, READ_SYNC); + if (IS_ERR(page)) + return page; + + if (PageUptodate(page)) + return page; + + wait_on_page_locked(page); + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 0); + return ERR_PTR(-EIO); + } + return page; +} + +/* + * If it tries to access a hole, return an error. + * Because, the callers, functions in dir.c and GC, should be able to know + * whether this page exists or not. + */ +struct page *get_lock_data_page(struct inode *inode, pgoff_t index) +{ + struct address_space *mapping = inode->i_mapping; + struct page *page; +repeat: + page = get_read_data_page(inode, index, READ_SYNC); + if (IS_ERR(page)) + return page; + /* wait for read completion */ lock_page(page); if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 3a3302ab78713a..9d558d24e1c422 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -177,7 +177,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, for (; bidx < end_block; bidx++) { /* no need to allocate new dentry pages to all the indices */ - dentry_page = find_data_page(dir, bidx, true); + dentry_page = find_data_page(dir, bidx); if (IS_ERR(dentry_page)) { room = true; continue; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9e43ddcdba84d4..78a430028f1dcb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1675,7 +1675,8 @@ void f2fs_destroy_extent_tree(struct inode *); void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *); void f2fs_update_extent_cache(struct dnode_of_data *); void f2fs_preserve_extent_tree(struct inode *); -struct page *find_data_page(struct inode *, pgoff_t, bool); +struct page *get_read_data_page(struct inode *, pgoff_t, int); +struct page *find_data_page(struct inode *, pgoff_t); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int do_write_data_page(struct f2fs_io_info *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0e58f021bf49d2..cb002c06763042 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -461,28 +461,32 @@ void truncate_data_blocks(struct dnode_of_data *dn) } static int truncate_partial_data_page(struct inode *inode, u64 from, - bool force) + bool cache_only) { unsigned offset = from & (PAGE_CACHE_SIZE - 1); + pgoff_t index = from >> PAGE_CACHE_SHIFT; + struct address_space *mapping = inode->i_mapping; struct page *page; - if (!offset && !force) + if (!offset && !cache_only) return 0; - page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, force); - if (IS_ERR(page)) + if (cache_only) { + page = grab_cache_page(mapping, index); + if (page && PageUptodate(page)) + goto truncate_out; + f2fs_put_page(page, 1); return 0; + } - lock_page(page); - if (unlikely(!PageUptodate(page) || - page->mapping != inode->i_mapping)) - goto out; - + page = get_lock_data_page(inode, index); + if (IS_ERR(page)) + return 0; +truncate_out: f2fs_wait_on_page_writeback(page, DATA); zero_user(page, offset, PAGE_CACHE_SIZE - offset); - if (!force) + if (!cache_only) set_page_dirty(page); -out: f2fs_put_page(page, 1); return 0; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 1bd11f017a23ba..2e2afebd9d0f50 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -607,9 +607,8 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, continue; start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); - - data_page = find_data_page(inode, - start_bidx + ofs_in_node, false); + data_page = get_read_data_page(inode, + start_bidx + ofs_in_node, READA); if (IS_ERR(data_page)) { iput(inode); continue; From d6c67a4fee866ad95fe1d727f80a56f956737706 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 1 May 2015 11:08:59 -0700 Subject: [PATCH 21/83] f2fs: revmove spin_lock for write_orphan_inodes This patch removes spin_lock, since this is covered by f2fs_lock_op already. And, we should avoid to use page operations inside spin_lock. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6dbff2b0bcd8cb..d076e7efb25dee 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -514,7 +514,12 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) grab_meta_page(sbi, start_blk + index); index = 1; - spin_lock(&im->ino_lock); + + /* + * we don't need to do spin_lock(&im->ino_lock) here, since all the + * orphan inode operations are covered under f2fs_lock_op(). + * And, spin_lock should be avoided due to page operations below. + */ head = &im->ino_list; /* loop for each orphan inode entry and write them in Jornal block */ @@ -554,8 +559,6 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) set_page_dirty(page); f2fs_put_page(page, 1); } - - spin_unlock(&im->ino_lock); } static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, From a66cdd985532e69999b79249cd3b4a6bccd0f84b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Apr 2015 22:37:50 -0700 Subject: [PATCH 22/83] f2fs: introduce discard_map for f2fs_trim_fs This patch adds a bitmap for discard issues from f2fs_trim_fs. There-in rule is to issue discard commands only for invalidated blocks after mount. Once mount is done, f2fs_trim_fs trims out whole invalid area. After ehn, it will not issue and discrads redundantly. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +- fs/f2fs/debug.c | 2 +- fs/f2fs/f2fs.h | 21 +++++++++++++ fs/f2fs/segment.c | 71 +++++++++++++++++++++++++++----------------- fs/f2fs/segment.h | 1 + 5 files changed, 68 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index d076e7efb25dee..55b65e52aaea27 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1058,7 +1058,8 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && - (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC)) + (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC || + (cpc->reason == CP_DISCARD && !sbi->discard_blks))) goto out; if (unlikely(f2fs_cp_error(sbi))) goto out; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index f5388f37217e08..f50acbc5099826 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -143,7 +143,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += sizeof(struct sit_info); si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); - si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); + si->base_mem += 3 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); si->base_mem += SIT_VBLOCK_MAP_SIZE; if (sbi->segs_per_sec > 1) si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 78a430028f1dcb..98fc719f5250e6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -117,6 +117,8 @@ enum { #define DEF_BATCHED_TRIM_SECTIONS 32 #define BATCHED_TRIM_SEGMENTS(sbi) \ (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) +#define BATCHED_TRIM_BLOCKS(sbi) \ + (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) struct cp_control { int reason; @@ -698,6 +700,7 @@ struct f2fs_sb_info { block_t user_block_count; /* # of user blocks */ block_t total_valid_block_count; /* # of valid blocks */ block_t alloc_valid_block_count; /* # of allocated blocks */ + block_t discard_blks; /* discard command candidats */ block_t last_valid_block_count; /* for recovery */ u32 s_next_generation; /* for NFS support */ atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ @@ -1225,6 +1228,24 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr) return mask & *addr; } +static inline void f2fs_set_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + *addr |= mask; +} + +static inline void f2fs_clear_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + *addr &= ~mask; +} + static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr) { int mask; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index df8bce5379b38e..d31fbf64f7aabc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -468,6 +468,17 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, { sector_t start = SECTOR_FROM_BLOCK(blkstart); sector_t len = SECTOR_FROM_BLOCK(blklen); + struct seg_entry *se; + unsigned int offset; + block_t i; + + for (i = blkstart; i < blkstart + blklen; i++) { + se = get_seg_entry(sbi, GET_SEGNO(sbi, i)); + offset = GET_BLKOFF_FROM_SEG0(sbi, i); + + if (!f2fs_test_and_set_bit(offset, se->discard_map)) + sbi->discard_blks--; + } trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); } @@ -483,7 +494,8 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) } static void __add_discard_entry(struct f2fs_sb_info *sbi, - struct cp_control *cpc, unsigned int start, unsigned int end) + struct cp_control *cpc, struct seg_entry *se, + unsigned int start, unsigned int end) { struct list_head *head = &SM_I(sbi)->discard_list; struct discard_entry *new, *last; @@ -514,41 +526,24 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); unsigned long *cur_map = (unsigned long *)se->cur_valid_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; + unsigned long *discard_map = (unsigned long *)se->discard_map; unsigned long *dmap = SIT_I(sbi)->tmp_map; unsigned int start = 0, end = -1; bool force = (cpc->reason == CP_DISCARD); int i; - if (!force && (!test_opt(sbi, DISCARD) || - SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)) + if (se->valid_blocks == max_blocks) return; - if (force && !se->valid_blocks) { - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - /* - * if this segment is registered in the prefree list, then - * we should skip adding a discard candidate, and let the - * checkpoint do that later. - */ - mutex_lock(&dirty_i->seglist_lock); - if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) { - mutex_unlock(&dirty_i->seglist_lock); - cpc->trimmed += sbi->blocks_per_seg; - return; - } - mutex_unlock(&dirty_i->seglist_lock); - - __add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg); + if (!force) { + if (!test_opt(sbi, DISCARD) || !se->valid_blocks || + SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) return; } - /* zero block will be discarded through the prefree list */ - if (!se->valid_blocks || se->valid_blocks == max_blocks) - return; - /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ for (i = 0; i < entries; i++) - dmap[i] = force ? ~ckpt_map[i] : + dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] : (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { @@ -561,7 +556,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (force && end - start < cpc->trim_minlen) continue; - __add_discard_entry(sbi, cpc, start, end); + __add_discard_entry(sbi, cpc, se, start, end); } } @@ -675,9 +670,13 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) if (del > 0) { if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); + if (!f2fs_test_and_set_bit(offset, se->discard_map)) + sbi->discard_blks--; } else { if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); + if (f2fs_test_and_clear_bit(offset, se->discard_map)) + sbi->discard_blks++; } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks += del; @@ -1080,7 +1079,14 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) /* do checkpoint to issue discard commands safely */ for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { cpc.trim_start = start_segno; - cpc.trim_end = min_t(unsigned int, rounddown(start_segno + + + if (sbi->discard_blks == 0) + break; + else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi)) + cpc.trim_end = end_segno; + else + cpc.trim_end = min_t(unsigned int, + rounddown(start_segno + BATCHED_TRIM_SEGMENTS(sbi), sbi->segs_per_sec) - 1, end_segno); @@ -1859,8 +1865,11 @@ static int build_sit_info(struct f2fs_sb_info *sbi) = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); sit_i->sentries[start].ckpt_valid_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); - if (!sit_i->sentries[start].cur_valid_map - || !sit_i->sentries[start].ckpt_valid_map) + sit_i->sentries[start].discard_map + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->sentries[start].cur_valid_map || + !sit_i->sentries[start].ckpt_valid_map || + !sit_i->sentries[start].discard_map) return -ENOMEM; } @@ -1998,6 +2007,11 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) got_it: check_block_count(sbi, start, &sit); seg_info_from_raw_sit(se, &sit); + + /* build discard map only one time */ + memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += sbi->blocks_per_seg - se->valid_blocks; + if (sbi->segs_per_sec > 1) { struct sec_entry *e = get_sec_entry(sbi, start); e->valid_blocks += se->valid_blocks; @@ -2247,6 +2261,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) for (start = 0; start < MAIN_SEGS(sbi); start++) { kfree(sit_i->sentries[start].cur_valid_map); kfree(sit_i->sentries[start].ckpt_valid_map); + kfree(sit_i->sentries[start].discard_map); } } kfree(sit_i->tmp_map); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 85d7fa7514b2cf..84963577811881 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -163,6 +163,7 @@ struct seg_entry { */ unsigned short ckpt_valid_blocks; unsigned char *ckpt_valid_map; + unsigned char *discard_map; unsigned char type; /* segment type like CURSEG_XXX_TYPE */ unsigned long long mtime; /* modification time of the segment */ }; From 836b5a6356ac49a4631c06bc87b0ea02f41623ca Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Apr 2015 22:50:06 -0700 Subject: [PATCH 23/83] f2fs: issue discard with finally produced len and minlen This patch determines to issue discard commands by comparing given minlen and the length of produced final candidates. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 14 ++++++-------- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 55b65e52aaea27..a61d4b06e05a07 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1043,7 +1043,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (unlikely(f2fs_cp_error(sbi))) return; - clear_prefree_segments(sbi); + clear_prefree_segments(sbi, cpc); clear_sbi_flag(sbi, SBI_IS_DIRTY); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 98fc719f5250e6..0b8c4543cac85d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1628,7 +1628,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *); void destroy_flush_cmd_control(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); -void clear_prefree_segments(struct f2fs_sb_info *); +void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); void release_discard_addrs(struct f2fs_sb_info *); void discard_next_dnode(struct f2fs_sb_info *, block_t); int npages_for_summary_flush(struct f2fs_sb_info *, bool); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d31fbf64f7aabc..61d06b74e82856 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -552,10 +552,6 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) break; end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); - - if (force && end - start < cpc->trim_minlen) - continue; - __add_discard_entry(sbi, cpc, se, start, end); } } @@ -586,7 +582,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) mutex_unlock(&dirty_i->seglist_lock); } -void clear_prefree_segments(struct f2fs_sb_info *sbi) +void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct list_head *head = &(SM_I(sbi)->discard_list); struct discard_entry *entry, *this; @@ -619,7 +615,10 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi) /* send small discards */ list_for_each_entry_safe(entry, this, head, list) { + if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen) + goto skip; f2fs_issue_discard(sbi, entry->blkaddr, entry->len); +skip: list_del(&entry->list); SM_I(sbi)->nr_discards -= entry->len; kmem_cache_free(discard_entry_slab, entry); @@ -1061,8 +1060,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) unsigned int start_segno, end_segno; struct cp_control cpc; - if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) || - range->len < sbi->blocksize) + if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; cpc.trimmed = 0; @@ -1074,7 +1072,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); cpc.reason = CP_DISCARD; - cpc.trim_minlen = F2FS_BYTES_TO_BLK(range->minlen); + cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); /* do checkpoint to issue discard commands safely */ for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { From 402f721d2fa84cae38a403873fb4efdbbca06aff Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 30 Apr 2015 18:34:41 +0800 Subject: [PATCH 24/83] f2fs: remove unneeded f2fs_make_empty declaration Remove f2fs_make_empty() declaration, since the main body of this function is move into do_make_empty_dir() and the function is obsolete now. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0b8c4543cac85d..5fff184cb5f6bc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1557,7 +1557,6 @@ int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); int f2fs_do_tmpfile(struct inode *, struct inode *); -int f2fs_make_empty(struct inode *, struct inode *); bool f2fs_empty_dir(struct inode *); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) From d5b692b786e1aea68d2c6737dd0abeebc1dad619 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 30 Apr 2015 18:35:50 +0800 Subject: [PATCH 25/83] f2fs: do not re-lookup nat cache with same nid In set_node_addr, we try to lookup cached nat entry of inode and then set flag in it. But previously in this function, we have already grabbed nat entry with current node id, if the node id is the same as the one of inode, we do not need to lookup it in cache again. So this patch adds condition judgment for reducing unneeded lookup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 62982e671d1808..771725650c0d05 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -315,7 +315,8 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, __set_nat_cache_dirty(nm_i, e); /* update fsync_mark if its inode nat entry is still alive */ - e = __lookup_nat_cache(nm_i, ni->ino); + if (ni->nid != ni->ino) + e = __lookup_nat_cache(nm_i, ni->ino); if (e) { if (fsync_done && ni->nid == ni->ino) set_nat_flag(e, HAS_FSYNCED_INODE, true); From 19f106bc03e62739961249a29916ee3602ac3de9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 May 2015 13:08:06 +0800 Subject: [PATCH 26/83] f2fs: introduce f2fs_replace_block() for reuse Introduce a generic function replace_block base on recover_data_page, and export it. So with it we can operate file's meta data which is in CP/SSA area when we invoke fallocate with FALLOC_FL_COLLAPSE_RANGE flag. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 31 ++++++++++++++++++++++++------- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5fff184cb5f6bc..160945f1707a83 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1638,8 +1638,8 @@ void write_meta_page(struct f2fs_sb_info *, struct page *); void write_node_page(unsigned int, struct f2fs_io_info *); void write_data_page(struct dnode_of_data *, struct f2fs_io_info *); void rewrite_data_page(struct f2fs_io_info *); -void recover_data_page(struct f2fs_sb_info *, struct page *, - struct f2fs_summary *, block_t, block_t); +void f2fs_replace_block(struct f2fs_sb_info *, struct f2fs_summary *, + block_t, block_t, bool); void allocate_data_block(struct f2fs_sb_info *, struct page *, block_t, block_t *, struct f2fs_summary *, int); void f2fs_wait_on_page_writeback(struct page *, enum page_type); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b80d9d48f12582..f77a1beac78328 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -412,7 +412,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); /* write dummy data page */ - recover_data_page(sbi, NULL, &sum, src, dest); + f2fs_replace_block(sbi, &sum, src, dest, false); dn.data_blkaddr = dest; set_data_blkaddr(&dn); f2fs_update_extent_cache(&dn); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 61d06b74e82856..989c0bf484311f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1264,32 +1264,41 @@ void rewrite_data_page(struct f2fs_io_info *fio) f2fs_submit_page_mbio(fio); } -void recover_data_page(struct f2fs_sb_info *sbi, - struct page *page, struct f2fs_summary *sum, - block_t old_blkaddr, block_t new_blkaddr) +void f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, + block_t old_blkaddr, block_t new_blkaddr, + bool recover_curseg) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; unsigned int segno, old_cursegno; struct seg_entry *se; int type; + unsigned short old_blkoff; segno = GET_SEGNO(sbi, new_blkaddr); se = get_seg_entry(sbi, segno); type = se->type; - if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { - if (old_blkaddr == NULL_ADDR) - type = CURSEG_COLD_DATA; - else + if (!recover_curseg) { + /* for recovery flow */ + if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { + if (old_blkaddr == NULL_ADDR) + type = CURSEG_COLD_DATA; + else + type = CURSEG_WARM_DATA; + } + } else { + if (!IS_CURSEG(sbi, segno)) type = CURSEG_WARM_DATA; } + curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); mutex_lock(&sit_i->sentry_lock); old_cursegno = curseg->segno; + old_blkoff = curseg->next_blkoff; /* change the current segment */ if (segno != curseg->segno) { @@ -1303,6 +1312,14 @@ void recover_data_page(struct f2fs_sb_info *sbi, refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); locate_dirty_segment(sbi, old_cursegno); + if (recover_curseg) { + if (old_cursegno != curseg->segno) { + curseg->next_segno = old_cursegno; + change_curseg(sbi, type, true); + } + curseg->next_blkoff = old_blkoff; + } + mutex_unlock(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); } From b4ace33703243fed56f8bfc80a001533acb9decb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 May 2015 13:09:46 +0800 Subject: [PATCH 27/83] f2fs: support FALLOC_FL_COLLAPSE_RANGE Now, FALLOC_FL_COLLAPSE_RANGE flag in ->fallocate is supported in ext4/xfs. In commit, the semantics of this flag is descripted as following:" 1) It collapses the range lying between offset and length by removing any data blocks which are present in this range and than updates all the logical offsets of extents beyond "offset + len" to nullify the hole created by removing blocks. In short, it does not leave a hole. 2) It should be used exclusively. No other fallocate flag in combination. 3) Offset and length supplied to fallocate should be fs block size aligned in case of xfs and ext4. 4) Collaspe range does not work beyond i_size." This patch implements fallocate's FALLOC_FL_COLLAPSE_RANGE for f2fs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 135 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 132 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index cb002c06763042..dbddc7935f332c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -765,6 +765,132 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) return ret; } +static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + int ret = 0; + + f2fs_lock_op(sbi); + + for (; end < nrpages; start++, end++) { + block_t new_addr, old_addr; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, end, LOOKUP_NODE_RA); + if (ret && ret != -ENOENT) { + goto out; + } else if (ret == -ENOENT) { + new_addr = NULL_ADDR; + } else { + new_addr = dn.data_blkaddr; + truncate_data_blocks_range(&dn, 1); + f2fs_put_dnode(&dn); + } + + if (new_addr == NULL_ADDR) { + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, start, LOOKUP_NODE_RA); + if (ret && ret != -ENOENT) + goto out; + else if (ret == -ENOENT) + continue; + + if (dn.data_blkaddr == NULL_ADDR) { + f2fs_put_dnode(&dn); + continue; + } else { + truncate_data_blocks_range(&dn, 1); + } + + f2fs_put_dnode(&dn); + } else { + struct page *ipage; + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + ret = PTR_ERR(ipage); + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); + ret = f2fs_reserve_block(&dn, start); + if (ret) + goto out; + + old_addr = dn.data_blkaddr; + if (old_addr != NEW_ADDR && new_addr == NEW_ADDR) { + dn.data_blkaddr = NULL_ADDR; + f2fs_update_extent_cache(&dn); + invalidate_blocks(sbi, old_addr); + + dn.data_blkaddr = new_addr; + set_data_blkaddr(&dn); + } else if (new_addr != NEW_ADDR) { + struct node_info ni; + struct f2fs_summary sum; + + get_node_info(sbi, dn.nid, &ni); + set_summary(&sum, dn.nid, dn.ofs_in_node, + ni.version); + + f2fs_replace_block(sbi, &sum, old_addr, + new_addr, true); + + dn.data_blkaddr = new_addr; + set_data_blkaddr(&dn); + f2fs_update_extent_cache(&dn); + } + + f2fs_put_dnode(&dn); + } + } + ret = 0; +out: + f2fs_unlock_op(sbi); + return ret; +} + +static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) +{ + pgoff_t pg_start, pg_end; + loff_t new_size; + int ret; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + if (offset + len >= i_size_read(inode)) + return -EINVAL; + + /* collapse range should be aligned to block size of f2fs. */ + if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) + return -EINVAL; + + pg_start = offset >> PAGE_CACHE_SHIFT; + pg_end = (offset + len) >> PAGE_CACHE_SHIFT; + + /* write out all dirty pages from offset */ + ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); + if (ret) + return ret; + + truncate_pagecache(inode, offset); + + ret = f2fs_do_collapse(inode, pg_start, pg_end); + if (ret) + return ret; + + new_size = i_size_read(inode) - len; + + ret = truncate_blocks(inode, new_size, true); + if (!ret) + i_size_write(inode, new_size); + + return ret; +} + static int expand_inode_data(struct inode *inode, loff_t offset, loff_t len, int mode) { @@ -832,7 +958,8 @@ static long f2fs_fallocate(struct file *file, int mode, struct inode *inode = file_inode(file); long ret = 0; - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_COLLAPSE_RANGE)) return -EOPNOTSUPP; mutex_lock(&inode->i_mutex); @@ -842,9 +969,11 @@ static long f2fs_fallocate(struct file *file, int mode, goto out; ret = punch_hole(inode, offset, len); - } - else + } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { + ret = f2fs_collapse_range(inode, offset, len); + } else { ret = expand_inode_data(inode, offset, len, mode); + } if (!ret) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; From 75cd4e098d178433436abce08146a21647bb4585 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 May 2015 13:11:13 +0800 Subject: [PATCH 28/83] f2fs: support FALLOC_FL_ZERO_RANGE Now, FALLOC_FL_ZERO_RANGE flag in ->fallocate is supported in ext4/xfs. In commit, the semantics of this flag is descripted as following:" 1) Make sure that both offset and len are block size aligned. 2) Update the i_size of inode by len bytes. 3) Compute the file's logical block number against offset. If the computed block number is not the starting block of the extent, split the extent such that the block number is the starting block of the extent. 4) Shift all the extents which are lying between [offset, last allocated extent] towards right by len bytes. This step will make a hole of len bytes at offset." This patch implements fallocate's FALLOC_FL_ZERO_RANGE for f2fs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index dbddc7935f332c..7293746f1fe6ad 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -891,6 +891,108 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) return ret; } +static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, + int mode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct address_space *mapping = inode->i_mapping; + pgoff_t index, pg_start, pg_end; + loff_t new_size = i_size_read(inode); + loff_t off_start, off_end; + int ret = 0; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + ret = inode_newsize_ok(inode, (len + offset)); + if (ret) + return ret; + + f2fs_balance_fs(sbi); + + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } + + ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); + if (ret) + return ret; + + truncate_pagecache_range(inode, offset, offset + len - 1); + + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; + + off_start = offset & (PAGE_CACHE_SIZE - 1); + off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); + + if (pg_start == pg_end) { + fill_zero(inode, pg_start, off_start, off_end - off_start); + if (offset + len > new_size) + new_size = offset + len; + new_size = max_t(loff_t, new_size, offset + len); + } else { + if (off_start) { + fill_zero(inode, pg_start++, off_start, + PAGE_CACHE_SIZE - off_start); + new_size = max_t(loff_t, new_size, + pg_start << PAGE_CACHE_SHIFT); + } + + for (index = pg_start; index < pg_end; index++) { + struct dnode_of_data dn; + struct page *ipage; + + f2fs_lock_op(sbi); + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + ret = PTR_ERR(ipage); + f2fs_unlock_op(sbi); + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); + ret = f2fs_reserve_block(&dn, index); + if (ret) { + f2fs_unlock_op(sbi); + goto out; + } + + if (dn.data_blkaddr != NEW_ADDR) { + invalidate_blocks(sbi, dn.data_blkaddr); + + dn.data_blkaddr = NEW_ADDR; + set_data_blkaddr(&dn); + + dn.data_blkaddr = NULL_ADDR; + f2fs_update_extent_cache(&dn); + } + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + + new_size = max_t(loff_t, new_size, + (index + 1) << PAGE_CACHE_SHIFT); + } + + if (off_end) { + fill_zero(inode, pg_end, 0, off_end); + new_size = max_t(loff_t, new_size, offset + len); + } + } + +out: + if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) { + i_size_write(inode, new_size); + mark_inode_dirty(inode); + update_inode_page(inode); + } + + return ret; +} + static int expand_inode_data(struct inode *inode, loff_t offset, loff_t len, int mode) { @@ -959,7 +1061,7 @@ static long f2fs_fallocate(struct file *file, int mode, long ret = 0; if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | - FALLOC_FL_COLLAPSE_RANGE)) + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; mutex_lock(&inode->i_mutex); @@ -971,6 +1073,8 @@ static long f2fs_fallocate(struct file *file, int mode, ret = punch_hole(inode, offset, len); } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { ret = f2fs_collapse_range(inode, offset, len); + } else if (mode & FALLOC_FL_ZERO_RANGE) { + ret = f2fs_zero_range(inode, offset, len, mode); } else { ret = expand_inode_data(inode, offset, len, mode); } From 3589a9190b927f68339f6c4779196360dd453a70 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 May 2015 16:37:28 -0700 Subject: [PATCH 29/83] f2fs: avoid value overflow in showing current status This patch fixes overflow when do cat /sys/kernel/debug/f2fs/status. If a section is relatively large, dist value can be overflowed. Reported-by: Yossi Goldfill Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index f50acbc5099826..efbc83f0730509 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -94,7 +94,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) static void update_sit_info(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; + unsigned long long blks_per_sec, hblks_per_sec, total_vblocks; + unsigned long long bimodal, dist; unsigned int segno, vblocks; int ndirty = 0; From 7f63eb77af7bd7580b27b0c3f249f7efaa5c63ae Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 May 2015 19:30:32 -0700 Subject: [PATCH 30/83] f2fs: report unwritten area in f2fs_fiemap This patch slightly changes f2fs_fiemap function to report unwritten area. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++-- fs/f2fs/f2fs.h | 4 +- 2 files changed, 117 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3b762611ff6d4c..842fcdd9d22672 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1241,6 +1241,8 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, if (dn.data_blkaddr != NULL_ADDR) { map->m_flags = F2FS_MAP_MAPPED; map->m_pblk = dn.data_blkaddr; + if (dn.data_blkaddr == NEW_ADDR) + map->m_flags |= F2FS_MAP_UNWRITTEN; } else if (create) { err = __allocate_data_block(&dn); if (err) @@ -1288,7 +1290,10 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, blkaddr = dn.data_blkaddr; } /* Give more consecutive addresses for the readahead */ - if (map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs)) { + if ((map->m_pblk != NEW_ADDR && + blkaddr == (map->m_pblk + ofs)) || + (map->m_pblk == NEW_ADDR && + blkaddr == NEW_ADDR)) { ofs++; dn.ofs_in_node++; pgofs++; @@ -1339,11 +1344,117 @@ static int get_data_block_fiemap(struct inode *inode, sector_t iblock, return __get_data_block(inode, iblock, bh_result, create, true); } +static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) +{ + return (offset >> inode->i_blkbits); +} + +static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) +{ + return (blk << inode->i_blkbits); +} + int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { - return generic_block_fiemap(inode, fieinfo, - start, len, get_data_block_fiemap); + struct buffer_head map_bh; + sector_t start_blk, last_blk; + loff_t isize = i_size_read(inode); + u64 logical = 0, phys = 0, size = 0; + u32 flags = 0; + bool past_eof = false, whole_file = false; + int ret = 0; + + ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); + if (ret) + return ret; + + mutex_lock(&inode->i_mutex); + + if (len >= isize) { + whole_file = true; + len = isize; + } + + if (logical_to_blk(inode, len) == 0) + len = blk_to_logical(inode, 1); + + start_blk = logical_to_blk(inode, start); + last_blk = logical_to_blk(inode, start + len - 1); +next: + memset(&map_bh, 0, sizeof(struct buffer_head)); + map_bh.b_size = len; + + ret = get_data_block_fiemap(inode, start_blk, &map_bh, 0); + if (ret) + goto out; + + /* HOLE */ + if (!buffer_mapped(&map_bh)) { + start_blk++; + + if (!past_eof && blk_to_logical(inode, start_blk) >= isize) + past_eof = 1; + + if (past_eof && size) { + flags |= FIEMAP_EXTENT_LAST; + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); + } else if (size) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); + size = 0; + } + + /* if we have holes up to/past EOF then we're done */ + if (start_blk > last_blk || past_eof || ret) + goto out; + } else { + if (start_blk > last_blk && !whole_file) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); + goto out; + } + + /* + * if size != 0 then we know we already have an extent + * to add, so add it. + */ + if (size) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); + if (ret) + goto out; + } + + logical = blk_to_logical(inode, start_blk); + phys = blk_to_logical(inode, map_bh.b_blocknr); + size = map_bh.b_size; + flags = 0; + if (buffer_unwritten(&map_bh)) + flags = FIEMAP_EXTENT_UNWRITTEN; + + start_blk += logical_to_blk(inode, size); + + /* + * If we are past the EOF, then we need to make sure as + * soon as we find a hole that the last extent we found + * is marked with FIEMAP_EXTENT_LAST + */ + if (!past_eof && logical + size >= isize) + past_eof = true; + } + cond_resched(); + if (fatal_signal_pending(current)) + ret = -EINTR; + else + goto next; +out: + if (ret == 1) + ret = 0; + + mutex_unlock(&inode->i_mutex); + return ret; } /* diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 160945f1707a83..477e65fe7665cb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -330,7 +330,9 @@ struct extent_tree { */ #define F2FS_MAP_NEW (1 << BH_New) #define F2FS_MAP_MAPPED (1 << BH_Mapped) -#define F2FS_MAP_FLAGS (F2FS_MAP_NEW | F2FS_MAP_MAPPED) +#define F2FS_MAP_UNWRITTEN (1 << BH_Unwritten) +#define F2FS_MAP_FLAGS (F2FS_MAP_NEW | F2FS_MAP_MAPPED |\ + F2FS_MAP_UNWRITTEN) struct f2fs_map_blocks { block_t m_pblk; From cde4de1205770514005663d70a9a7d81cb555085 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Apr 2015 13:57:51 -0700 Subject: [PATCH 31/83] f2fs crypto: declare some definitions for f2fs encryption feature This definitions will be used by inode and superblock for encyption. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 55 +++++++++++++++ fs/f2fs/f2fs_crypto.h | 147 ++++++++++++++++++++++++++++++++++++++++ include/linux/f2fs_fs.h | 4 +- 3 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 fs/f2fs/f2fs_crypto.h diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 477e65fe7665cb..2c2948fbc35f35 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -70,6 +70,8 @@ struct f2fs_mount_info { unsigned int opt; }; +#define F2FS_FEATURE_ENCRYPT 0x0001 + #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) #define F2FS_SET_FEATURE(sb, mask) \ @@ -346,6 +348,7 @@ struct f2fs_map_blocks { */ #define FADVISE_COLD_BIT 0x01 #define FADVISE_LOST_PINO_BIT 0x02 +#define FADVISE_ENCRYPT_BIT 0x04 #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) @@ -353,6 +356,16 @@ struct f2fs_map_blocks { #define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) #define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) #define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) +#define file_is_encrypt(inode) is_file(inode, FADVISE_ENCRYPT_BIT) +#define file_set_encrypt(inode) set_file(inode, FADVISE_ENCRYPT_BIT) +#define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT) + +/* Encryption algorithms */ +#define F2FS_ENCRYPTION_MODE_INVALID 0 +#define F2FS_ENCRYPTION_MODE_AES_256_XTS 1 +#define F2FS_ENCRYPTION_MODE_AES_256_GCM 2 +#define F2FS_ENCRYPTION_MODE_AES_256_CBC 3 +#define F2FS_ENCRYPTION_MODE_AES_256_CTS 4 #define DEF_DIR_LEVEL 0 @@ -380,6 +393,11 @@ struct f2fs_inode_info { struct radix_tree_root inmem_root; /* radix tree for inmem pages */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ + +#ifdef CONFIG_F2FS_FS_ENCRYPTION + /* Encryption params */ + struct f2fs_crypt_info *i_crypt_info; +#endif }; static inline void get_extent_info(struct extent_info *ext, @@ -1891,4 +1909,41 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); bool f2fs_empty_inline_dir(struct inode *); int f2fs_read_inline_dir(struct file *, struct dir_context *); + +/* + * crypto support + */ +static inline int f2fs_encrypted_inode(struct inode *inode) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + return file_is_encrypt(inode); +#else + return 0; +#endif +} + +static inline void f2fs_set_encrypted_inode(struct inode *inode) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + file_set_encrypt(inode); +#endif +} + +static inline bool f2fs_bio_encrypted(struct bio *bio) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + return unlikely(bio->bi_private != NULL); +#else + return false; +#endif +} + +static inline int f2fs_sb_has_crypto(struct super_block *sb) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT); +#else + return 0; +#endif +} #endif diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h new file mode 100644 index 00000000000000..3323266e7e2bd5 --- /dev/null +++ b/fs/f2fs/f2fs_crypto.h @@ -0,0 +1,147 @@ +/* + * linux/fs/f2fs/f2fs_crypto.h + * + * Copied from linux/fs/ext4/ext4_crypto.h + * + * Copyright (C) 2015, Google, Inc. + * + * This contains encryption header content for f2fs + * + * Written by Michael Halcrow, 2015. + * Modified by Jaegeuk Kim, 2015. + */ +#ifndef _F2FS_CRYPTO_H +#define _F2FS_CRYPTO_H + +#include + +#define F2FS_KEY_DESCRIPTOR_SIZE 8 + +/* Policy provided via an ioctl on the topmost directory */ +struct f2fs_encryption_policy { + char version; + char contents_encryption_mode; + char filenames_encryption_mode; + char flags; + char master_key_descriptor[F2FS_KEY_DESCRIPTOR_SIZE]; +} __attribute__((__packed__)); + +#define F2FS_ENCRYPTION_CONTEXT_FORMAT_V1 1 +#define F2FS_KEY_DERIVATION_NONCE_SIZE 16 + +#define F2FS_POLICY_FLAGS_PAD_4 0x00 +#define F2FS_POLICY_FLAGS_PAD_8 0x01 +#define F2FS_POLICY_FLAGS_PAD_16 0x02 +#define F2FS_POLICY_FLAGS_PAD_32 0x03 +#define F2FS_POLICY_FLAGS_PAD_MASK 0x03 +#define F2FS_POLICY_FLAGS_VALID 0x03 + +/** + * Encryption context for inode + * + * Protector format: + * 1 byte: Protector format (1 = this version) + * 1 byte: File contents encryption mode + * 1 byte: File names encryption mode + * 1 byte: Flags + * 8 bytes: Master Key descriptor + * 16 bytes: Encryption Key derivation nonce + */ +struct f2fs_encryption_context { + char format; + char contents_encryption_mode; + char filenames_encryption_mode; + char flags; + char master_key_descriptor[F2FS_KEY_DESCRIPTOR_SIZE]; + char nonce[F2FS_KEY_DERIVATION_NONCE_SIZE]; +} __attribute__((__packed__)); + +/* Encryption parameters */ +#define F2FS_XTS_TWEAK_SIZE 16 +#define F2FS_AES_128_ECB_KEY_SIZE 16 +#define F2FS_AES_256_GCM_KEY_SIZE 32 +#define F2FS_AES_256_CBC_KEY_SIZE 32 +#define F2FS_AES_256_CTS_KEY_SIZE 32 +#define F2FS_AES_256_XTS_KEY_SIZE 64 +#define F2FS_MAX_KEY_SIZE 64 + +struct f2fs_encryption_key { + __u32 mode; + char raw[F2FS_MAX_KEY_SIZE]; + __u32 size; +} __attribute__((__packed__)); + +struct f2fs_crypt_info { + unsigned char ci_mode; + unsigned char ci_size; + char ci_data_mode; + char ci_filename_mode; + char ci_flags; + struct crypto_ablkcipher *ci_ctfm; + struct key *ci_keyring_key; + char ci_raw[F2FS_MAX_KEY_SIZE]; + char ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE]; +}; + +#define F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 +#define F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL 0x00000002 + +struct f2fs_crypto_ctx { + struct crypto_tfm *tfm; /* Crypto API context */ + struct page *bounce_page; /* Ciphertext page on write path */ + struct page *control_page; /* Original page on write path */ + struct bio *bio; /* The bio for this context */ + struct work_struct work; /* Work queue for read complete path */ + struct list_head free_list; /* Free list */ + int flags; /* Flags */ + int mode; /* Encryption mode for tfm */ +}; + +struct f2fs_completion_result { + struct completion completion; + int res; +}; + +#define DECLARE_F2FS_COMPLETION_RESULT(ecr) \ + struct f2fs_completion_result ecr = { \ + COMPLETION_INITIALIZER((ecr).completion), 0 } + +static inline int f2fs_encryption_key_size(int mode) +{ + switch (mode) { + case F2FS_ENCRYPTION_MODE_AES_256_XTS: + return F2FS_AES_256_XTS_KEY_SIZE; + case F2FS_ENCRYPTION_MODE_AES_256_GCM: + return F2FS_AES_256_GCM_KEY_SIZE; + case F2FS_ENCRYPTION_MODE_AES_256_CBC: + return F2FS_AES_256_CBC_KEY_SIZE; + case F2FS_ENCRYPTION_MODE_AES_256_CTS: + return F2FS_AES_256_CTS_KEY_SIZE; + default: + BUG(); + } + return 0; +} + +#define F2FS_FNAME_NUM_SCATTER_ENTRIES 4 +#define F2FS_CRYPTO_BLOCK_SIZE 16 +#define F2FS_FNAME_CRYPTO_DIGEST_SIZE 32 + +/** + * For encrypted symlinks, the ciphertext length is stored at the beginning + * of the string in little-endian format. + */ +struct f2fs_encrypted_symlink_data { + __le16 len; + char encrypted_path[1]; +} __attribute__((__packed__)); + +/** + * This function is used to calculate the disk space required to + * store a filename of length l in encrypted symlink format. + */ +static inline u32 encrypted_symlink_data_len(u32 l) +{ + return (l + sizeof(struct f2fs_encrypted_symlink_data) - 1); +} +#endif /* _F2FS_CRYPTO_H */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index d44e97f2b98ee8..920408a21ffdcf 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -91,7 +91,9 @@ struct f2fs_super_block { __u8 version[VERSION_LEN]; /* the kernel version */ __u8 init_version[VERSION_LEN]; /* the initial kernel version */ __le32 feature; /* defined features */ - __u8 reserved[888]; /* valid reserved region */ + __u8 encryption_level; /* versioning level for encryption */ + __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ + __u8 reserved[871]; /* valid reserved region */ } __packed; /* From d33793fb89c2e3576d1f219e58f59365484f475e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 10 Apr 2015 16:28:26 -0700 Subject: [PATCH 32/83] f2fs crypto: add f2fs encryption Kconfig This patch adds f2fs encryption config. This patch integrates: "ext4 crypto: require CONFIG_CRYPTO_CTR if ext4 encryption is enabled On arm64 this is apparently needed for CTS mode to function correctly. Otherwise attempts to use CTS return ENOENT." Signed-off-by: Michael Halcrow Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/Kconfig | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 05f0f663f14ca4..c629762005bc3f 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -72,6 +72,25 @@ config F2FS_CHECK_FS If you want to improve the performance, say N. +config F2FS_FS_ENCRYPTION + bool "F2FS Encryption" + depends on F2FS_FS + depends on F2FS_FS_XATTR + select CRYPTO_AES + select CRYPTO_CBC + select CRYPTO_ECB + select CRYPTO_XTS + select CRYPTO_CTS + select CRYPTO_CTR + select CRYPTO_SHA256 + select KEYS + select ENCRYPTED_KEYS + help + Enable encryption of f2fs files and directories. This + feature is similar to ecryptfs, but it is more memory + efficient since it avoids caching the encrypted and + decrypted pages in the page cache. + config F2FS_IO_TRACE bool "F2FS IO tracer" depends on F2FS_FS From b93531dd7b9b39faace22abde2d5296e87b50499 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 10 Apr 2015 16:43:31 -0700 Subject: [PATCH 33/83] f2fs crypto: add encryption xattr support This patch add some definition for enrcyption xattr. Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 969d792ca36229..71a7100d5492e8 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -35,6 +35,10 @@ #define F2FS_XATTR_INDEX_LUSTRE 5 #define F2FS_XATTR_INDEX_SECURITY 6 #define F2FS_XATTR_INDEX_ADVISE 7 +/* Should be same as EXT4_XATTR_INDEX_ENCRYPTION */ +#define F2FS_XATTR_INDEX_ENCRYPTION 9 + +#define F2FS_XATTR_NAME_ENCRYPTION_CONTEXT "c" struct f2fs_xattr_header { __le32 h_magic; /* magic number for identification */ From f424f664f0e8949361d268e2e91c7acc8cf63eb2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Apr 2015 15:19:06 -0700 Subject: [PATCH 34/83] f2fs crypto: add encryption policy and password salt support This patch adds encryption policy and password salt support through ioctl implementation. It adds three ioctls: F2FS_IOC_SET_ENCRYPTION_POLICY, F2FS_IOC_GET_ENCRYPTION_POLICY, F2FS_IOC_GET_ENCRYPTION_PWSALT, which use xattr operations. Note that, these definition and codes are taken from ext4 crypto support. For f2fs, xattr operations and on-disk flags for superblock and inode were changed. Signed-off-by: Michael Halcrow Signed-off-by: Theodore Ts'o Signed-off-by: Ildar Muslukhov Signed-off-by: Jaegeuk Kim --- fs/f2fs/Makefile | 1 + fs/f2fs/crypto_policy.c | 206 ++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 16 ++++ fs/f2fs/file.c | 93 ++++++++++++++++++ fs/f2fs/xattr.c | 3 + 5 files changed, 319 insertions(+) create mode 100644 fs/f2fs/crypto_policy.c diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index d92397731db846..7864f4f02ca60e 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -6,3 +6,4 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o +f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c new file mode 100644 index 00000000000000..bef254b21815a5 --- /dev/null +++ b/fs/f2fs/crypto_policy.c @@ -0,0 +1,206 @@ +/* + * copied from linux/fs/ext4/crypto_policy.c + * + * Copyright (C) 2015, Google, Inc. + * Copyright (C) 2015, Motorola Mobility. + * + * This contains encryption policy functions for f2fs with some modifications + * to support f2fs-specific xattr APIs. + * + * Written by Michael Halcrow, 2015. + * Modified by Jaegeuk Kim, 2015. + */ +#include +#include +#include +#include + +#include "f2fs.h" +#include "xattr.h" + +static int f2fs_inode_has_encryption_context(struct inode *inode) +{ + int res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, NULL, 0, NULL); + return (res > 0); +} + +/* + * check whether the policy is consistent with the encryption context + * for the inode + */ +static int f2fs_is_encryption_context_consistent_with_policy( + struct inode *inode, const struct f2fs_encryption_policy *policy) +{ + struct f2fs_encryption_context ctx; + int res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, + sizeof(ctx), NULL); + + if (res != sizeof(ctx)) + return 0; + + return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor, + F2FS_KEY_DESCRIPTOR_SIZE) == 0 && + (ctx.flags == policy->flags) && + (ctx.contents_encryption_mode == + policy->contents_encryption_mode) && + (ctx.filenames_encryption_mode == + policy->filenames_encryption_mode)); +} + +static int f2fs_create_encryption_context_from_policy( + struct inode *inode, const struct f2fs_encryption_policy *policy) +{ + struct f2fs_encryption_context ctx; + + ctx.format = F2FS_ENCRYPTION_CONTEXT_FORMAT_V1; + memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, + F2FS_KEY_DESCRIPTOR_SIZE); + + if (!f2fs_valid_contents_enc_mode(policy->contents_encryption_mode)) { + printk(KERN_WARNING + "%s: Invalid contents encryption mode %d\n", __func__, + policy->contents_encryption_mode); + return -EINVAL; + } + + if (!f2fs_valid_filenames_enc_mode(policy->filenames_encryption_mode)) { + printk(KERN_WARNING + "%s: Invalid filenames encryption mode %d\n", __func__, + policy->filenames_encryption_mode); + return -EINVAL; + } + + if (policy->flags & ~F2FS_POLICY_FLAGS_VALID) + return -EINVAL; + + ctx.contents_encryption_mode = policy->contents_encryption_mode; + ctx.filenames_encryption_mode = policy->filenames_encryption_mode; + ctx.flags = policy->flags; + BUILD_BUG_ON(sizeof(ctx.nonce) != F2FS_KEY_DERIVATION_NONCE_SIZE); + get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE); + + return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, + sizeof(ctx), NULL, 0); +} + +int f2fs_process_policy(const struct f2fs_encryption_policy *policy, + struct inode *inode) +{ + if (policy->version != 0) + return -EINVAL; + + if (!f2fs_inode_has_encryption_context(inode)) { + if (!f2fs_empty_dir(inode)) + return -ENOTEMPTY; + return f2fs_create_encryption_context_from_policy(inode, + policy); + } + + if (f2fs_is_encryption_context_consistent_with_policy(inode, policy)) + return 0; + + printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n", + __func__); + return -EINVAL; +} + +int f2fs_get_policy(struct inode *inode, struct f2fs_encryption_policy *policy) +{ + struct f2fs_encryption_context ctx; + int res; + + if (!f2fs_encrypted_inode(inode)) + return -ENODATA; + + res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, + &ctx, sizeof(ctx), NULL); + if (res != sizeof(ctx)) + return -ENODATA; + if (ctx.format != F2FS_ENCRYPTION_CONTEXT_FORMAT_V1) + return -EINVAL; + + policy->version = 0; + policy->contents_encryption_mode = ctx.contents_encryption_mode; + policy->filenames_encryption_mode = ctx.filenames_encryption_mode; + policy->flags = ctx.flags; + memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor, + F2FS_KEY_DESCRIPTOR_SIZE); + return 0; +} + +int f2fs_is_child_context_consistent_with_parent(struct inode *parent, + struct inode *child) +{ + struct f2fs_crypt_info *parent_ci, *child_ci; + int res; + + if ((parent == NULL) || (child == NULL)) { + pr_err("parent %p child %p\n", parent, child); + BUG_ON(1); + } + + /* no restrictions if the parent directory is not encrypted */ + if (!f2fs_encrypted_inode(parent)) + return 1; + /* if the child directory is not encrypted, this is always a problem */ + if (!f2fs_encrypted_inode(child)) + return 0; + res = f2fs_get_encryption_info(parent); + if (res) + return 0; + res = f2fs_get_encryption_info(child); + if (res) + return 0; + parent_ci = F2FS_I(parent)->i_crypt_info; + child_ci = F2FS_I(child)->i_crypt_info; + if (!parent_ci && !child_ci) + return 1; + if (!parent_ci || !child_ci) + return 0; + + return (memcmp(parent_ci->ci_master_key, + child_ci->ci_master_key, + F2FS_KEY_DESCRIPTOR_SIZE) == 0 && + (parent_ci->ci_data_mode == child_ci->ci_data_mode) && + (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && + (parent_ci->ci_flags == child_ci->ci_flags)); +} + +/** + * f2fs_inherit_context() - Sets a child context from its parent + * @parent: Parent inode from which the context is inherited. + * @child: Child inode that inherits the context from @parent. + * + * Return: Zero on success, non-zero otherwise + */ +int f2fs_inherit_context(struct inode *parent, struct inode *child, + struct page *ipage) +{ + struct f2fs_encryption_context ctx; + struct f2fs_crypt_info *ci; + int res; + + res = f2fs_get_encryption_info(parent); + if (res < 0) + return res; + + ci = F2FS_I(parent)->i_crypt_info; + BUG_ON(ci == NULL); + + ctx.format = F2FS_ENCRYPTION_CONTEXT_FORMAT_V1; + + ctx.contents_encryption_mode = ci->ci_data_mode; + ctx.filenames_encryption_mode = ci->ci_filename_mode; + ctx.flags = ci->ci_flags; + memcpy(ctx.master_key_descriptor, ci->ci_master_key, + F2FS_KEY_DESCRIPTOR_SIZE); + + get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE); + return f2fs_setxattr(child, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, + sizeof(ctx), ipage, 0); +} diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2c2948fbc35f35..709660bd926a84 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -229,6 +229,13 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) +#define F2FS_IOC_SET_ENCRYPTION_POLICY \ + _IOR('f', 19, struct f2fs_encryption_policy) +#define F2FS_IOC_GET_ENCRYPTION_PWSALT \ + _IOW('f', 20, __u8[16]) +#define F2FS_IOC_GET_ENCRYPTION_POLICY \ + _IOW('f', 21, struct f2fs_encryption_policy) + /* * should be same as XFS_IOC_GOINGDOWN. * Flags for going down operation used by FS_IOC_GOINGDOWN @@ -367,6 +374,8 @@ struct f2fs_map_blocks { #define F2FS_ENCRYPTION_MODE_AES_256_CBC 3 #define F2FS_ENCRYPTION_MODE_AES_256_CTS 4 +#include "f2fs_crypto.h" + #define DEF_DIR_LEVEL 0 struct f2fs_inode_info { @@ -1946,4 +1955,11 @@ static inline int f2fs_sb_has_crypto(struct super_block *sb) return 0; #endif } + +/* crypto_policy.c */ +int f2fs_is_child_context_consistent_with_parent(struct inode *, + struct inode *); +int f2fs_inherit_context(struct inode *, struct inode *, struct page *); +int f2fs_process_policy(const struct f2fs_encryption_policy *, struct inode *); +int f2fs_get_policy(struct inode *, struct f2fs_encryption_policy *); #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7293746f1fe6ad..a66970d99cb4aa 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -1347,6 +1348,92 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) return 0; } +static bool uuid_is_nonzero(__u8 u[16]) +{ + int i; + + for (i = 0; i < 16; i++) + if (u[i]) + return true; + return false; +} + +static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + struct f2fs_encryption_policy policy; + struct inode *inode = file_inode(filp); + + if (copy_from_user(&policy, (struct f2fs_encryption_policy __user *)arg, + sizeof(policy))) + return -EFAULT; + + if (f2fs_has_inline_data(inode)) { + int ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } + + return f2fs_process_policy(&policy, inode); +#else + return -EOPNOTSUPP; +#endif +} + +static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + struct f2fs_encryption_policy policy; + struct inode *inode = file_inode(filp); + int err; + + err = f2fs_get_policy(inode, &policy); + if (err) + return err; + + if (copy_to_user((struct f2fs_encryption_policy __user *)arg, &policy, + sizeof(policy))) + return -EFAULT; + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + int err; + + if (!f2fs_sb_has_crypto(inode->i_sb)) + return -EOPNOTSUPP; + + if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) + goto got_it; + + err = mnt_want_write_file(filp); + if (err) + return err; + + /* update superblock with uuid */ + generate_random_uuid(sbi->raw_super->encrypt_pw_salt); + + err = f2fs_commit_super(sbi); + + mnt_drop_write_file(filp); + if (err) { + /* undo new data */ + memset(sbi->raw_super->encrypt_pw_salt, 0, 16); + return err; + } +got_it: + if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt, + 16)) + return -EFAULT; + return 0; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { @@ -1370,6 +1457,12 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_shutdown(filp, arg); case FITRIM: return f2fs_ioc_fitrim(filp, arg); + case F2FS_IOC_SET_ENCRYPTION_POLICY: + return f2fs_ioc_set_encryption_policy(filp, arg); + case F2FS_IOC_GET_ENCRYPTION_POLICY: + return f2fs_ioc_get_encryption_policy(filp, arg); + case F2FS_IOC_GET_ENCRYPTION_PWSALT: + return f2fs_ioc_get_encryption_pwsalt(filp, arg); default: return -ENOTTY; } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 9757f65a05bc97..07449b980acb9a 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -584,6 +584,9 @@ static int __f2fs_setxattr(struct inode *inode, int index, inode->i_ctime = CURRENT_TIME; clear_inode_flag(fi, FI_ACL_MODE); } + if (index == F2FS_XATTR_INDEX_ENCRYPTION && + !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) + f2fs_set_encrypted_inode(inode); if (ipage) update_inode(inode, ipage); From 57e5055b0a5e33267b8be366ee52ce5cdc239bc7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Apr 2015 19:52:47 -0700 Subject: [PATCH 35/83] f2fs crypto: add f2fs encryption facilities Most of parts were copied from ext4, except: - add f2fs_restore_and_release_control_page which returns control page and restore control page - remove ext4_encrypted_zeroout() - remove sbi->s_file_encryption_mode & sbi->s_dir_encryption_mode - add f2fs_end_io_crypto_work for mpage_end_io Signed-off-by: Michael Halcrow Signed-off-by: Ildar Muslukhov Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/Makefile | 2 +- fs/f2fs/crypto.c | 560 +++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 25 +++ fs/f2fs/super.c | 3 + 4 files changed, 589 insertions(+), 1 deletion(-) create mode 100644 fs/f2fs/crypto.c diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 7864f4f02ca60e..a79907b4f9f585 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -6,4 +6,4 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o -f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o +f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c new file mode 100644 index 00000000000000..c910fa722e5c11 --- /dev/null +++ b/fs/f2fs/crypto.c @@ -0,0 +1,560 @@ +/* + * linux/fs/f2fs/crypto.c + * + * Copied from linux/fs/ext4/crypto.c + * + * Copyright (C) 2015, Google, Inc. + * Copyright (C) 2015, Motorola Mobility + * + * This contains encryption functions for f2fs + * + * Written by Michael Halcrow, 2014. + * + * Filename encryption additions + * Uday Savagaonkar, 2014 + * Encryption policy handling additions + * Ildar Muslukhov, 2014 + * Remove ext4_encrypted_zeroout(), + * add f2fs_restore_and_release_control_page() + * Jaegeuk Kim, 2015. + * + * This has not yet undergone a rigorous security audit. + * + * The usage of AES-XTS should conform to recommendations in NIST + * Special Publication 800-38E and IEEE P1619/D16. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "xattr.h" + +/* Encryption added and removed here! (L: */ + +static unsigned int num_prealloc_crypto_pages = 32; +static unsigned int num_prealloc_crypto_ctxs = 128; + +module_param(num_prealloc_crypto_pages, uint, 0444); +MODULE_PARM_DESC(num_prealloc_crypto_pages, + "Number of crypto pages to preallocate"); +module_param(num_prealloc_crypto_ctxs, uint, 0444); +MODULE_PARM_DESC(num_prealloc_crypto_ctxs, + "Number of crypto contexts to preallocate"); + +static mempool_t *f2fs_bounce_page_pool; + +static LIST_HEAD(f2fs_free_crypto_ctxs); +static DEFINE_SPINLOCK(f2fs_crypto_ctx_lock); + +struct workqueue_struct *f2fs_read_workqueue; +static DEFINE_MUTEX(crypto_init); + +/** + * f2fs_release_crypto_ctx() - Releases an encryption context + * @ctx: The encryption context to release. + * + * If the encryption context was allocated from the pre-allocated pool, returns + * it to that pool. Else, frees it. + * + * If there's a bounce page in the context, this frees that. + */ +void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) +{ + unsigned long flags; + + if (ctx->bounce_page) { + if (ctx->flags & F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) + __free_page(ctx->bounce_page); + else + mempool_free(ctx->bounce_page, f2fs_bounce_page_pool); + ctx->bounce_page = NULL; + } + ctx->control_page = NULL; + if (ctx->flags & F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { + if (ctx->tfm) + crypto_free_tfm(ctx->tfm); + kfree(ctx); + } else { + spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags); + list_add(&ctx->free_list, &f2fs_free_crypto_ctxs); + spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags); + } +} + +/** + * f2fs_alloc_and_init_crypto_ctx() - Allocates and inits an encryption context + * @mask: The allocation mask. + * + * Return: An allocated and initialized encryption context on success. An error + * value or NULL otherwise. + */ +static struct f2fs_crypto_ctx *f2fs_alloc_and_init_crypto_ctx(gfp_t mask) +{ + struct f2fs_crypto_ctx *ctx = kzalloc(sizeof(struct f2fs_crypto_ctx), + mask); + + if (!ctx) + return ERR_PTR(-ENOMEM); + return ctx; +} + +/** + * f2fs_get_crypto_ctx() - Gets an encryption context + * @inode: The inode for which we are doing the crypto + * + * Allocates and initializes an encryption context. + * + * Return: An allocated and initialized encryption context on success; error + * value or NULL otherwise. + */ +struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) +{ + struct f2fs_crypto_ctx *ctx = NULL; + int res = 0; + unsigned long flags; + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + + BUG_ON(ci == NULL); + /* + * We first try getting the ctx from a free list because in + * the common case the ctx will have an allocated and + * initialized crypto tfm, so it's probably a worthwhile + * optimization. For the bounce page, we first try getting it + * from the kernel allocator because that's just about as fast + * as getting it from a list and because a cache of free pages + * should generally be a "last resort" option for a filesystem + * to be able to do its job. + */ + spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags); + ctx = list_first_entry_or_null(&f2fs_free_crypto_ctxs, + struct f2fs_crypto_ctx, free_list); + if (ctx) + list_del(&ctx->free_list); + spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags); + if (!ctx) { + ctx = f2fs_alloc_and_init_crypto_ctx(GFP_NOFS); + if (IS_ERR(ctx)) { + res = PTR_ERR(ctx); + goto out; + } + ctx->flags |= F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; + } else { + ctx->flags &= ~F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; + } + + /* + * Allocate a new Crypto API context if we don't already have + * one or if it isn't the right mode. + */ + BUG_ON(ci->ci_mode == F2FS_ENCRYPTION_MODE_INVALID); + if (ctx->tfm && (ctx->mode != ci->ci_mode)) { + crypto_free_tfm(ctx->tfm); + ctx->tfm = NULL; + ctx->mode = F2FS_ENCRYPTION_MODE_INVALID; + } + if (!ctx->tfm) { + switch (ci->ci_mode) { + case F2FS_ENCRYPTION_MODE_AES_256_XTS: + ctx->tfm = crypto_ablkcipher_tfm( + crypto_alloc_ablkcipher("xts(aes)", 0, 0)); + break; + case F2FS_ENCRYPTION_MODE_AES_256_GCM: + /* + * TODO(mhalcrow): AEAD w/ gcm(aes); + * crypto_aead_setauthsize() + */ + ctx->tfm = ERR_PTR(-ENOTSUPP); + break; + default: + BUG(); + } + if (IS_ERR_OR_NULL(ctx->tfm)) { + res = PTR_ERR(ctx->tfm); + ctx->tfm = NULL; + goto out; + } + ctx->mode = ci->ci_mode; + } + BUG_ON(ci->ci_size != f2fs_encryption_key_size(ci->ci_mode)); + + /* + * There shouldn't be a bounce page attached to the crypto + * context at this point. + */ + BUG_ON(ctx->bounce_page); + +out: + if (res) { + if (!IS_ERR_OR_NULL(ctx)) + f2fs_release_crypto_ctx(ctx); + ctx = ERR_PTR(res); + } + return ctx; +} + +/* + * Call f2fs_decrypt on every single page, reusing the encryption + * context. + */ +static void completion_pages(struct work_struct *work) +{ + struct f2fs_crypto_ctx *ctx = + container_of(work, struct f2fs_crypto_ctx, work); + struct bio *bio = ctx->bio; + struct bio_vec *bv; + int i; + + bio_for_each_segment_all(bv, bio, i) { + struct page *page = bv->bv_page; + int ret = f2fs_decrypt(ctx, page); + + if (ret) { + WARN_ON_ONCE(1); + SetPageError(page); + } else + SetPageUptodate(page); + unlock_page(page); + } + f2fs_release_crypto_ctx(ctx); + bio_put(bio); +} + +void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *ctx, struct bio *bio) +{ + INIT_WORK(&ctx->work, completion_pages); + ctx->bio = bio; + queue_work(f2fs_read_workqueue, &ctx->work); +} + +/** + * f2fs_exit_crypto() - Shutdown the f2fs encryption system + */ +void f2fs_exit_crypto(void) +{ + struct f2fs_crypto_ctx *pos, *n; + + list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list) { + if (pos->bounce_page) { + if (pos->flags & + F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) + __free_page(pos->bounce_page); + else + mempool_free(pos->bounce_page, + f2fs_bounce_page_pool); + } + if (pos->tfm) + crypto_free_tfm(pos->tfm); + kfree(pos); + } + INIT_LIST_HEAD(&f2fs_free_crypto_ctxs); + if (f2fs_bounce_page_pool) + mempool_destroy(f2fs_bounce_page_pool); + f2fs_bounce_page_pool = NULL; + if (f2fs_read_workqueue) + destroy_workqueue(f2fs_read_workqueue); + f2fs_read_workqueue = NULL; +} + +/** + * f2fs_init_crypto() - Set up for f2fs encryption. + * + * We only call this when we start accessing encrypted files, since it + * results in memory getting allocated that wouldn't otherwise be used. + * + * Return: Zero on success, non-zero otherwise. + */ +int f2fs_init_crypto(void) +{ + int i, res; + + mutex_lock(&crypto_init); + if (f2fs_read_workqueue) + goto already_initialized; + + f2fs_read_workqueue = alloc_workqueue("f2fs_crypto", WQ_HIGHPRI, 0); + if (!f2fs_read_workqueue) { + res = -ENOMEM; + goto fail; + } + + for (i = 0; i < num_prealloc_crypto_ctxs; i++) { + struct f2fs_crypto_ctx *ctx; + + ctx = f2fs_alloc_and_init_crypto_ctx(GFP_KERNEL); + if (IS_ERR(ctx)) { + res = PTR_ERR(ctx); + goto fail; + } + list_add(&ctx->free_list, &f2fs_free_crypto_ctxs); + } + + f2fs_bounce_page_pool = + mempool_create_page_pool(num_prealloc_crypto_pages, 0); + if (!f2fs_bounce_page_pool) { + res = -ENOMEM; + goto fail; + } +already_initialized: + mutex_unlock(&crypto_init); + return 0; +fail: + f2fs_exit_crypto(); + mutex_unlock(&crypto_init); + return res; +} + +void f2fs_restore_and_release_control_page(struct page **page) +{ + struct f2fs_crypto_ctx *ctx; + struct page *bounce_page; + + /* The bounce data pages are unmapped. */ + if ((*page)->mapping) + return; + + /* The bounce data page is unmapped. */ + bounce_page = *page; + ctx = (struct f2fs_crypto_ctx *)page_private(bounce_page); + + /* restore control page */ + *page = ctx->control_page; + + f2fs_restore_control_page(bounce_page); +} + +void f2fs_restore_control_page(struct page *data_page) +{ + struct f2fs_crypto_ctx *ctx = + (struct f2fs_crypto_ctx *)page_private(data_page); + + set_page_private(data_page, (unsigned long)NULL); + ClearPagePrivate(data_page); + unlock_page(data_page); + f2fs_release_crypto_ctx(ctx); +} + +/** + * f2fs_crypt_complete() - The completion callback for page encryption + * @req: The asynchronous encryption request context + * @res: The result of the encryption operation + */ +static void f2fs_crypt_complete(struct crypto_async_request *req, int res) +{ + struct f2fs_completion_result *ecr = req->data; + + if (res == -EINPROGRESS) + return; + ecr->res = res; + complete(&ecr->completion); +} + +typedef enum { + F2FS_DECRYPT = 0, + F2FS_ENCRYPT, +} f2fs_direction_t; + +static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx, + struct inode *inode, + f2fs_direction_t rw, + pgoff_t index, + struct page *src_page, + struct page *dest_page) +{ + u8 xts_tweak[F2FS_XTS_TWEAK_SIZE]; + struct ablkcipher_request *req = NULL; + DECLARE_F2FS_COMPLETION_RESULT(ecr); + struct scatterlist dst, src; + struct f2fs_inode_info *fi = F2FS_I(inode); + struct crypto_ablkcipher *atfm = __crypto_ablkcipher_cast(ctx->tfm); + int res = 0; + + BUG_ON(!ctx->tfm); + BUG_ON(ctx->mode != fi->i_crypt_info->ci_mode); + + if (ctx->mode != F2FS_ENCRYPTION_MODE_AES_256_XTS) { + printk_ratelimited(KERN_ERR + "%s: unsupported crypto algorithm: %d\n", + __func__, ctx->mode); + return -ENOTSUPP; + } + + crypto_ablkcipher_clear_flags(atfm, ~0); + crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_REQ_WEAK_KEY); + + res = crypto_ablkcipher_setkey(atfm, fi->i_crypt_info->ci_raw, + fi->i_crypt_info->ci_size); + if (res) { + printk_ratelimited(KERN_ERR + "%s: crypto_ablkcipher_setkey() failed\n", + __func__); + return res; + } + req = ablkcipher_request_alloc(atfm, GFP_NOFS); + if (!req) { + printk_ratelimited(KERN_ERR + "%s: crypto_request_alloc() failed\n", + __func__); + return -ENOMEM; + } + ablkcipher_request_set_callback( + req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + f2fs_crypt_complete, &ecr); + + BUILD_BUG_ON(F2FS_XTS_TWEAK_SIZE < sizeof(index)); + memcpy(xts_tweak, &index, sizeof(index)); + memset(&xts_tweak[sizeof(index)], 0, + F2FS_XTS_TWEAK_SIZE - sizeof(index)); + + sg_init_table(&dst, 1); + sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0); + sg_init_table(&src, 1); + sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0); + ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE, + xts_tweak); + if (rw == F2FS_DECRYPT) + res = crypto_ablkcipher_decrypt(req); + else + res = crypto_ablkcipher_encrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + BUG_ON(req->base.data != &ecr); + wait_for_completion(&ecr.completion); + res = ecr.res; + } + ablkcipher_request_free(req); + if (res) { + printk_ratelimited(KERN_ERR + "%s: crypto_ablkcipher_encrypt() returned %d\n", + __func__, res); + return res; + } + return 0; +} + +/** + * f2fs_encrypt() - Encrypts a page + * @inode: The inode for which the encryption should take place + * @plaintext_page: The page to encrypt. Must be locked. + * + * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx + * encryption context. + * + * Called on the page write path. The caller must call + * f2fs_restore_control_page() on the returned ciphertext page to + * release the bounce buffer and the encryption context. + * + * Return: An allocated page with the encrypted content on success. Else, an + * error value or NULL. + */ +struct page *f2fs_encrypt(struct inode *inode, + struct page *plaintext_page) +{ + struct f2fs_crypto_ctx *ctx; + struct page *ciphertext_page = NULL; + int err; + + BUG_ON(!PageLocked(plaintext_page)); + + ctx = f2fs_get_crypto_ctx(inode); + if (IS_ERR(ctx)) + return (struct page *)ctx; + + /* The encryption operation will require a bounce page. */ + ciphertext_page = alloc_page(GFP_NOFS); + if (!ciphertext_page) { + /* + * This is a potential bottleneck, but at least we'll have + * forward progress. + */ + ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, + GFP_NOFS); + if (WARN_ON_ONCE(!ciphertext_page)) + ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, + GFP_NOFS | __GFP_WAIT); + ctx->flags &= ~F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; + } else { + ctx->flags |= F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; + } + ctx->bounce_page = ciphertext_page; + ctx->control_page = plaintext_page; + err = f2fs_page_crypto(ctx, inode, F2FS_ENCRYPT, plaintext_page->index, + plaintext_page, ciphertext_page); + if (err) { + f2fs_release_crypto_ctx(ctx); + return ERR_PTR(err); + } + SetPagePrivate(ciphertext_page); + set_page_private(ciphertext_page, (unsigned long)ctx); + lock_page(ciphertext_page); + return ciphertext_page; +} + +/** + * f2fs_decrypt() - Decrypts a page in-place + * @ctx: The encryption context. + * @page: The page to decrypt. Must be locked. + * + * Decrypts page in-place using the ctx encryption context. + * + * Called from the read completion callback. + * + * Return: Zero on success, non-zero otherwise. + */ +int f2fs_decrypt(struct f2fs_crypto_ctx *ctx, struct page *page) +{ + BUG_ON(!PageLocked(page)); + + return f2fs_page_crypto(ctx, page->mapping->host, + F2FS_DECRYPT, page->index, page, page); +} + +/* + * Convenience function which takes care of allocating and + * deallocating the encryption context + */ +int f2fs_decrypt_one(struct inode *inode, struct page *page) +{ + struct f2fs_crypto_ctx *ctx = f2fs_get_crypto_ctx(inode); + int ret; + + if (!ctx) + return -ENOMEM; + ret = f2fs_decrypt(ctx, page); + f2fs_release_crypto_ctx(ctx); + return ret; +} + +bool f2fs_valid_contents_enc_mode(uint32_t mode) +{ + return (mode == F2FS_ENCRYPTION_MODE_AES_256_XTS); +} + +/** + * f2fs_validate_encryption_key_size() - Validate the encryption key size + * @mode: The key mode. + * @size: The key size to validate. + * + * Return: The validated key size for @mode. Zero if invalid. + */ +uint32_t f2fs_validate_encryption_key_size(uint32_t mode, uint32_t size) +{ + if (size == f2fs_encryption_key_size(mode)) + return size; + return 0; +} diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 709660bd926a84..b0490cb58b9715 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1962,4 +1962,29 @@ int f2fs_is_child_context_consistent_with_parent(struct inode *, int f2fs_inherit_context(struct inode *, struct inode *, struct page *); int f2fs_process_policy(const struct f2fs_encryption_policy *, struct inode *); int f2fs_get_policy(struct inode *, struct f2fs_encryption_policy *); + +/* crypt.c */ +extern struct workqueue_struct *f2fs_read_workqueue; +bool f2fs_valid_contents_enc_mode(uint32_t); +uint32_t f2fs_validate_encryption_key_size(uint32_t, uint32_t); +struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *); +void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *); +struct page *f2fs_encrypt(struct inode *, struct page *); +int f2fs_decrypt(struct f2fs_crypto_ctx *, struct page *); +int f2fs_decrypt_one(struct inode *, struct page *); +void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *); + +#ifdef CONFIG_F2FS_FS_ENCRYPTION +void f2fs_restore_and_release_control_page(struct page **); +void f2fs_restore_control_page(struct page *); + +int f2fs_init_crypto(void); +void f2fs_exit_crypto(void); +#else +static inline void f2fs_restore_and_release_control_page(struct page **p) { } +static inline void f2fs_restore_control_page(struct page *p) { } + +static inline int f2fs_init_crypto(void) { return 0; } +static inline void f2fs_exit_crypto(void) { } +#endif #endif diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 138fa938c29124..d61f74ab4a0382 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -416,6 +416,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; +#ifdef CONFIG_F2FS_FS_ENCRYPTION + fi->i_crypt_info = NULL; +#endif return &fi->vfs_inode; } From 0adda907f23df2e19355b58a20f0a7ff76587c6a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 21 Apr 2015 16:23:47 -0700 Subject: [PATCH 36/83] f2fs crypto: add encryption key management facilities This patch copies from encrypt_key.c in ext4, and modifies for f2fs. Use GFP_NOFS, since _f2fs_get_encryption_info is called under f2fs_lock_op. Signed-off-by: Michael Halcrow Signed-off-by: Ildar Muslukhov Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/Makefile | 2 +- fs/f2fs/crypto_key.c | 206 ++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 22 +++++ fs/f2fs/f2fs_crypto.h | 3 + 4 files changed, 232 insertions(+), 1 deletion(-) create mode 100644 fs/f2fs/crypto_key.c diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index a79907b4f9f585..b08925d34ed050 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -6,4 +6,4 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o -f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o +f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o crypto_key.o diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c new file mode 100644 index 00000000000000..c7d414dd3a4af9 --- /dev/null +++ b/fs/f2fs/crypto_key.c @@ -0,0 +1,206 @@ +/* + * linux/fs/f2fs/crypto_key.c + * + * Copied from linux/fs/f2fs/crypto_key.c + * + * Copyright (C) 2015, Google, Inc. + * + * This contains encryption key functions for f2fs + * + * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "xattr.h" + +static void derive_crypt_complete(struct crypto_async_request *req, int rc) +{ + struct f2fs_completion_result *ecr = req->data; + + if (rc == -EINPROGRESS) + return; + + ecr->res = rc; + complete(&ecr->completion); +} + +/** + * f2fs_derive_key_aes() - Derive a key using AES-128-ECB + * @deriving_key: Encryption key used for derivatio. + * @source_key: Source key to which to apply derivation. + * @derived_key: Derived key. + * + * Return: Zero on success; non-zero otherwise. + */ +static int f2fs_derive_key_aes(char deriving_key[F2FS_AES_128_ECB_KEY_SIZE], + char source_key[F2FS_AES_256_XTS_KEY_SIZE], + char derived_key[F2FS_AES_256_XTS_KEY_SIZE]) +{ + int res = 0; + struct ablkcipher_request *req = NULL; + DECLARE_F2FS_COMPLETION_RESULT(ecr); + struct scatterlist src_sg, dst_sg; + struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0, + 0); + + if (IS_ERR(tfm)) { + res = PTR_ERR(tfm); + tfm = NULL; + goto out; + } + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); + req = ablkcipher_request_alloc(tfm, GFP_NOFS); + if (!req) { + res = -ENOMEM; + goto out; + } + ablkcipher_request_set_callback(req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + derive_crypt_complete, &ecr); + res = crypto_ablkcipher_setkey(tfm, deriving_key, + F2FS_AES_128_ECB_KEY_SIZE); + if (res < 0) + goto out; + + sg_init_one(&src_sg, source_key, F2FS_AES_256_XTS_KEY_SIZE); + sg_init_one(&dst_sg, derived_key, F2FS_AES_256_XTS_KEY_SIZE); + ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, + F2FS_AES_256_XTS_KEY_SIZE, NULL); + res = crypto_ablkcipher_encrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + BUG_ON(req->base.data != &ecr); + wait_for_completion(&ecr.completion); + res = ecr.res; + } +out: + if (req) + ablkcipher_request_free(req); + if (tfm) + crypto_free_ablkcipher(tfm); + return res; +} + +void f2fs_free_encryption_info(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_crypt_info *ci = fi->i_crypt_info; + + if (!ci) + return; + + if (ci->ci_keyring_key) + key_put(ci->ci_keyring_key); + crypto_free_ablkcipher(ci->ci_ctfm); + memzero_explicit(&ci->ci_raw, sizeof(ci->ci_raw)); + kfree(ci); + fi->i_crypt_info = NULL; +} + +int _f2fs_get_encryption_info(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_crypt_info *crypt_info; + char full_key_descriptor[F2FS_KEY_DESC_PREFIX_SIZE + + (F2FS_KEY_DESCRIPTOR_SIZE * 2) + 1]; + struct key *keyring_key = NULL; + struct f2fs_encryption_key *master_key; + struct f2fs_encryption_context ctx; + struct user_key_payload *ukp; + int res; + + if (!f2fs_read_workqueue) { + res = f2fs_init_crypto(); + if (res) + return res; + } + + if (fi->i_crypt_info) { + if (!fi->i_crypt_info->ci_keyring_key || + key_validate(fi->i_crypt_info->ci_keyring_key) == 0) + return 0; + f2fs_free_encryption_info(inode); + } + + res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, + &ctx, sizeof(ctx), NULL); + if (res < 0) + return res; + else if (res != sizeof(ctx)) + return -EINVAL; + res = 0; + + crypt_info = kmalloc(sizeof(struct f2fs_crypt_info), GFP_NOFS); + if (!crypt_info) + return -ENOMEM; + + crypt_info->ci_flags = ctx.flags; + crypt_info->ci_data_mode = ctx.contents_encryption_mode; + crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; + crypt_info->ci_ctfm = NULL; + memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, + sizeof(crypt_info->ci_master_key)); + if (S_ISREG(inode->i_mode)) + crypt_info->ci_mode = ctx.contents_encryption_mode; + else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + crypt_info->ci_mode = ctx.filenames_encryption_mode; + else { + printk(KERN_ERR "f2fs crypto: Unsupported inode type.\n"); + BUG(); + } + crypt_info->ci_size = f2fs_encryption_key_size(crypt_info->ci_mode); + BUG_ON(!crypt_info->ci_size); + + memcpy(full_key_descriptor, F2FS_KEY_DESC_PREFIX, + F2FS_KEY_DESC_PREFIX_SIZE); + sprintf(full_key_descriptor + F2FS_KEY_DESC_PREFIX_SIZE, + "%*phN", F2FS_KEY_DESCRIPTOR_SIZE, + ctx.master_key_descriptor); + full_key_descriptor[F2FS_KEY_DESC_PREFIX_SIZE + + (2 * F2FS_KEY_DESCRIPTOR_SIZE)] = '\0'; + keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL); + if (IS_ERR(keyring_key)) { + res = PTR_ERR(keyring_key); + keyring_key = NULL; + goto out; + } + BUG_ON(keyring_key->type != &key_type_logon); + ukp = ((struct user_key_payload *)keyring_key->payload.data); + if (ukp->datalen != sizeof(struct f2fs_encryption_key)) { + res = -EINVAL; + goto out; + } + master_key = (struct f2fs_encryption_key *)ukp->data; + BUILD_BUG_ON(F2FS_AES_128_ECB_KEY_SIZE != + F2FS_KEY_DERIVATION_NONCE_SIZE); + BUG_ON(master_key->size != F2FS_AES_256_XTS_KEY_SIZE); + res = f2fs_derive_key_aes(ctx.nonce, master_key->raw, + crypt_info->ci_raw); +out: + if (res < 0) { + if (res == -ENOKEY) + res = 0; + kfree(crypt_info); + } else { + fi->i_crypt_info = crypt_info; + crypt_info->ci_keyring_key = keyring_key; + keyring_key = NULL; + } + if (keyring_key) + key_put(keyring_key); + return res; +} + +int f2fs_has_encryption_key(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + + return (fi->i_crypt_info != NULL); +} diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b0490cb58b9715..eb6440960964d8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1974,17 +1974,39 @@ int f2fs_decrypt(struct f2fs_crypto_ctx *, struct page *); int f2fs_decrypt_one(struct inode *, struct page *); void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *); +/* crypto_key.c */ +void f2fs_free_encryption_info(struct inode *); +int _f2fs_get_encryption_info(struct inode *inode); + #ifdef CONFIG_F2FS_FS_ENCRYPTION void f2fs_restore_and_release_control_page(struct page **); void f2fs_restore_control_page(struct page *); int f2fs_init_crypto(void); void f2fs_exit_crypto(void); + +int f2fs_has_encryption_key(struct inode *); + +static inline int f2fs_get_encryption_info(struct inode *inode) +{ + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + + if (!ci || + (ci->ci_keyring_key && + (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED) | + (1 << KEY_FLAG_DEAD))))) + return _f2fs_get_encryption_info(inode); + return 0; +} #else static inline void f2fs_restore_and_release_control_page(struct page **p) { } static inline void f2fs_restore_control_page(struct page *p) { } static inline int f2fs_init_crypto(void) { return 0; } static inline void f2fs_exit_crypto(void) { } + +static inline int f2fs_has_encryption_key(struct inode *i) { return 0; } +static inline int f2fs_get_encryption_info(struct inode *i) { return 0; } #endif #endif diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h index 3323266e7e2bd5..6e4139493ba525 100644 --- a/fs/f2fs/f2fs_crypto.h +++ b/fs/f2fs/f2fs_crypto.h @@ -65,6 +65,9 @@ struct f2fs_encryption_context { #define F2FS_AES_256_XTS_KEY_SIZE 64 #define F2FS_MAX_KEY_SIZE 64 +#define F2FS_KEY_DESC_PREFIX "f2fs:" +#define F2FS_KEY_DESC_PREFIX_SIZE 5 + struct f2fs_encryption_key { __u32 mode; char raw[F2FS_MAX_KEY_SIZE]; From 6b3bd08f93a849edd56595391a54100d607ad7e4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 26 Apr 2015 00:12:50 -0700 Subject: [PATCH 37/83] f2fs crypto: filename encryption facilities This patch adds filename encryption infra. Most of codes are copied from ext4 part, but changed to adjust f2fs directory structure. Signed-off-by: Uday Savagaonkar Signed-off-by: Ildar Muslukhov Signed-off-by: Michael Halcrow Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/Makefile | 3 +- fs/f2fs/crypto_fname.c | 490 +++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 50 +++++ 3 files changed, 542 insertions(+), 1 deletion(-) create mode 100644 fs/f2fs/crypto_fname.c diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index b08925d34ed050..396be1a39e554c 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -6,4 +6,5 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o -f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o crypto_key.o +f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o \ + crypto_key.o crypto_fname.o diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c new file mode 100644 index 00000000000000..8f3ff9b3caebff --- /dev/null +++ b/fs/f2fs/crypto_fname.c @@ -0,0 +1,490 @@ +/* + * linux/fs/f2fs/crypto_fname.c + * + * Copied from linux/fs/ext4/crypto.c + * + * Copyright (C) 2015, Google, Inc. + * Copyright (C) 2015, Motorola Mobility + * + * This contains functions for filename crypto management in f2fs + * + * Written by Uday Savagaonkar, 2014. + * + * Adjust f2fs dentry structure + * Jaegeuk Kim, 2015. + * + * This has not yet undergone a rigorous security audit. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "f2fs_crypto.h" +#include "xattr.h" + +/** + * f2fs_dir_crypt_complete() - + */ +static void f2fs_dir_crypt_complete(struct crypto_async_request *req, int res) +{ + struct f2fs_completion_result *ecr = req->data; + + if (res == -EINPROGRESS) + return; + ecr->res = res; + complete(&ecr->completion); +} + +bool f2fs_valid_filenames_enc_mode(uint32_t mode) +{ + return (mode == F2FS_ENCRYPTION_MODE_AES_256_CTS); +} + +static unsigned max_name_len(struct inode *inode) +{ + return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize : + F2FS_NAME_LEN; +} + +/** + * f2fs_fname_encrypt() - + * + * This function encrypts the input filename, and returns the length of the + * ciphertext. Errors are returned as negative numbers. We trust the caller to + * allocate sufficient memory to oname string. + */ +static int f2fs_fname_encrypt(struct inode *inode, + const struct qstr *iname, struct f2fs_str *oname) +{ + u32 ciphertext_len; + struct ablkcipher_request *req = NULL; + DECLARE_F2FS_COMPLETION_RESULT(ecr); + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; + int res = 0; + char iv[F2FS_CRYPTO_BLOCK_SIZE]; + struct scatterlist src_sg, dst_sg; + int padding = 4 << (ci->ci_flags & F2FS_POLICY_FLAGS_PAD_MASK); + char *workbuf, buf[32], *alloc_buf = NULL; + unsigned lim = max_name_len(inode); + + if (iname->len <= 0 || iname->len > lim) + return -EIO; + + ciphertext_len = (iname->len < F2FS_CRYPTO_BLOCK_SIZE) ? + F2FS_CRYPTO_BLOCK_SIZE : iname->len; + ciphertext_len = f2fs_fname_crypto_round_up(ciphertext_len, padding); + ciphertext_len = (ciphertext_len > lim) ? lim : ciphertext_len; + + if (ciphertext_len <= sizeof(buf)) { + workbuf = buf; + } else { + alloc_buf = kmalloc(ciphertext_len, GFP_NOFS); + if (!alloc_buf) + return -ENOMEM; + workbuf = alloc_buf; + } + + /* Allocate request */ + req = ablkcipher_request_alloc(tfm, GFP_NOFS); + if (!req) { + printk_ratelimited(KERN_ERR + "%s: crypto_request_alloc() failed\n", __func__); + kfree(alloc_buf); + return -ENOMEM; + } + ablkcipher_request_set_callback(req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + f2fs_dir_crypt_complete, &ecr); + + /* Copy the input */ + memcpy(workbuf, iname->name, iname->len); + if (iname->len < ciphertext_len) + memset(workbuf + iname->len, 0, ciphertext_len - iname->len); + + /* Initialize IV */ + memset(iv, 0, F2FS_CRYPTO_BLOCK_SIZE); + + /* Create encryption request */ + sg_init_one(&src_sg, workbuf, ciphertext_len); + sg_init_one(&dst_sg, oname->name, ciphertext_len); + ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv); + res = crypto_ablkcipher_encrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + BUG_ON(req->base.data != &ecr); + wait_for_completion(&ecr.completion); + res = ecr.res; + } + kfree(alloc_buf); + ablkcipher_request_free(req); + if (res < 0) { + printk_ratelimited(KERN_ERR + "%s: Error (error code %d)\n", __func__, res); + } + oname->len = ciphertext_len; + return res; +} + +/* + * f2fs_fname_decrypt() + * This function decrypts the input filename, and returns + * the length of the plaintext. + * Errors are returned as negative numbers. + * We trust the caller to allocate sufficient memory to oname string. + */ +static int f2fs_fname_decrypt(struct inode *inode, + const struct f2fs_str *iname, struct f2fs_str *oname) +{ + struct ablkcipher_request *req = NULL; + DECLARE_F2FS_COMPLETION_RESULT(ecr); + struct scatterlist src_sg, dst_sg; + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; + int res = 0; + char iv[F2FS_CRYPTO_BLOCK_SIZE]; + unsigned lim = max_name_len(inode); + + if (iname->len <= 0 || iname->len > lim) + return -EIO; + + /* Allocate request */ + req = ablkcipher_request_alloc(tfm, GFP_NOFS); + if (!req) { + printk_ratelimited(KERN_ERR + "%s: crypto_request_alloc() failed\n", __func__); + return -ENOMEM; + } + ablkcipher_request_set_callback(req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + f2fs_dir_crypt_complete, &ecr); + + /* Initialize IV */ + memset(iv, 0, F2FS_CRYPTO_BLOCK_SIZE); + + /* Create decryption request */ + sg_init_one(&src_sg, iname->name, iname->len); + sg_init_one(&dst_sg, oname->name, oname->len); + ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv); + res = crypto_ablkcipher_decrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + BUG_ON(req->base.data != &ecr); + wait_for_completion(&ecr.completion); + res = ecr.res; + } + ablkcipher_request_free(req); + if (res < 0) { + printk_ratelimited(KERN_ERR + "%s: Error in f2fs_fname_decrypt (error code %d)\n", + __func__, res); + return res; + } + + oname->len = strnlen(oname->name, iname->len); + return oname->len; +} + +static const char *lookup_table = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; + +/** + * f2fs_fname_encode_digest() - + * + * Encodes the input digest using characters from the set [a-zA-Z0-9_+]. + * The encoded string is roughly 4/3 times the size of the input string. + */ +static int digest_encode(const char *src, int len, char *dst) +{ + int i = 0, bits = 0, ac = 0; + char *cp = dst; + + while (i < len) { + ac += (((unsigned char) src[i]) << bits); + bits += 8; + do { + *cp++ = lookup_table[ac & 0x3f]; + ac >>= 6; + bits -= 6; + } while (bits >= 6); + i++; + } + if (bits) + *cp++ = lookup_table[ac & 0x3f]; + return cp - dst; +} + +static int digest_decode(const char *src, int len, char *dst) +{ + int i = 0, bits = 0, ac = 0; + const char *p; + char *cp = dst; + + while (i < len) { + p = strchr(lookup_table, src[i]); + if (p == NULL || src[i] == 0) + return -2; + ac += (p - lookup_table) << bits; + bits += 6; + if (bits >= 8) { + *cp++ = ac & 0xff; + ac >>= 8; + bits -= 8; + } + i++; + } + if (ac) + return -1; + return cp - dst; +} + +int f2fs_setup_fname_crypto(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_crypt_info *ci = fi->i_crypt_info; + struct crypto_ablkcipher *ctfm; + int res; + + /* Check if the crypto policy is set on the inode */ + res = f2fs_encrypted_inode(inode); + if (res == 0) + return 0; + + res = f2fs_get_encryption_info(inode); + if (res < 0) + return res; + ci = fi->i_crypt_info; + + if (!ci || ci->ci_ctfm) + return 0; + + if (ci->ci_mode != F2FS_ENCRYPTION_MODE_AES_256_CTS) { + printk_once(KERN_WARNING "f2fs: unsupported key mode %d\n", + ci->ci_mode); + return -ENOKEY; + } + + ctfm = crypto_alloc_ablkcipher("cts(cbc(aes))", 0, 0); + if (!ctfm || IS_ERR(ctfm)) { + res = ctfm ? PTR_ERR(ctfm) : -ENOMEM; + printk(KERN_DEBUG "%s: error (%d) allocating crypto tfm\n", + __func__, res); + return res; + } + crypto_ablkcipher_clear_flags(ctfm, ~0); + crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm), + CRYPTO_TFM_REQ_WEAK_KEY); + + res = crypto_ablkcipher_setkey(ctfm, ci->ci_raw, ci->ci_size); + if (res) { + crypto_free_ablkcipher(ctfm); + return -EIO; + } + ci->ci_ctfm = ctfm; + return 0; +} + +/** + * f2fs_fname_crypto_round_up() - + * + * Return: The next multiple of block size + */ +u32 f2fs_fname_crypto_round_up(u32 size, u32 blksize) +{ + return ((size + blksize - 1) / blksize) * blksize; +} + +/** + * f2fs_fname_crypto_alloc_obuff() - + * + * Allocates an output buffer that is sufficient for the crypto operation + * specified by the context and the direction. + */ +int f2fs_fname_crypto_alloc_buffer(struct inode *inode, + u32 ilen, struct f2fs_str *crypto_str) +{ + unsigned int olen; + int padding = 16; + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + + if (ci) + padding = 4 << (ci->ci_flags & F2FS_POLICY_FLAGS_PAD_MASK); + if (padding < F2FS_CRYPTO_BLOCK_SIZE) + padding = F2FS_CRYPTO_BLOCK_SIZE; + olen = f2fs_fname_crypto_round_up(ilen, padding); + crypto_str->len = olen; + if (olen < F2FS_FNAME_CRYPTO_DIGEST_SIZE * 2) + olen = F2FS_FNAME_CRYPTO_DIGEST_SIZE * 2; + /* Allocated buffer can hold one more character to null-terminate the + * string */ + crypto_str->name = kmalloc(olen + 1, GFP_NOFS); + if (!(crypto_str->name)) + return -ENOMEM; + return 0; +} + +/** + * f2fs_fname_crypto_free_buffer() - + * + * Frees the buffer allocated for crypto operation. + */ +void f2fs_fname_crypto_free_buffer(struct f2fs_str *crypto_str) +{ + if (!crypto_str) + return; + kfree(crypto_str->name); + crypto_str->name = NULL; +} + +/** + * f2fs_fname_disk_to_usr() - converts a filename from disk space to user space + */ +int f2fs_fname_disk_to_usr(struct inode *inode, + f2fs_hash_t *hash, + const struct f2fs_str *iname, + struct f2fs_str *oname) +{ + const struct qstr qname = FSTR_TO_QSTR(iname); + char buf[24]; + int ret; + + if (is_dot_dotdot(&qname)) { + oname->name[0] = '.'; + oname->name[iname->len - 1] = '.'; + oname->len = iname->len; + return oname->len; + } + + if (F2FS_I(inode)->i_crypt_info) + return f2fs_fname_decrypt(inode, iname, oname); + + if (iname->len <= F2FS_FNAME_CRYPTO_DIGEST_SIZE) { + ret = digest_encode(iname->name, iname->len, oname->name); + oname->len = ret; + return ret; + } + if (hash) { + memcpy(buf, hash, 4); + memset(buf + 4, 0, 4); + } else + memset(buf, 0, 8); + memcpy(buf + 8, iname->name + iname->len - 16, 16); + oname->name[0] = '_'; + ret = digest_encode(buf, 24, oname->name + 1); + oname->len = ret + 1; + return ret + 1; +} + +/** + * f2fs_fname_usr_to_disk() - converts a filename from user space to disk space + */ +int f2fs_fname_usr_to_disk(struct inode *inode, + const struct qstr *iname, + struct f2fs_str *oname) +{ + int res; + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + + if (is_dot_dotdot(iname)) { + oname->name[0] = '.'; + oname->name[iname->len - 1] = '.'; + oname->len = iname->len; + return oname->len; + } + + if (ci) { + res = f2fs_fname_encrypt(inode, iname, oname); + return res; + } + /* Without a proper key, a user is not allowed to modify the filenames + * in a directory. Consequently, a user space name cannot be mapped to + * a disk-space name */ + return -EACCES; +} + +int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, + int lookup, struct f2fs_filename *fname) +{ + struct f2fs_crypt_info *ci; + int ret = 0, bigname = 0; + + memset(fname, 0, sizeof(struct f2fs_filename)); + fname->usr_fname = iname; + + if (!f2fs_encrypted_inode(dir) || is_dot_dotdot(iname)) { + fname->disk_name.name = (unsigned char *)iname->name; + fname->disk_name.len = iname->len; + goto out; + } + ret = f2fs_setup_fname_crypto(dir); + if (ret) + return ret; + ci = F2FS_I(dir)->i_crypt_info; + if (ci) { + ret = f2fs_fname_crypto_alloc_buffer(dir, iname->len, + &fname->crypto_buf); + if (ret < 0) + goto out; + ret = f2fs_fname_encrypt(dir, iname, &fname->crypto_buf); + if (ret < 0) + goto out; + fname->disk_name.name = fname->crypto_buf.name; + fname->disk_name.len = fname->crypto_buf.len; + ret = 0; + goto out; + } + if (!lookup) { + ret = -EACCES; + goto out; + } + + /* We don't have the key and we are doing a lookup; decode the + * user-supplied name + */ + if (iname->name[0] == '_') + bigname = 1; + if ((bigname && (iname->len != 33)) || + (!bigname && (iname->len > 43))) { + ret = -ENOENT; + } + fname->crypto_buf.name = kmalloc(32, GFP_KERNEL); + if (fname->crypto_buf.name == NULL) { + ret = -ENOMEM; + goto out; + } + ret = digest_decode(iname->name + bigname, iname->len - bigname, + fname->crypto_buf.name); + if (ret < 0) { + ret = -ENOENT; + goto out; + } + fname->crypto_buf.len = ret; + if (bigname) { + memcpy(&fname->hash, fname->crypto_buf.name, 4); + } else { + fname->disk_name.name = fname->crypto_buf.name; + fname->disk_name.len = fname->crypto_buf.len; + } + ret = 0; +out: + return ret; +} + +void f2fs_fname_free_filename(struct f2fs_filename *fname) +{ + kfree(fname->crypto_buf.name); + fname->crypto_buf.name = NULL; + fname->usr_fname = NULL; + fname->disk_name.name = NULL; +} diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index eb6440960964d8..98ce1ddf7186f4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -257,6 +257,25 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, * For INODE and NODE manager */ /* for directory operations */ +struct f2fs_str { + unsigned char *name; + u32 len; +}; + +struct f2fs_filename { + const struct qstr *usr_fname; + struct f2fs_str disk_name; + f2fs_hash_t hash; +#ifdef CONFIG_F2FS_FS_ENCRYPTION + struct f2fs_str crypto_buf; +#endif +}; + +#define FSTR_INIT(n, l) { .name = n, .len = l } +#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) +#define fname_name(p) ((p)->disk_name.name) +#define fname_len(p) ((p)->disk_name.len) + struct f2fs_dentry_ptr { const void *bitmap; struct f2fs_dir_entry *dentry; @@ -1978,6 +1997,15 @@ void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *); void f2fs_free_encryption_info(struct inode *); int _f2fs_get_encryption_info(struct inode *inode); +/* crypto_fname.c */ +bool f2fs_valid_filenames_enc_mode(uint32_t); +u32 f2fs_fname_crypto_round_up(u32, u32); +int f2fs_fname_crypto_alloc_buffer(struct inode *, u32, struct f2fs_str *); +int f2fs_fname_disk_to_usr(struct inode *, f2fs_hash_t *, + const struct f2fs_str *, struct f2fs_str *); +int f2fs_fname_usr_to_disk(struct inode *, const struct qstr *, + struct f2fs_str *); + #ifdef CONFIG_F2FS_FS_ENCRYPTION void f2fs_restore_and_release_control_page(struct page **); void f2fs_restore_control_page(struct page *); @@ -1999,6 +2027,12 @@ static inline int f2fs_get_encryption_info(struct inode *inode) return _f2fs_get_encryption_info(inode); return 0; } + +int f2fs_setup_fname_crypto(struct inode *); +void f2fs_fname_crypto_free_buffer(struct f2fs_str *); +int f2fs_fname_setup_filename(struct inode *, const struct qstr *, + int lookup, struct f2fs_filename *); +void f2fs_fname_free_filename(struct f2fs_filename *); #else static inline void f2fs_restore_and_release_control_page(struct page **p) { } static inline void f2fs_restore_control_page(struct page *p) { } @@ -2008,5 +2042,21 @@ static inline void f2fs_exit_crypto(void) { } static inline int f2fs_has_encryption_key(struct inode *i) { return 0; } static inline int f2fs_get_encryption_info(struct inode *i) { return 0; } + +static inline int f2fs_setup_fname_crypto(struct inode *i) { return 0; } +static inline void f2fs_fname_crypto_free_buffer(struct f2fs_str *p) { } + +static inline int f2fs_fname_setup_filename(struct inode *dir, + const struct qstr *iname, + int lookup, struct f2fs_filename *fname) +{ + memset(fname, 0, sizeof(struct f2fs_filename)); + fname->usr_fname = iname; + fname->disk_name.name = (unsigned char *)iname->name; + fname->disk_name.len = iname->len; + return 0; +} + +static inline void f2fs_fname_free_filename(struct f2fs_filename *fname) { } #endif #endif From fcc85a4d86b5018f08717160c89c0eb50afd1dca Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 21 Apr 2015 20:39:58 -0700 Subject: [PATCH 38/83] f2fs crypto: activate encryption support for fs APIs This patch activates the following APIs for encryption support. The rules quoted by ext4 are: - An unencrypted directory may contain encrypted or unencrypted files or directories. - All files or directories in a directory must be protected using the same key as their containing directory. - Encrypted inode for regular file should not have inline_data. - Encrypted symlink and directory may have inline_data and inline_dentry. This patch activates the following APIs. 1. f2fs_link : validate context 2. f2fs_lookup : '' 3. f2fs_rename : '' 4. f2fs_create/f2fs_mkdir : inherit its dir's context 5. f2fs_direct_IO : do buffered io for regular files 6. f2fs_open : check encryption info 7. f2fs_file_mmap : '' 8. f2fs_setattr : '' 9. f2fs_file_write_iter : '' (Called by sys_io_submit) 10. f2fs_fallocate : do not support fcollapse 11. f2fs_evict_inode : free_encryption_info Signed-off-by: Michael Halcrow Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +++ fs/f2fs/dir.c | 6 ++++++ fs/f2fs/f2fs.h | 11 +++++++++++ fs/f2fs/file.c | 41 +++++++++++++++++++++++++++++++++++++++-- fs/f2fs/inline.c | 3 +++ fs/f2fs/inode.c | 4 ++++ fs/f2fs/namei.c | 38 ++++++++++++++++++++++++++++++++------ 7 files changed, 98 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 842fcdd9d22672..473b4d41c0c87f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1982,6 +1982,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, return err; } + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + return 0; + if (check_direct_IO(inode, iter, offset)) return 0; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9d558d24e1c422..f7293a2674b230 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -390,6 +390,12 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, err = f2fs_init_security(inode, dir, name, page); if (err) goto put_error; + + if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) { + err = f2fs_inherit_context(dir, inode, page); + if (err) + goto put_error; + } } else { page = get_node_page(F2FS_I_SB(dir), inode->i_ino); if (IS_ERR(page)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 98ce1ddf7186f4..1bf75f8c40bf4a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1975,6 +1975,17 @@ static inline int f2fs_sb_has_crypto(struct super_block *sb) #endif } +static inline bool f2fs_may_encrypt(struct inode *inode) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + mode_t mode = inode->i_mode; + + return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); +#else + return 0; +#endif +} + /* crypto_policy.c */ int f2fs_is_child_context_consistent_with_parent(struct inode *, struct inode *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a66970d99cb4aa..9eb0100c57fdcd 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -408,6 +408,12 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); + if (f2fs_encrypted_inode(inode)) { + int err = f2fs_get_encryption_info(inode); + if (err) + return 0; + } + /* we don't need to use inline_data strictly */ if (f2fs_has_inline_data(inode)) { int err = f2fs_convert_inline_inode(inode); @@ -420,6 +426,18 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } +static int f2fs_file_open(struct inode *inode, struct file *filp) +{ + int ret = generic_file_open(inode, filp); + + if (!ret && f2fs_encrypted_inode(inode)) { + ret = f2fs_get_encryption_info(inode); + if (ret) + ret = -EACCES; + } + return ret; +} + int truncate_data_blocks_range(struct dnode_of_data *dn, int count) { int nr_free = 0, ofs = dn->ofs_in_node; @@ -627,6 +645,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (attr->ia_valid & ATTR_SIZE) { + if (f2fs_encrypted_inode(inode) && + f2fs_get_encryption_info(inode)) + return -EACCES; + if (attr->ia_size != i_size_read(inode)) { truncate_setsize(inode, attr->ia_size); f2fs_truncate(inode); @@ -1061,6 +1083,9 @@ static long f2fs_fallocate(struct file *file, int mode, struct inode *inode = file_inode(file); long ret = 0; + if (f2fs_encrypted_inode(inode) && (mode & FALLOC_FL_COLLAPSE_RANGE)) + return -EOPNOTSUPP; + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; @@ -1468,6 +1493,18 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } } +static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + if (f2fs_encrypted_inode(inode) && + !f2fs_has_encryption_key(inode) && + f2fs_get_encryption_info(inode)) + return -EACCES; + + return generic_file_write_iter(iocb, from); +} + #ifdef CONFIG_COMPAT long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -1488,8 +1525,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) const struct file_operations f2fs_file_operations = { .llseek = f2fs_llseek, .read_iter = generic_file_read_iter, - .write_iter = generic_file_write_iter, - .open = generic_file_open, + .write_iter = f2fs_file_write_iter, + .open = f2fs_file_open, .release = f2fs_release_file, .mmap = f2fs_file_mmap, .fsync = f2fs_sync_file, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index d9b3033bf6fd66..5f5b34be5cd9ea 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -27,6 +27,9 @@ bool f2fs_may_inline_data(struct inode *inode) if (i_size_read(inode) > MAX_INLINE_DATA) return false; + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + return false; + return true; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index e622ec95409ee5..00f0e74176cbe0 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -359,6 +359,10 @@ void f2fs_evict_inode(struct inode *inode) if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE)) add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); out_clear: +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (F2FS_I(inode)->i_crypt_info) + f2fs_free_encryption_info(inode); +#endif clear_inode(inode); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 90a96400fa4815..bc8992e8137e7c 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -56,6 +56,10 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) goto out; } + /* If the directory encrypted, then we should encrypt the inode. */ + if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) + f2fs_set_encrypted_inode(inode); + if (f2fs_may_inline_data(inode)) set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); if (f2fs_may_inline_dentry(inode)) @@ -157,6 +161,10 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, struct f2fs_sb_info *sbi = F2FS_I_SB(dir); int err; + if (f2fs_encrypted_inode(dir) && + !f2fs_is_child_context_consistent_with_parent(dir, inode)) + return -EPERM; + f2fs_balance_fs(sbi); inode->i_ctime = CURRENT_TIME; @@ -235,6 +243,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct f2fs_dir_entry *de; struct page *page; nid_t ino; + int err = 0; if (dentry->d_name.len > F2FS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); @@ -251,16 +260,26 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) return ERR_CAST(inode); - if (f2fs_has_inline_dots(inode)) { - int err; + if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode) && + !f2fs_is_child_context_consistent_with_parent(dir, inode)) { + iput(inode); + f2fs_msg(inode->i_sb, KERN_WARNING, + "Inconsistent encryption contexts: %lu/%lu\n", + (unsigned long)dir->i_ino, + (unsigned long)inode->i_ino); + return ERR_PTR(-EPERM); + } + if (f2fs_has_inline_dots(inode)) { err = __recover_dot_dentries(inode, dir->i_ino); - if (err) { - iget_failed(inode); - return ERR_PTR(err); - } + if (err) + goto err_out; } return d_splice_alias(inode, dentry); + +err_out: + iget_failed(inode); + return ERR_PTR(err); } static int f2fs_unlink(struct inode *dir, struct dentry *dentry) @@ -460,6 +479,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *new_entry; int err = -ENOENT; + if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) && + !f2fs_is_child_context_consistent_with_parent(new_dir, + old_inode)) { + err = -EPERM; + goto out; + } + f2fs_balance_fs(sbi); old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); From 4375a33664de17af9032b5f491a49bd256670927 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 23 Apr 2015 12:04:33 -0700 Subject: [PATCH 39/83] f2fs crypto: add encryption support in read/write paths This patch adds encryption support in read and write paths. Note that, in f2fs, we need to consider cleaning operation. In cleaning procedure, we must avoid encrypting and decrypting written blocks. So, this patch implements move_encrypted_block(). Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 +- fs/f2fs/data.c | 93 +++++++++++++++++++++++++++++++++----------- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 79 ++++++++++++++++++++++++++++++++++++- fs/f2fs/inline.c | 1 + fs/f2fs/node.c | 2 + fs/f2fs/segment.c | 24 ++++++++++-- 8 files changed, 177 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a61d4b06e05a07..7b7a9d8bd6cad2 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -56,6 +56,7 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO, .blk_addr = index, + .encrypted_page = NULL, }; repeat: page = grab_cache_page(mapping, index); @@ -122,7 +123,8 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type struct f2fs_io_info fio = { .sbi = sbi, .type = META, - .rw = READ_SYNC | REQ_META | REQ_PRIO + .rw = READ_SYNC | REQ_META | REQ_PRIO, + .encrypted_page = NULL, }; for (; nrpages-- > 0; blkno++) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 473b4d41c0c87f..8d04e24a889ff3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -57,6 +57,15 @@ static void mpage_end_io(struct bio *bio, int err) struct bio_vec *bv; int i; + if (f2fs_bio_encrypted(bio)) { + if (err) { + f2fs_release_crypto_ctx(bio->bi_private); + } else { + f2fs_end_io_crypto_work(bio->bi_private, bio); + return; + } + } + bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; @@ -81,6 +90,8 @@ static void f2fs_write_end_io(struct bio *bio, int err) bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; + f2fs_restore_and_release_control_page(&page); + if (unlikely(err)) { set_page_dirty(page); set_bit(AS_EIO, &page->mapping->flags); @@ -161,7 +172,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; - struct page *page = fio->page; + struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); @@ -185,6 +196,7 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio) enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io; bool is_read = is_read_io(fio->rw); + struct page *bio_page; io = is_read ? &sbi->read_io : &sbi->write_io[btype]; @@ -206,7 +218,9 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio) io->fio = *fio; } - if (bio_add_page(io->bio, fio->page, PAGE_CACHE_SIZE, 0) < + bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; + + if (bio_add_page(io->bio, bio_page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { __submit_merged_bio(io); goto alloc_new; @@ -928,8 +942,12 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw) .sbi = F2FS_I_SB(inode), .type = DATA, .rw = rw, + .encrypted_page = NULL, }; + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + return read_mapping_page(mapping, index, NULL); + page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); @@ -1066,26 +1084,14 @@ struct page *get_new_data_page(struct inode *inode, zero_user_segment(page, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); } else { - struct f2fs_io_info fio = { - .sbi = F2FS_I_SB(inode), - .type = DATA, - .rw = READ_SYNC, - .blk_addr = dn.data_blkaddr, - .page = page, - }; - err = f2fs_submit_page_bio(&fio); - if (err) - return ERR_PTR(err); + f2fs_put_page(page, 1); - lock_page(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); + page = get_read_data_page(inode, index, READ_SYNC); + if (IS_ERR(page)) goto repeat; - } + + /* wait for read completion */ + lock_page(page); } got_it: if (new_i_size && @@ -1548,14 +1554,38 @@ static int f2fs_mpage_readpages(struct address_space *mapping, bio = NULL; } if (bio == NULL) { + struct f2fs_crypto_ctx *ctx = NULL; + + if (f2fs_encrypted_inode(inode) && + S_ISREG(inode->i_mode)) { + struct page *cpage; + + ctx = f2fs_get_crypto_ctx(inode); + if (IS_ERR(ctx)) + goto set_error_page; + + /* wait the page to be moved by cleaning */ + cpage = find_lock_page( + META_MAPPING(F2FS_I_SB(inode)), + block_nr); + if (cpage) { + f2fs_wait_on_page_writeback(cpage, + DATA); + f2fs_put_page(cpage, 1); + } + } + bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, bio_get_nr_vecs(bdev))); - if (!bio) + if (!bio) { + if (ctx) + f2fs_release_crypto_ctx(ctx); goto set_error_page; + } bio->bi_bdev = bdev; bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr); bio->bi_end_io = mpage_end_io; - bio->bi_private = NULL; + bio->bi_private = ctx; } if (bio_add_page(bio, page, blocksize, 0) < blocksize) @@ -1632,6 +1662,14 @@ int do_write_data_page(struct f2fs_io_info *fio) goto out_writepage; } + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + fio->encrypted_page = f2fs_encrypt(inode, fio->page); + if (IS_ERR(fio->encrypted_page)) { + err = PTR_ERR(fio->encrypted_page); + goto out_writepage; + } + } + set_page_writeback(page); /* @@ -1674,6 +1712,7 @@ static int f2fs_write_data_page(struct page *page, .type = DATA, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, .page = page, + .encrypted_page = NULL, }; trace_f2fs_writepage(page, DATA); @@ -1897,6 +1936,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, .rw = READ_SYNC, .blk_addr = dn.data_blkaddr, .page = page, + .encrypted_page = NULL, }; err = f2fs_submit_page_bio(&fio); if (err) @@ -1912,6 +1952,15 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, f2fs_put_page(page, 1); goto repeat; } + + /* avoid symlink page */ + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + err = f2fs_decrypt_one(inode, page); + if (err) { + f2fs_put_page(page, 1); + goto fail; + } + } } out: SetPageUptodate(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1bf75f8c40bf4a..4a7a9d3024bfff 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -656,6 +656,7 @@ struct f2fs_io_info { int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */ block_t blk_addr; /* block address to be written */ struct page *page; /* page to be written */ + struct page *encrypted_page; /* encrypted page */ }; #define is_read_io(rw) (((rw) & 1) == READ) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9eb0100c57fdcd..452123ecd8fcc5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -504,7 +504,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, truncate_out: f2fs_wait_on_page_writeback(page, DATA); zero_user(page, offset, PAGE_CACHE_SIZE - offset); - if (!cache_only) + if (!cache_only || !f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode)) set_page_dirty(page); f2fs_put_page(page, 1); return 0; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2e2afebd9d0f50..43354cb3ce9492 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -518,6 +518,72 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return 1; } +static void move_encrypted_block(struct inode *inode, block_t bidx) +{ + struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), + .type = DATA, + .rw = READ_SYNC, + .encrypted_page = NULL, + }; + struct dnode_of_data dn; + struct f2fs_summary sum; + struct node_info ni; + struct page *page; + int err; + + /* do not read out */ + page = grab_cache_page(inode->i_mapping, bidx); + if (!page) + return; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE); + if (err) + goto out; + + if (unlikely(dn.data_blkaddr == NULL_ADDR)) + goto put_out; + + get_node_info(fio.sbi, dn.nid, &ni); + set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); + + /* read page */ + fio.page = page; + fio.blk_addr = dn.data_blkaddr; + + fio.encrypted_page = grab_cache_page(META_MAPPING(fio.sbi), fio.blk_addr); + if (!fio.encrypted_page) + goto put_out; + + f2fs_submit_page_bio(&fio); + + /* allocate block address */ + f2fs_wait_on_page_writeback(dn.node_page, NODE); + + allocate_data_block(fio.sbi, NULL, fio.blk_addr, + &fio.blk_addr, &sum, CURSEG_COLD_DATA); + dn.data_blkaddr = fio.blk_addr; + + /* write page */ + lock_page(fio.encrypted_page); + set_page_writeback(fio.encrypted_page); + fio.rw = WRITE_SYNC; + f2fs_submit_page_mbio(&fio); + + set_data_blkaddr(&dn); + f2fs_update_extent_cache(&dn); + set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + if (page->index == 0) + set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + + f2fs_put_page(fio.encrypted_page, 1); +put_out: + f2fs_put_dnode(&dn); +out: + f2fs_put_page(page, 1); +} + static void move_data_page(struct inode *inode, block_t bidx, int gc_type) { struct page *page; @@ -537,6 +603,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type) .type = DATA, .rw = WRITE_SYNC, .page = page, + .encrypted_page = NULL, }; f2fs_wait_on_page_writeback(page, DATA); @@ -606,6 +673,13 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (IS_ERR(inode) || is_bad_inode(inode)) continue; + /* if encrypted inode, let's go phase 3 */ + if (f2fs_encrypted_inode(inode) && + S_ISREG(inode->i_mode)) { + add_gc_inode(gc_list, inode); + continue; + } + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); data_page = get_read_data_page(inode, start_bidx + ofs_in_node, READA); @@ -624,7 +698,10 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (inode) { start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)) + ofs_in_node; - move_data_page(inode, start_bidx, gc_type); + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + move_encrypted_block(inode, start_bidx); + else + move_data_page(inode, start_bidx, gc_type); stat_inc_data_blk_count(sbi, 1, gc_type); } } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 5f5b34be5cd9ea..b0b78050283654 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -113,6 +113,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, .page = page, + .encrypted_page = NULL, }; int dirty, err; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 771725650c0d05..d9c52424bac215 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1003,6 +1003,7 @@ static int read_node_page(struct page *page, int rw) .type = NODE, .rw = rw, .page = page, + .encrypted_page = NULL, }; get_node_info(sbi, page->index, &ni); @@ -1299,6 +1300,7 @@ static int f2fs_write_node_page(struct page *page, .type = NODE, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, .page = page, + .encrypted_page = NULL, }; trace_f2fs_writepage(page, NODE); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 989c0bf484311f..2c40ce13289563 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -219,6 +219,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) .sbi = sbi, .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, + .encrypted_page = NULL, }; /* @@ -1231,6 +1232,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) .rw = WRITE_SYNC | REQ_META | REQ_PRIO, .blk_addr = page->index, .page = page, + .encrypted_page = NULL, }; set_page_writeback(page); @@ -1330,20 +1332,34 @@ static inline bool is_merged_page(struct f2fs_sb_info *sbi, enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io = &sbi->write_io[btype]; struct bio_vec *bvec; + struct page *target; int i; down_read(&io->io_rwsem); - if (!io->bio) - goto out; + if (!io->bio) { + up_read(&io->io_rwsem); + return false; + } bio_for_each_segment_all(bvec, io->bio, i) { - if (page == bvec->bv_page) { + + if (bvec->bv_page->mapping) { + target = bvec->bv_page; + } else { + struct f2fs_crypto_ctx *ctx; + + /* encrypted page */ + ctx = (struct f2fs_crypto_ctx *)page_private( + bvec->bv_page); + target = ctx->control_page; + } + + if (page == target) { up_read(&io->io_rwsem); return true; } } -out: up_read(&io->io_rwsem); return false; } From 9ea97163c6dad2430e06238acefe7a1d613629cf Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Apr 2015 14:51:02 -0700 Subject: [PATCH 40/83] f2fs crypto: add filename encryption for f2fs_add_link This patch adds filename encryption support for f2fs_add_link. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index f7293a2674b230..750a6883f04720 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -507,24 +507,33 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, unsigned long bidx, block; f2fs_hash_t dentry_hash; unsigned int nbucket, nblock; - size_t namelen = name->len; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; struct f2fs_dentry_ptr d; - int slots = GET_DENTRY_SLOTS(namelen); struct page *page = NULL; - int err = 0; + struct f2fs_filename fname; + struct qstr new_name; + int slots, err; + + err = f2fs_fname_setup_filename(dir, name, 0, &fname); + if (err) + return err; + + new_name.name = fname_name(&fname); + new_name.len = fname_len(&fname); if (f2fs_has_inline_dentry(dir)) { - err = f2fs_add_inline_entry(dir, name, inode, ino, mode); + err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode); if (!err || err != -EAGAIN) - return err; + goto out; else err = 0; } - dentry_hash = f2fs_dentry_hash(name); level = 0; + slots = GET_DENTRY_SLOTS(new_name.len); + dentry_hash = f2fs_dentry_hash(&new_name); + current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { level = F2FS_I(dir)->clevel; @@ -532,8 +541,10 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, } start: - if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) - return -ENOSPC; + if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) { + err = -ENOSPC; + goto out; + } /* Increase the depth, if required */ if (level == current_depth) @@ -547,8 +558,10 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, for (block = bidx; block <= (bidx + nblock - 1); block++) { dentry_page = get_new_data_page(dir, NULL, block, true); - if (IS_ERR(dentry_page)) - return PTR_ERR(dentry_page); + if (IS_ERR(dentry_page)) { + err = PTR_ERR(dentry_page); + goto out; + } dentry_blk = kmap(dentry_page); bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, @@ -568,7 +581,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, if (inode) { down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name, NULL); + page = init_inode_metadata(inode, dir, &new_name, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -576,7 +589,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, } make_dentry_ptr(&d, (void *)dentry_blk, 1); - f2fs_update_dentry(ino, mode, &d, name, dentry_hash, bit_pos); + f2fs_update_dentry(ino, mode, &d, &new_name, dentry_hash, bit_pos); set_page_dirty(dentry_page); @@ -598,6 +611,8 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, } kunmap(dentry_page); f2fs_put_page(dentry_page, 1); +out: + f2fs_fname_free_filename(&fname); return err; } From d8c6822a0556a88b13c8483599589752cf3e39f7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Apr 2015 16:26:24 -0700 Subject: [PATCH 41/83] f2fs crypto: add filename encryption for f2fs_readdir This patch implements filename encryption support for f2fs_readdir. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 57 ++++++++++++++++++++++++++++++++++++++---------- fs/f2fs/f2fs.h | 12 ++++++---- fs/f2fs/inline.c | 13 ++++++----- 3 files changed, 60 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 750a6883f04720..ab6455de52f354 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -98,7 +98,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); - make_dentry_ptr(&d, (void *)dentry_blk, 1); + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); de = find_target_dentry(name, max_slots, &d); if (de) @@ -356,7 +356,7 @@ static int make_empty_dir(struct inode *inode, dentry_blk = kmap_atomic(dentry_page); - make_dentry_ptr(&d, (void *)dentry_blk, 1); + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); do_make_empty_dir(inode, parent, &d); kunmap_atomic(dentry_blk); @@ -588,7 +588,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, } } - make_dentry_ptr(&d, (void *)dentry_blk, 1); + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); f2fs_update_dentry(ino, mode, &d, &new_name, dentry_hash, bit_pos); set_page_dirty(dentry_page); @@ -750,11 +750,12 @@ bool f2fs_empty_dir(struct inode *dir) } bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, - unsigned int start_pos) + unsigned int start_pos, struct f2fs_str *fstr) { unsigned char d_type = DT_UNKNOWN; unsigned int bit_pos; struct f2fs_dir_entry *de = NULL; + struct f2fs_str de_name = FSTR_INIT(NULL, 0); bit_pos = ((unsigned long)ctx->pos % d->max); @@ -768,8 +769,24 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, d_type = f2fs_filetype_table[de->file_type]; else d_type = DT_UNKNOWN; - if (!dir_emit(ctx, d->filename[bit_pos], - le16_to_cpu(de->name_len), + + /* encrypted case */ + de_name.name = d->filename[bit_pos]; + de_name.len = le16_to_cpu(de->name_len); + + if (f2fs_encrypted_inode(d->inode)) { + int save_len = fstr->len; + int ret; + + ret = f2fs_fname_disk_to_usr(d->inode, &de->hash_code, + &de_name, fstr); + de_name = *fstr; + fstr->len = save_len; + if (ret < 0) + return true; + } + + if (!dir_emit(ctx, de_name.name, de_name.len, le32_to_cpu(de->ino), d_type)) return true; @@ -788,9 +805,24 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) struct file_ra_state *ra = &file->f_ra; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); struct f2fs_dentry_ptr d; + struct f2fs_str fstr = FSTR_INIT(NULL, 0); + int err = 0; - if (f2fs_has_inline_dentry(inode)) - return f2fs_read_inline_dir(file, ctx); + err = f2fs_setup_fname_crypto(inode); + if (err) + return err; + + if (f2fs_encrypted_inode(inode)) { + err = f2fs_fname_crypto_alloc_buffer(inode, F2FS_NAME_LEN, + &fstr); + if (err < 0) + return err; + } + + if (f2fs_has_inline_dentry(inode)) { + err = f2fs_read_inline_dir(file, ctx, &fstr); + goto out; + } /* readahead for multi pages of dir */ if (npages - n > 1 && !ra_has_index(ra, n)) @@ -804,9 +836,9 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) dentry_blk = kmap(dentry_page); - make_dentry_ptr(&d, (void *)dentry_blk, 1); + make_dentry_ptr(inode, &d, (void *)dentry_blk, 1); - if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK)) + if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) goto stop; ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; @@ -819,8 +851,9 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) kunmap(dentry_page); f2fs_put_page(dentry_page, 1); } - - return 0; +out: + f2fs_fname_crypto_free_buffer(&fstr); + return err; } const struct file_operations f2fs_dir_operations = { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4a7a9d3024bfff..b478a6fa92a7d2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -277,15 +277,18 @@ struct f2fs_filename { #define fname_len(p) ((p)->disk_name.len) struct f2fs_dentry_ptr { + struct inode *inode; const void *bitmap; struct f2fs_dir_entry *dentry; __u8 (*filename)[F2FS_SLOT_LEN]; int max; }; -static inline void make_dentry_ptr(struct f2fs_dentry_ptr *d, - void *src, int type) +static inline void make_dentry_ptr(struct inode *inode, + struct f2fs_dentry_ptr *d, void *src, int type) { + d->inode = inode; + if (type == 1) { struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src; d->max = NR_DENTRY_IN_BLOCK; @@ -1584,7 +1587,7 @@ void set_de_type(struct f2fs_dir_entry *, umode_t); struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, struct f2fs_dentry_ptr *); bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, - unsigned int); + unsigned int, struct f2fs_str *); void do_make_empty_dir(struct inode *, struct inode *, struct f2fs_dentry_ptr *); struct page *init_inode_metadata(struct inode *, struct inode *, @@ -1937,7 +1940,8 @@ int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *, void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); bool f2fs_empty_inline_dir(struct inode *); -int f2fs_read_inline_dir(struct file *, struct dir_context *); +int f2fs_read_inline_dir(struct file *, struct dir_context *, + struct f2fs_str *); /* * crypto support diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index b0b78050283654..dd51d3f262b7a3 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -298,7 +298,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, inline_dentry = inline_data_addr(ipage); - make_dentry_ptr(&d, (void *)inline_dentry, 2); + make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); de = find_target_dentry(name, NULL, &d); unlock_page(ipage); @@ -342,7 +342,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, dentry_blk = inline_data_addr(ipage); - make_dentry_ptr(&d, (void *)dentry_blk, 2); + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); do_make_empty_dir(inode, parent, &d); set_page_dirty(ipage); @@ -446,7 +446,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, f2fs_wait_on_page_writeback(ipage, NODE); name_hash = f2fs_dentry_hash(name); - make_dentry_ptr(&d, (void *)dentry_blk, 2); + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos); set_page_dirty(ipage); @@ -523,7 +523,8 @@ bool f2fs_empty_inline_dir(struct inode *dir) return true; } -int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx) +int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, + struct f2fs_str *fstr) { struct inode *inode = file_inode(file); struct f2fs_inline_dentry *inline_dentry = NULL; @@ -539,9 +540,9 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx) inline_dentry = inline_data_addr(ipage); - make_dentry_ptr(&d, (void *)inline_dentry, 2); + make_dentry_ptr(inode, &d, (void *)inline_dentry, 2); - if (!f2fs_fill_dentries(ctx, &d, 0)) + if (!f2fs_fill_dentries(ctx, &d, 0, fstr)) ctx->pos = NR_INLINE_DENTRY; f2fs_put_page(ipage, 1); From 6e22c691ba0a63ba89f516f0bf1828cf6dce372d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Apr 2015 17:12:39 -0700 Subject: [PATCH 42/83] f2fs crypto: add filename encryption for f2fs_lookup This patch implements filename encryption support for f2fs_lookup. Note that, f2fs_find_entry should be outside of f2fs_(un)lock_op(). Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 79 +++++++++++++++++++++++++++--------------------- fs/f2fs/f2fs.h | 9 +++--- fs/f2fs/inline.c | 9 ++++-- 3 files changed, 56 insertions(+), 41 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index ab6455de52f354..5e10d9d8099c65 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -76,20 +76,10 @@ static unsigned long dir_block_index(unsigned int level, return bidx; } -static bool early_match_name(size_t namelen, f2fs_hash_t namehash, - struct f2fs_dir_entry *de) -{ - if (le16_to_cpu(de->name_len) != namelen) - return false; - - if (de->hash_code != namehash) - return false; - - return true; -} - static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, - struct qstr *name, int *max_slots, + struct f2fs_filename *fname, + f2fs_hash_t namehash, + int *max_slots, struct page **res_page) { struct f2fs_dentry_block *dentry_blk; @@ -99,8 +89,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); - de = find_target_dentry(name, max_slots, &d); - + de = find_target_dentry(fname, namehash, max_slots, &d); if (de) *res_page = dentry_page; else @@ -114,13 +103,15 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, return de; } -struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, - struct f2fs_dentry_ptr *d) +struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *fname, + f2fs_hash_t namehash, int *max_slots, + struct f2fs_dentry_ptr *d) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; - f2fs_hash_t namehash = f2fs_dentry_hash(name); int max_len = 0; + struct f2fs_str de_name = FSTR_INIT(NULL, 0); + struct f2fs_str *name = &fname->disk_name; if (max_slots) *max_slots = 0; @@ -132,8 +123,18 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, } de = &d->dentry[bit_pos]; - if (early_match_name(name->len, namehash, de) && - !memcmp(d->filename[bit_pos], name->name, name->len)) + + /* encrypted case */ + de_name.name = d->filename[bit_pos]; + de_name.len = le16_to_cpu(de->name_len); + + /* show encrypted name */ + if (fname->hash) { + if (de->hash_code == fname->hash) + goto found; + } else if (de_name.len == name->len && + de->hash_code == namehash && + !memcmp(de_name.name, name->name, name->len)) goto found; if (max_slots && max_len > *max_slots) @@ -155,16 +156,21 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, } static struct f2fs_dir_entry *find_in_level(struct inode *dir, - unsigned int level, struct qstr *name, - f2fs_hash_t namehash, struct page **res_page) + unsigned int level, + struct f2fs_filename *fname, + struct page **res_page) { - int s = GET_DENTRY_SLOTS(name->len); + struct qstr name = FSTR_TO_QSTR(&fname->disk_name); + int s = GET_DENTRY_SLOTS(name.len); unsigned int nbucket, nblock; unsigned int bidx, end_block; struct page *dentry_page; struct f2fs_dir_entry *de = NULL; bool room = false; int max_slots; + f2fs_hash_t namehash; + + namehash = f2fs_dentry_hash(&name); f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); @@ -183,7 +189,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, continue; } - de = find_in_block(dentry_page, name, &max_slots, res_page); + de = find_in_block(dentry_page, fname, namehash, &max_slots, + res_page); if (de) break; @@ -211,30 +218,34 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, { unsigned long npages = dir_blocks(dir); struct f2fs_dir_entry *de = NULL; - f2fs_hash_t name_hash; unsigned int max_depth; unsigned int level; + struct f2fs_filename fname; + int err; *res_page = NULL; - if (f2fs_has_inline_dentry(dir)) - return find_in_inline_dir(dir, child, res_page); + err = f2fs_fname_setup_filename(dir, child, 1, &fname); + if (err) + return NULL; + + if (f2fs_has_inline_dentry(dir)) { + de = find_in_inline_dir(dir, &fname, res_page); + goto out; + } if (npages == 0) - return NULL; + goto out; - name_hash = f2fs_dentry_hash(child); max_depth = F2FS_I(dir)->i_current_depth; for (level = 0; level < max_depth; level++) { - de = find_in_level(dir, level, child, name_hash, res_page); + de = find_in_level(dir, level, &fname, res_page); if (de) break; } - if (!de && F2FS_I(dir)->chash != name_hash) { - F2FS_I(dir)->chash = name_hash; - F2FS_I(dir)->clevel = level - 1; - } +out: + f2fs_fname_free_filename(&fname); return de; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b478a6fa92a7d2..6f4d1dbac2f481 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1584,8 +1584,9 @@ struct dentry *f2fs_get_parent(struct dentry *child); */ extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; void set_de_type(struct f2fs_dir_entry *, umode_t); -struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, - struct f2fs_dentry_ptr *); + +struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *, + f2fs_hash_t, int *, struct f2fs_dentry_ptr *); bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, unsigned int, struct f2fs_str *); void do_make_empty_dir(struct inode *, struct inode *, @@ -1931,8 +1932,8 @@ int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); int f2fs_convert_inline_inode(struct inode *); int f2fs_write_inline_data(struct inode *, struct page *); bool recover_inline_data(struct inode *, struct page *); -struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, - struct page **); +struct f2fs_dir_entry *find_in_inline_dir(struct inode *, + struct f2fs_filename *, struct page **); struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index dd51d3f262b7a3..38e75fb1e48812 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -284,23 +284,26 @@ bool recover_inline_data(struct inode *inode, struct page *npage) } struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, - struct qstr *name, struct page **res_page) + struct f2fs_filename *fname, struct page **res_page) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct f2fs_inline_dentry *inline_dentry; + struct qstr name = FSTR_TO_QSTR(&fname->disk_name); struct f2fs_dir_entry *de; struct f2fs_dentry_ptr d; struct page *ipage; + f2fs_hash_t namehash; ipage = get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) return NULL; + namehash = f2fs_dentry_hash(&name); + inline_dentry = inline_data_addr(ipage); make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); - de = find_target_dentry(name, NULL, &d); - + de = find_target_dentry(fname, namehash, NULL, &d); unlock_page(ipage); if (de) *res_page = ipage; From e7d5545285ededcf73dc7cbb9b7c65d0259f2b44 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 29 Apr 2015 17:02:18 -0700 Subject: [PATCH 43/83] f2fs crypto: add filename encryption for roll-forward recovery This patch adds a bit flag to indicate whether or not i_name in the inode is encrypted. If this name is encrypted, we can't do recover_dentry during roll-forward. So, f2fs_sync_file() needs to do checkpoint, if this will be needed in future. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 8 +++++++- fs/f2fs/f2fs.h | 5 ++++- fs/f2fs/file.c | 4 +++- fs/f2fs/namei.c | 20 +++++++++++++++----- fs/f2fs/recovery.c | 13 ++++++++++++- 5 files changed, 41 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 5e10d9d8099c65..12f686914d387f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -314,10 +314,14 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } -int update_dent_inode(struct inode *inode, const struct qstr *name) +int update_dent_inode(struct inode *inode, struct inode *to, + const struct qstr *name) { struct page *page; + if (file_enc_name(to)) + return 0; + page = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(page)) return PTR_ERR(page); @@ -597,6 +601,8 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, err = PTR_ERR(page); goto fail; } + if (f2fs_encrypted_inode(dir)) + file_set_enc_name(inode); } make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6f4d1dbac2f481..ea4f48f3073c68 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -378,6 +378,7 @@ struct f2fs_map_blocks { #define FADVISE_COLD_BIT 0x01 #define FADVISE_LOST_PINO_BIT 0x02 #define FADVISE_ENCRYPT_BIT 0x04 +#define FADVISE_ENC_NAME_BIT 0x08 #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) @@ -388,6 +389,8 @@ struct f2fs_map_blocks { #define file_is_encrypt(inode) is_file(inode, FADVISE_ENCRYPT_BIT) #define file_set_encrypt(inode) set_file(inode, FADVISE_ENCRYPT_BIT) #define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT) +#define file_enc_name(inode) is_file(inode, FADVISE_ENC_NAME_BIT) +#define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT) /* Encryption algorithms */ #define F2FS_ENCRYPTION_MODE_INVALID 0 @@ -1602,7 +1605,7 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); -int update_dent_inode(struct inode *, const struct qstr *); +int update_dent_inode(struct inode *, struct inode *, const struct qstr *); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, const struct qstr *, f2fs_hash_t , unsigned int); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 452123ecd8fcc5..cb0d6b68020a16 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -106,7 +106,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) if (!dentry) return 0; - if (update_dent_inode(inode, &dentry->d_name)) { + if (update_dent_inode(inode, inode, &dentry->d_name)) { dput(dentry); return 0; } @@ -123,6 +123,8 @@ static inline bool need_do_checkpoint(struct inode *inode) if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) need_cp = true; + else if (file_enc_name(inode) && need_dentry_mark(sbi, inode->i_ino)) + need_cp = true; else if (file_wrong_pino(inode)) need_cp = true; else if (!space_for_roll_forward(sbi)) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index bc8992e8137e7c..c857f8249da090 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -517,7 +517,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto put_out_dir; - if (update_dent_inode(old_inode, &new_dentry->d_name)) { + if (update_dent_inode(old_inode, new_inode, + &new_dentry->d_name)) { release_orphan_inode(sbi); goto put_out_dir; } @@ -557,6 +558,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, down_write(&F2FS_I(old_inode)->i_sem); file_lost_pino(old_inode); + if (new_inode && file_enc_name(new_inode)) + file_set_enc_name(old_inode); up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = CURRENT_TIME; @@ -659,13 +662,17 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_lock_op(sbi); - err = update_dent_inode(old_inode, &new_dentry->d_name); + err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name); if (err) goto out_unlock; + if (file_enc_name(new_inode)) + file_set_enc_name(old_inode); - err = update_dent_inode(new_inode, &old_dentry->d_name); + err = update_dent_inode(new_inode, old_inode, &old_dentry->d_name); if (err) goto out_undo; + if (file_enc_name(old_inode)) + file_set_enc_name(new_inode); /* update ".." directory entry info of old dentry */ if (old_dir_entry) @@ -723,8 +730,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_sync_fs(sbi->sb, 1); return 0; out_undo: - /* Still we may fail to recover name info of f2fs_inode here */ - update_dent_inode(old_inode, &old_dentry->d_name); + /* + * Still we may fail to recover name info of f2fs_inode here + * Drop it, once its name is set as encrypted + */ + update_dent_inode(old_inode, old_inode, &old_dentry->d_name); out_unlock: f2fs_unlock_op(sbi); out_new_dir: diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index f77a1beac78328..9de25878db2ba0 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -83,6 +83,11 @@ static int recover_dentry(struct inode *inode, struct page *ipage) goto out; } + if (file_enc_name(inode)) { + iput(dir); + return 0; + } + name.len = le32_to_cpu(raw_inode->i_namelen); name.name = raw_inode->i_name; @@ -143,6 +148,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage) static void recover_inode(struct inode *inode, struct page *page) { struct f2fs_inode *raw = F2FS_INODE(page); + char *name; inode->i_mode = le16_to_cpu(raw->i_mode); i_size_write(inode, le64_to_cpu(raw->i_size)); @@ -153,8 +159,13 @@ static void recover_inode(struct inode *inode, struct page *page) inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); + if (file_enc_name(inode)) + name = ""; + else + name = F2FS_INODE(page)->i_name; + f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", - ino_of_node(page), F2FS_INODE(page)->i_name); + ino_of_node(page), name); } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) From cbaf042a3cc6c37f9005fd6952cbf2013ab7cc15 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 29 Apr 2015 15:10:53 -0700 Subject: [PATCH 44/83] f2fs crypto: add symlink encryption This patch implements encryption support for symlink. Signed-off-by: Uday Savagaonkar Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/inode.c | 5 +- fs/f2fs/namei.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 142 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ea4f48f3073c68..20e7be613d1e4c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1920,6 +1920,7 @@ extern const struct address_space_operations f2fs_node_aops; extern const struct address_space_operations f2fs_meta_aops; extern const struct inode_operations f2fs_dir_inode_operations; extern const struct inode_operations f2fs_symlink_inode_operations; +extern const struct inode_operations f2fs_encrypted_symlink_inode_operations; extern const struct inode_operations f2fs_special_inode_operations; extern struct kmem_cache *inode_entry_slab; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 00f0e74176cbe0..ea6ba3bc84725e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -198,7 +198,10 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) inode->i_mapping->a_ops = &f2fs_dblock_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &f2fs_symlink_inode_operations; + if (f2fs_encrypted_inode(inode)) + inode->i_op = &f2fs_encrypted_symlink_inode_operations; + else + inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c857f8249da090..bbd83e427834b7 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -338,16 +338,26 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; - size_t symlen = strlen(symname) + 1; + size_t len = strlen(symname); + size_t p_len; + char *p_str; + struct f2fs_str disk_link = FSTR_INIT(NULL, 0); + struct f2fs_encrypted_symlink_data *sd = NULL; int err; + if (len > dir->i_sb->s_blocksize) + return -ENAMETOOLONG; + f2fs_balance_fs(sbi); inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); if (IS_ERR(inode)) return PTR_ERR(inode); - inode->i_op = &f2fs_symlink_inode_operations; + if (f2fs_encrypted_inode(inode)) + inode->i_op = &f2fs_encrypted_symlink_inode_operations; + else + inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; f2fs_lock_op(sbi); @@ -355,10 +365,50 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (err) goto out; f2fs_unlock_op(sbi); - - err = page_symlink(inode, symname, symlen); alloc_nid_done(sbi, inode->i_ino); + if (f2fs_encrypted_inode(dir)) { + struct qstr istr = QSTR_INIT(symname, len); + + err = f2fs_inherit_context(dir, inode, NULL); + if (err) + goto err_out; + + err = f2fs_setup_fname_crypto(inode); + if (err) + goto err_out; + + err = f2fs_fname_crypto_alloc_buffer(inode, len, &disk_link); + if (err) + goto err_out; + + err = f2fs_fname_usr_to_disk(inode, &istr, &disk_link); + if (err < 0) + goto err_out; + + p_len = encrypted_symlink_data_len(disk_link.len) + 1; + + if (p_len > dir->i_sb->s_blocksize) { + err = -ENAMETOOLONG; + goto err_out; + } + + sd = kzalloc(p_len, GFP_NOFS); + if (!sd) { + err = -ENOMEM; + goto err_out; + } + memcpy(sd->encrypted_path, disk_link.name, disk_link.len); + sd->len = cpu_to_le16(disk_link.len); + p_str = (char *)sd; + } else { + p_len = len + 1; + p_str = (char *)symname; + } + + err = page_symlink(inode, p_str, p_len); + +err_out: d_instantiate(dentry, inode); unlock_new_inode(inode); @@ -371,10 +421,14 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, * If the symlink path is stored into inline_data, there is no * performance regression. */ - filemap_write_and_wait_range(inode->i_mapping, 0, symlen - 1); + if (!err) + filemap_write_and_wait_range(inode->i_mapping, 0, p_len - 1); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); + + kfree(sd); + f2fs_fname_crypto_free_buffer(&disk_link); return err; out: handle_failed_inode(inode); @@ -818,6 +872,84 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) return err; } +#ifdef CONFIG_F2FS_FS_ENCRYPTION +static void *f2fs_encrypted_follow_link(struct dentry *dentry, + struct nameidata *nd) +{ + struct page *cpage = NULL; + char *caddr, *paddr = NULL; + struct f2fs_str cstr; + struct f2fs_str pstr = FSTR_INIT(NULL, 0); + struct inode *inode = d_inode(dentry); + struct f2fs_encrypted_symlink_data *sd; + loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1); + u32 max_size = inode->i_sb->s_blocksize; + int res; + + res = f2fs_setup_fname_crypto(inode); + if (res) + return ERR_PTR(res); + + cpage = read_mapping_page(inode->i_mapping, 0, NULL); + if (IS_ERR(cpage)) + return cpage; + caddr = kmap(cpage); + caddr[size] = 0; + + /* Symlink is encrypted */ + sd = (struct f2fs_encrypted_symlink_data *)caddr; + cstr.name = sd->encrypted_path; + cstr.len = le16_to_cpu(sd->len); + + /* this is broken symlink case */ + if (cstr.name[0] == 0 && cstr.len == 0) { + res = -ENOENT; + goto errout; + } + + if ((cstr.len + sizeof(struct f2fs_encrypted_symlink_data) - 1) > + max_size) { + /* Symlink data on the disk is corrupted */ + res = -EIO; + goto errout; + } + res = f2fs_fname_crypto_alloc_buffer(inode, cstr.len, &pstr); + if (res) + goto errout; + + res = f2fs_fname_disk_to_usr(inode, NULL, &cstr, &pstr); + if (res < 0) + goto errout; + + paddr = pstr.name; + + /* Null-terminate the name */ + paddr[res] = '\0'; + nd_set_link(nd, paddr); + + kunmap(cpage); + page_cache_release(cpage); + return NULL; +errout: + f2fs_fname_crypto_free_buffer(&pstr); + kunmap(cpage); + page_cache_release(cpage); + return ERR_PTR(res); +} + +const struct inode_operations f2fs_encrypted_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = f2fs_encrypted_follow_link, + .put_link = kfree_put_link, + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +}; +#endif + const struct inode_operations f2fs_dir_inode_operations = { .create = f2fs_create, .lookup = f2fs_lookup, From edf3fb8e9e5771944815fadbd8ef6a7e43ef0f7c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 5 May 2015 20:20:29 -0700 Subject: [PATCH 45/83] f2fs crypto: fix missing key when reading a page 1. mount $mnt 2. cp data $mnt/ 3. umount $mnt 4. log out 5. log in 6. cat $mnt/data -> panic, due to no i_crypt_info. Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index c910fa722e5c11..c3f02b61aa5afb 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -131,7 +131,9 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) unsigned long flags; struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; - BUG_ON(ci == NULL); + if (ci == NULL) + return ERR_PTR(-EACCES); + /* * We first try getting the ctx from a free list because in * the common case the ctx will have an allocated and From d690358b2bf43f5cb12ce07d209d09b1decb79c3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 6 May 2015 18:23:21 -0700 Subject: [PATCH 46/83] f2fs crypto: remove checking key context during lookup No matter what the key is valid or not, readdir shows the dir entries correctly. So, lookup should not failed. But, we expect further accesses should be denied from open, rename, link, and so on. Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index bbd83e427834b7..4ad7242f4829ca 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -260,16 +260,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) return ERR_CAST(inode); - if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode) && - !f2fs_is_child_context_consistent_with_parent(dir, inode)) { - iput(inode); - f2fs_msg(inode->i_sb, KERN_WARNING, - "Inconsistent encryption contexts: %lu/%lu\n", - (unsigned long)dir->i_ino, - (unsigned long)inode->i_ino); - return ERR_PTR(-EPERM); - } - if (f2fs_has_inline_dots(inode)) { err = __recover_dot_dentries(inode, dir->i_ino); if (err) From 498c5e9fcd10c89404601b222c1dbcb6e4f10a02 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 7 May 2015 18:11:37 +0800 Subject: [PATCH 47/83] f2fs: add default mount options to remount I use f2fs filesystem with /data partition on my Android phone by the default mount options. When I remount /data in order to adding discard option to run some benchmarks, I find the default options such as background_gc, user_xattr and acl turned off. So I introduce a function named default_options in super.c. It do some default setting, and both mount and remount operations will call this function to complete default setting. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d61f74ab4a0382..d5dfd143c3943d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -661,6 +661,22 @@ static const struct file_operations f2fs_seq_segment_info_fops = { .release = single_release, }; +static void default_options(struct f2fs_sb_info *sbi) +{ + /* init some FS parameters */ + sbi->active_logs = NR_CURSEG_TYPE; + + set_opt(sbi, BG_GC); + set_opt(sbi, INLINE_DATA); + +#ifdef CONFIG_F2FS_FS_XATTR + set_opt(sbi, XATTR_USER); +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + set_opt(sbi, POSIX_ACL); +#endif +} + static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -679,7 +695,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) active_logs = sbi->active_logs; sbi->mount_opt.opt = 0; - sbi->active_logs = NR_CURSEG_TYPE; + default_options(sbi); /* parse mount options */ err = parse_options(sb, data); @@ -1021,18 +1037,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; sb->s_fs_info = sbi; - /* init some FS parameters */ - sbi->active_logs = NR_CURSEG_TYPE; - - set_opt(sbi, BG_GC); - set_opt(sbi, INLINE_DATA); - -#ifdef CONFIG_F2FS_FS_XATTR - set_opt(sbi, XATTR_USER); -#endif -#ifdef CONFIG_F2FS_FS_POSIX_ACL - set_opt(sbi, POSIX_ACL); -#endif + default_options(sbi); /* parse mount options */ options = kstrdup((const char *)data, GFP_KERNEL); if (data && !options) { From 40a02be1781f726ee015bcf7c8961a71a2a50f05 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 11 May 2015 20:03:49 -0700 Subject: [PATCH 48/83] f2fs: do not issue next dnode discard redundantly We have a discard map, so that we can avoid redundant discard issues. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2c40ce13289563..342e0f761d8f71 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -486,7 +486,20 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) { - if (f2fs_issue_discard(sbi, blkaddr, 1)) { + int err = -ENOTSUPP; + + if (test_opt(sbi, DISCARD)) { + struct seg_entry *se = get_seg_entry(sbi, + GET_SEGNO(sbi, blkaddr)); + unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); + + if (f2fs_test_bit(offset, se->discard_map)) + return; + + err = f2fs_issue_discard(sbi, blkaddr, 1); + } + + if (err) { struct page *page = grab_meta_page(sbi, blkaddr); /* zero-filled page */ set_page_dirty(page); From 08b95126c787441e4ef572422a5237dd985147be Mon Sep 17 00:00:00 2001 From: hujianyang Date: Tue, 12 May 2015 16:05:57 +0800 Subject: [PATCH 49/83] f2fs: add compat_ioctl to provide backward compatability introduce compat_ioctl to regular files, but doesn't add this functionality to f2fs_dir_operations. While running a 32-bit busybox, I met an error like this: (A is a directory) chattr: reading flags on A: Inappropriate ioctl for device This patch copies compat_ioctl from f2fs_file_operations and fix this problem. Signed-off-by: hujianyang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 12f686914d387f..3e923763daca0c 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -879,4 +879,7 @@ const struct file_operations f2fs_dir_operations = { .iterate = f2fs_readdir, .fsync = f2fs_sync_file, .unlocked_ioctl = f2fs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = f2fs_compat_ioctl, +#endif }; From e19ef527aa32f057710ec842fe656bffc263b0bb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 18 May 2015 11:45:15 -0700 Subject: [PATCH 50/83] f2fs: avoid buggy functions This patch avoids to use a buggy function for now. It needs to fix them later. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 342e0f761d8f71..fecf69900f94a2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -75,6 +75,14 @@ static inline unsigned long __reverse_ffs(unsigned long word) static unsigned long __find_rev_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { + while (!f2fs_test_bit(offset, (unsigned char *)addr)) + offset++; + + if (offset > size) + offset = size; + + return offset; +#if 0 const unsigned long *p = addr + BIT_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG - 1); unsigned long tmp; @@ -121,11 +129,20 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr, return result + size; /* Nope. */ found_middle: return result + __reverse_ffs(tmp); +#endif } static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { + while (f2fs_test_bit(offset, (unsigned char *)addr)) + offset++; + + if (offset > size) + offset = size; + + return offset; +#if 0 const unsigned long *p = addr + BIT_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG - 1); unsigned long tmp; @@ -173,6 +190,7 @@ static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, return result + size; /* Nope. */ found_middle: return result + __reverse_ffz(tmp); +#endif } void register_inmem_page(struct inode *inode, struct page *page) From 7beb428edaacef63d7a7adefa6d0bbb2db91484c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2015 22:49:58 +0200 Subject: [PATCH 51/83] f2fs: fix building on 32-bit architectures A bug fix to the debug output extended the type of some local variables to 64-bit, which now causes the kernel to fail building because of missing 64-bit division functions: ERROR: "__aeabi_uldivmod" [fs/f2fs/f2fs.ko] undefined! In the kernel, we have to use div_u64 or do_div to do this, in order to annotate that this is an expensive operation. As the function is only called for debug out, we know this is not performance critical, so it is safe to use div_u64. Signed-off-by: Arnd Bergmann Fixes: d1f85bd38db19 ("f2fs: avoid value overflow in showing current status") Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index efbc83f0730509..75176e0dd6c8b2 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -113,10 +113,10 @@ static void update_sit_info(struct f2fs_sb_info *sbi) ndirty++; } } - dist = MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; - si->bimodal = bimodal / dist; + dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100); + si->bimodal = div_u64(bimodal, dist); if (si->dirty_count) - si->avg_vblocks = total_vblocks / ndirty; + si->avg_vblocks = div_u64(total_vblocks, ndirty); else si->avg_vblocks = 0; } From 912a83b5096eb4a5d8d95124d70585e0e861c564 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 14 May 2015 11:52:28 +0300 Subject: [PATCH 52/83] f2fs: cleanup a confusing indent The return was not indented far enough so it looked like it was supposed to go with the other if statement. Signed-off-by: Dan Carpenter Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fecf69900f94a2..190e0558de394c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -569,8 +569,8 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (!force) { if (!test_opt(sbi, DISCARD) || !se->valid_blocks || - SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) - return; + SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) + return; } /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ From 06e1bc05cad8d06860b52c79d4669483c7e39a4f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 May 2015 14:35:14 -0700 Subject: [PATCH 53/83] f2fs: truncate data blocks for orphan inode As Hu reported, F2FS has a space leak problem, when conducting: 1) format a 4GB f2fs partition 2) dd a 3G file, 3) unlink it. So, when doing f2fs_drop_inode(), we need to truncate data blocks before skipping it. We can also drop unused caches assigned to each inode. Reported-by: hujianyang Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d5dfd143c3943d..bfc900e87fccc1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -431,8 +431,30 @@ static int f2fs_drop_inode(struct inode *inode) * - f2fs_gc -> iput -> evict * - inode_wait_for_writeback(inode) */ - if (!inode_unhashed(inode) && inode->i_state & I_SYNC) + if (!inode_unhashed(inode) && inode->i_state & I_SYNC) { + if (!inode->i_nlink && !is_bad_inode(inode)) { + spin_unlock(&inode->i_lock); + + /* some remained atomic pages should discarded */ + if (f2fs_is_atomic_file(inode)) + commit_inmem_pages(inode, true); + + sb_start_intwrite(inode->i_sb); + i_size_write(inode, 0); + + if (F2FS_HAS_BLOCKS(inode)) + f2fs_truncate(inode); + + sb_end_intwrite(inode->i_sb); + +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (F2FS_I(inode)->i_crypt_info) + f2fs_free_encryption_info(inode); +#endif + spin_lock(&inode->i_lock); + } return 0; + } return generic_drop_inode(inode); } From 8bacf6deb02367008cc25244d9c3d23b0f797048 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 May 2015 13:26:54 -0700 Subject: [PATCH 54/83] f2fs crypto: use slab caches This patch integrates the below patch into f2fs. "ext4 crypto: use slab caches Use slab caches the ext4_crypto_ctx and ext4_crypt_info structures for slighly better memory efficiency and debuggability." Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 58 ++++++++++++++++++++++---------------------- fs/f2fs/crypto_key.c | 6 ++--- fs/f2fs/f2fs.h | 1 + 3 files changed, 33 insertions(+), 32 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index c3f02b61aa5afb..4057c07242c69f 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -66,6 +66,9 @@ static DEFINE_SPINLOCK(f2fs_crypto_ctx_lock); struct workqueue_struct *f2fs_read_workqueue; static DEFINE_MUTEX(crypto_init); +static struct kmem_cache *f2fs_crypto_ctx_cachep; +struct kmem_cache *f2fs_crypt_info_cachep; + /** * f2fs_release_crypto_ctx() - Releases an encryption context * @ctx: The encryption context to release. @@ -90,7 +93,7 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) if (ctx->flags & F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { if (ctx->tfm) crypto_free_tfm(ctx->tfm); - kfree(ctx); + kmem_cache_free(f2fs_crypto_ctx_cachep, ctx); } else { spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags); list_add(&ctx->free_list, &f2fs_free_crypto_ctxs); @@ -98,23 +101,6 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) } } -/** - * f2fs_alloc_and_init_crypto_ctx() - Allocates and inits an encryption context - * @mask: The allocation mask. - * - * Return: An allocated and initialized encryption context on success. An error - * value or NULL otherwise. - */ -static struct f2fs_crypto_ctx *f2fs_alloc_and_init_crypto_ctx(gfp_t mask) -{ - struct f2fs_crypto_ctx *ctx = kzalloc(sizeof(struct f2fs_crypto_ctx), - mask); - - if (!ctx) - return ERR_PTR(-ENOMEM); - return ctx; -} - /** * f2fs_get_crypto_ctx() - Gets an encryption context * @inode: The inode for which we are doing the crypto @@ -151,9 +137,9 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) list_del(&ctx->free_list); spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags); if (!ctx) { - ctx = f2fs_alloc_and_init_crypto_ctx(GFP_NOFS); - if (IS_ERR(ctx)) { - res = PTR_ERR(ctx); + ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_NOFS); + if (!ctx) { + res = -ENOMEM; goto out; } ctx->flags |= F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; @@ -263,7 +249,7 @@ void f2fs_exit_crypto(void) } if (pos->tfm) crypto_free_tfm(pos->tfm); - kfree(pos); + kmem_cache_free(f2fs_crypto_ctx_cachep, pos); } INIT_LIST_HEAD(&f2fs_free_crypto_ctxs); if (f2fs_bounce_page_pool) @@ -272,6 +258,12 @@ void f2fs_exit_crypto(void) if (f2fs_read_workqueue) destroy_workqueue(f2fs_read_workqueue); f2fs_read_workqueue = NULL; + if (f2fs_crypto_ctx_cachep) + kmem_cache_destroy(f2fs_crypto_ctx_cachep); + f2fs_crypto_ctx_cachep = NULL; + if (f2fs_crypt_info_cachep) + kmem_cache_destroy(f2fs_crypt_info_cachep); + f2fs_crypt_info_cachep = NULL; } /** @@ -284,24 +276,32 @@ void f2fs_exit_crypto(void) */ int f2fs_init_crypto(void) { - int i, res; + int i, res = -ENOMEM; mutex_lock(&crypto_init); if (f2fs_read_workqueue) goto already_initialized; f2fs_read_workqueue = alloc_workqueue("f2fs_crypto", WQ_HIGHPRI, 0); - if (!f2fs_read_workqueue) { - res = -ENOMEM; + if (!f2fs_read_workqueue) + goto fail; + + f2fs_crypto_ctx_cachep = KMEM_CACHE(f2fs_crypto_ctx, + SLAB_RECLAIM_ACCOUNT); + if (!f2fs_crypto_ctx_cachep) + goto fail; + + f2fs_crypt_info_cachep = KMEM_CACHE(f2fs_crypt_info, + SLAB_RECLAIM_ACCOUNT); + if (!f2fs_crypt_info_cachep) goto fail; - } for (i = 0; i < num_prealloc_crypto_ctxs; i++) { struct f2fs_crypto_ctx *ctx; - ctx = f2fs_alloc_and_init_crypto_ctx(GFP_KERNEL); - if (IS_ERR(ctx)) { - res = PTR_ERR(ctx); + ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_KERNEL); + if (!ctx) { + res = -ENOMEM; goto fail; } list_add(&ctx->free_list, &f2fs_free_crypto_ctxs); diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c index c7d414dd3a4af9..6b9312b281b72e 100644 --- a/fs/f2fs/crypto_key.c +++ b/fs/f2fs/crypto_key.c @@ -99,7 +99,7 @@ void f2fs_free_encryption_info(struct inode *inode) key_put(ci->ci_keyring_key); crypto_free_ablkcipher(ci->ci_ctfm); memzero_explicit(&ci->ci_raw, sizeof(ci->ci_raw)); - kfree(ci); + kmem_cache_free(f2fs_crypt_info_cachep, ci); fi->i_crypt_info = NULL; } @@ -137,7 +137,7 @@ int _f2fs_get_encryption_info(struct inode *inode) return -EINVAL; res = 0; - crypt_info = kmalloc(sizeof(struct f2fs_crypt_info), GFP_NOFS); + crypt_info = kmem_cache_alloc(f2fs_crypt_info_cachep, GFP_NOFS); if (!crypt_info) return -ENOMEM; @@ -187,7 +187,7 @@ int _f2fs_get_encryption_info(struct inode *inode) if (res < 0) { if (res == -ENOKEY) res = 0; - kfree(crypt_info); + kmem_cache_free(f2fs_crypt_info_cachep, crypt_info); } else { fi->i_crypt_info = crypt_info; crypt_info->ci_keyring_key = keyring_key; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 20e7be613d1e4c..59e2bc1955caa3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2004,6 +2004,7 @@ int f2fs_process_policy(const struct f2fs_encryption_policy *, struct inode *); int f2fs_get_policy(struct inode *, struct f2fs_encryption_policy *); /* crypt.c */ +extern struct kmem_cache *f2fs_crypt_info_cachep; extern struct workqueue_struct *f2fs_read_workqueue; bool f2fs_valid_contents_enc_mode(uint32_t); uint32_t f2fs_validate_encryption_key_size(uint32_t, uint32_t); From 640778fbc97b36757bd1f857ba4b599a200517c2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 May 2015 13:33:00 -0700 Subject: [PATCH 55/83] f2fs crypto: get rid of ci_mode from struct f2fs_crypt_info This patch integrates the below patch into f2fs. "ext4 crypto: get rid of ci_mode from struct ext4_crypt_info The ci_mode field was superfluous, and getting rid of it gets rid of an unused hole in the structure." Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 11 +++++------ fs/f2fs/crypto_fname.c | 4 ++-- fs/f2fs/crypto_key.c | 12 ++++++------ fs/f2fs/f2fs_crypto.h | 1 - 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index 4057c07242c69f..e4058b92c774e7 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -151,14 +151,13 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) * Allocate a new Crypto API context if we don't already have * one or if it isn't the right mode. */ - BUG_ON(ci->ci_mode == F2FS_ENCRYPTION_MODE_INVALID); - if (ctx->tfm && (ctx->mode != ci->ci_mode)) { + if (ctx->tfm && (ctx->mode != ci->ci_data_mode)) { crypto_free_tfm(ctx->tfm); ctx->tfm = NULL; ctx->mode = F2FS_ENCRYPTION_MODE_INVALID; } if (!ctx->tfm) { - switch (ci->ci_mode) { + switch (ci->ci_data_mode) { case F2FS_ENCRYPTION_MODE_AES_256_XTS: ctx->tfm = crypto_ablkcipher_tfm( crypto_alloc_ablkcipher("xts(aes)", 0, 0)); @@ -178,9 +177,9 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) ctx->tfm = NULL; goto out; } - ctx->mode = ci->ci_mode; + ctx->mode = ci->ci_data_mode; } - BUG_ON(ci->ci_size != f2fs_encryption_key_size(ci->ci_mode)); + BUG_ON(ci->ci_size != f2fs_encryption_key_size(ci->ci_data_mode)); /* * There shouldn't be a bounce page attached to the crypto @@ -388,7 +387,7 @@ static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx, int res = 0; BUG_ON(!ctx->tfm); - BUG_ON(ctx->mode != fi->i_crypt_info->ci_mode); + BUG_ON(ctx->mode != fi->i_crypt_info->ci_data_mode); if (ctx->mode != F2FS_ENCRYPTION_MODE_AES_256_XTS) { printk_ratelimited(KERN_ERR diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c index 8f3ff9b3caebff..e3a1bdc70b03b8 100644 --- a/fs/f2fs/crypto_fname.c +++ b/fs/f2fs/crypto_fname.c @@ -269,9 +269,9 @@ int f2fs_setup_fname_crypto(struct inode *inode) if (!ci || ci->ci_ctfm) return 0; - if (ci->ci_mode != F2FS_ENCRYPTION_MODE_AES_256_CTS) { + if (ci->ci_filename_mode != F2FS_ENCRYPTION_MODE_AES_256_CTS) { printk_once(KERN_WARNING "f2fs: unsupported key mode %d\n", - ci->ci_mode); + ci->ci_filename_mode); return -ENOKEY; } diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c index 6b9312b281b72e..5ae9cc8fd52b44 100644 --- a/fs/f2fs/crypto_key.c +++ b/fs/f2fs/crypto_key.c @@ -148,14 +148,14 @@ int _f2fs_get_encryption_info(struct inode *inode) memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, sizeof(crypt_info->ci_master_key)); if (S_ISREG(inode->i_mode)) - crypt_info->ci_mode = ctx.contents_encryption_mode; + crypt_info->ci_size = + f2fs_encryption_key_size(crypt_info->ci_data_mode); else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - crypt_info->ci_mode = ctx.filenames_encryption_mode; - else { - printk(KERN_ERR "f2fs crypto: Unsupported inode type.\n"); + crypt_info->ci_size = + f2fs_encryption_key_size(crypt_info->ci_filename_mode); + else BUG(); - } - crypt_info->ci_size = f2fs_encryption_key_size(crypt_info->ci_mode); + BUG_ON(!crypt_info->ci_size); memcpy(full_key_descriptor, F2FS_KEY_DESC_PREFIX, diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h index 6e4139493ba525..58682d2830e990 100644 --- a/fs/f2fs/f2fs_crypto.h +++ b/fs/f2fs/f2fs_crypto.h @@ -75,7 +75,6 @@ struct f2fs_encryption_key { } __attribute__((__packed__)); struct f2fs_crypt_info { - unsigned char ci_mode; unsigned char ci_size; char ci_data_mode; char ci_filename_mode; From ca40b0305209248e5fd05f6ba8a3874656ff0117 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 May 2015 13:40:20 -0700 Subject: [PATCH 56/83] f2fs crypto: shrink size of the f2fs_crypto_ctx structure This patch integrates the below patch into f2fs. "ext4 crypto: shrink size of the ext4_crypto_ctx structure Some fields are only used when the crypto_ctx is being used on the read path, some are only used on the write path, and some are only used when the structure is on free list. Optimize memory use by using a union." Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 40 ++++++++++++++++++---------------------- fs/f2fs/f2fs_crypto.h | 21 ++++++++++++++------- fs/f2fs/segment.c | 2 +- 3 files changed, 33 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index e4058b92c774e7..6d38da11918d92 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -82,14 +82,14 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) { unsigned long flags; - if (ctx->bounce_page) { + if (ctx->flags & F2FS_WRITE_PATH_FL && ctx->w.bounce_page) { if (ctx->flags & F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) - __free_page(ctx->bounce_page); + __free_page(ctx->w.bounce_page); else - mempool_free(ctx->bounce_page, f2fs_bounce_page_pool); - ctx->bounce_page = NULL; + mempool_free(ctx->w.bounce_page, f2fs_bounce_page_pool); + ctx->w.bounce_page = NULL; } - ctx->control_page = NULL; + ctx->w.control_page = NULL; if (ctx->flags & F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { if (ctx->tfm) crypto_free_tfm(ctx->tfm); @@ -146,6 +146,7 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) } else { ctx->flags &= ~F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; } + ctx->flags &= ~F2FS_WRITE_PATH_FL; /* * Allocate a new Crypto API context if we don't already have @@ -181,12 +182,6 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) } BUG_ON(ci->ci_size != f2fs_encryption_key_size(ci->ci_data_mode)); - /* - * There shouldn't be a bounce page attached to the crypto - * context at this point. - */ - BUG_ON(ctx->bounce_page); - out: if (res) { if (!IS_ERR_OR_NULL(ctx)) @@ -203,8 +198,8 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) static void completion_pages(struct work_struct *work) { struct f2fs_crypto_ctx *ctx = - container_of(work, struct f2fs_crypto_ctx, work); - struct bio *bio = ctx->bio; + container_of(work, struct f2fs_crypto_ctx, r.work); + struct bio *bio = ctx->r.bio; struct bio_vec *bv; int i; @@ -225,9 +220,9 @@ static void completion_pages(struct work_struct *work) void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *ctx, struct bio *bio) { - INIT_WORK(&ctx->work, completion_pages); - ctx->bio = bio; - queue_work(f2fs_read_workqueue, &ctx->work); + INIT_WORK(&ctx->r.work, completion_pages); + ctx->r.bio = bio; + queue_work(f2fs_read_workqueue, &ctx->r.work); } /** @@ -238,12 +233,12 @@ void f2fs_exit_crypto(void) struct f2fs_crypto_ctx *pos, *n; list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list) { - if (pos->bounce_page) { + if (pos->w.bounce_page) { if (pos->flags & F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) - __free_page(pos->bounce_page); + __free_page(pos->w.bounce_page); else - mempool_free(pos->bounce_page, + mempool_free(pos->w.bounce_page, f2fs_bounce_page_pool); } if (pos->tfm) @@ -335,7 +330,7 @@ void f2fs_restore_and_release_control_page(struct page **page) ctx = (struct f2fs_crypto_ctx *)page_private(bounce_page); /* restore control page */ - *page = ctx->control_page; + *page = ctx->w.control_page; f2fs_restore_control_page(bounce_page); } @@ -492,8 +487,9 @@ struct page *f2fs_encrypt(struct inode *inode, } else { ctx->flags |= F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; } - ctx->bounce_page = ciphertext_page; - ctx->control_page = plaintext_page; + ctx->flags |= F2FS_WRITE_PATH_FL; + ctx->w.bounce_page = ciphertext_page; + ctx->w.control_page = plaintext_page; err = f2fs_page_crypto(ctx, inode, F2FS_ENCRYPT, plaintext_page->index, plaintext_page, ciphertext_page); if (err) { diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h index 58682d2830e990..e33cec9c3f3612 100644 --- a/fs/f2fs/f2fs_crypto.h +++ b/fs/f2fs/f2fs_crypto.h @@ -87,16 +87,23 @@ struct f2fs_crypt_info { #define F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 #define F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL 0x00000002 +#define F2FS_WRITE_PATH_FL 0x00000004 struct f2fs_crypto_ctx { struct crypto_tfm *tfm; /* Crypto API context */ - struct page *bounce_page; /* Ciphertext page on write path */ - struct page *control_page; /* Original page on write path */ - struct bio *bio; /* The bio for this context */ - struct work_struct work; /* Work queue for read complete path */ - struct list_head free_list; /* Free list */ - int flags; /* Flags */ - int mode; /* Encryption mode for tfm */ + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + char flags; /* Flags */ + char mode; /* Encryption mode for tfm */ }; struct f2fs_completion_result { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 190e0558de394c..e0821d2c39e49b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1382,7 +1382,7 @@ static inline bool is_merged_page(struct f2fs_sb_info *sbi, /* encrypted page */ ctx = (struct f2fs_crypto_ctx *)page_private( bvec->bv_page); - target = ctx->control_page; + target = ctx->w.control_page; } if (page == target) { From 7bf4b5576af816f3f40bf3b99f0257e6c14bd35c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 13 May 2015 18:20:54 +0800 Subject: [PATCH 57/83] f2fs crypto: fix to release buffer for fname crypto This patch fixes memory leak issue in error path of f2fs_fname_setup_filename(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto_fname.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c index e3a1bdc70b03b8..016c4b63b53d87 100644 --- a/fs/f2fs/crypto_fname.c +++ b/fs/f2fs/crypto_fname.c @@ -425,7 +425,7 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, if (!f2fs_encrypted_inode(dir) || is_dot_dotdot(iname)) { fname->disk_name.name = (unsigned char *)iname->name; fname->disk_name.len = iname->len; - goto out; + return 0; } ret = f2fs_setup_fname_crypto(dir); if (ret) @@ -435,14 +435,13 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, ret = f2fs_fname_crypto_alloc_buffer(dir, iname->len, &fname->crypto_buf); if (ret < 0) - goto out; + return ret; ret = f2fs_fname_encrypt(dir, iname, &fname->crypto_buf); if (ret < 0) goto out; fname->disk_name.name = fname->crypto_buf.name; fname->disk_name.len = fname->crypto_buf.len; - ret = 0; - goto out; + return 0; } if (!lookup) { ret = -EACCES; @@ -476,8 +475,9 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, fname->disk_name.name = fname->crypto_buf.name; fname->disk_name.len = fname->crypto_buf.len; } - ret = 0; + return 0; out: + f2fs_fname_crypto_free_buffer(&fname->crypto_buf); return ret; } From b9da898b05859ac4a0ea4460d798b6bb655790f3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 15 May 2015 11:14:34 +0800 Subject: [PATCH 58/83] f2fs crypto: fix incorrect release for crypto ctx When encryption feature is enable, if we rmmod f2fs module, we will encounter a stack backtrace reported in syslog: "BUG: Bad page state in process rmmod pfn:aaf8a page:f0f4f148 count:0 mapcount:129 mapping:ee2f4104 index:0x80 flags: 0xee2830a4(referenced|lru|slab|private_2|writeback|swapbacked|mlocked) page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set bad because of flags: flags: 0x2030a0(lru|slab|private_2|writeback|mlocked) Modules linked in: f2fs(O-) fuse bnep rfcomm bluetooth dm_crypt binfmt_misc snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_rawmidi snd_seq_midi_event snd_seq snd_timer snd_seq_device joydev ppdev mac_hid lp hid_generic i2c_piix4 parport_pc psmouse snd serio_raw parport soundcore ext4 jbd2 mbcache usbhid hid e1000 [last unloaded: f2fs] CPU: 1 PID: 3049 Comm: rmmod Tainted: G B O 4.1.0-rc3+ #10 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 00000000 00000000 c0021eb4 c15b7518 f0f4f148 c0021ed8 c112e0b7 c1779174 c9b75674 000aaf8a 01b13ce1 c17791a4 f0f4f148 ee2830a4 c0021ef8 c112e3c3 00000000 f0f4f148 c0021f34 f0f4f148 ee2830a4 ef9f0000 c0021f20 c112fdf8 Call Trace: [] dump_stack+0x41/0x52 [] bad_page.part.72+0xa7/0x100 [] free_pages_prepare+0x213/0x220 [] free_hot_cold_page+0x28/0x120 [] ? try_to_wake_up+0x2b0/0x2b0 [] __free_pages+0x25/0x30 [] mempool_free_pages+0xd/0x10 [] mempool_free+0x31/0x90 [] f2fs_exit_crypto+0x6f/0xf0 [f2fs] [] exit_f2fs_fs+0x23/0x95f [f2fs] [] SyS_delete_module+0x130/0x180 [] ? vm_munmap+0x46/0x60 [] sysenter_do_call+0x12/0x12" The reason is that: since commit 0827e645fd35 ("f2fs crypto: shrink size of the f2fs_crypto_ctx structure") is merged, some fields in f2fs_crypto_ctx structure are merged into a union as they will never be used simultaneously in write path, read path or on free list. In f2fs_exit_crypto, we traverse each crypto ctx from free list, in this moment, our free_list field in union is valid, but still we will try to release memory space which is pointed by other invalid field in union structure for each ctx. Then the error occurs, let's fix it with this patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index 6d38da11918d92..7f1ee9e8fc70c4 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -233,14 +233,6 @@ void f2fs_exit_crypto(void) struct f2fs_crypto_ctx *pos, *n; list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list) { - if (pos->w.bounce_page) { - if (pos->flags & - F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) - __free_page(pos->w.bounce_page); - else - mempool_free(pos->w.bounce_page, - f2fs_bounce_page_pool); - } if (pos->tfm) crypto_free_tfm(pos->tfm); kmem_cache_free(f2fs_crypto_ctx_cachep, pos); From cfc4d971df411c68e27d903f749e0f6fffd9da40 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 15 May 2015 15:37:24 -0700 Subject: [PATCH 59/83] f2fs crypto: split f2fs_crypto_init/exit with two parts This patch splits f2fs_crypto_init/exit with two parts: base initialization and memory allocation. Firstly, f2fs module declares the base encryption memory pointers. Then, allocating internal memories is done at the first encrypted inode access. Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 90 +++++++++++++++++++++++++------------------- fs/f2fs/crypto_key.c | 8 ++-- fs/f2fs/f2fs.h | 6 +-- fs/f2fs/super.c | 8 +++- 4 files changed, 65 insertions(+), 47 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index 7f1ee9e8fc70c4..c6d11229aee4d2 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -63,7 +63,7 @@ static mempool_t *f2fs_bounce_page_pool; static LIST_HEAD(f2fs_free_crypto_ctxs); static DEFINE_SPINLOCK(f2fs_crypto_ctx_lock); -struct workqueue_struct *f2fs_read_workqueue; +static struct workqueue_struct *f2fs_read_workqueue; static DEFINE_MUTEX(crypto_init); static struct kmem_cache *f2fs_crypto_ctx_cachep; @@ -225,10 +225,7 @@ void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *ctx, struct bio *bio) queue_work(f2fs_read_workqueue, &ctx->r.work); } -/** - * f2fs_exit_crypto() - Shutdown the f2fs encryption system - */ -void f2fs_exit_crypto(void) +static void f2fs_crypto_destroy(void) { struct f2fs_crypto_ctx *pos, *n; @@ -241,73 +238,90 @@ void f2fs_exit_crypto(void) if (f2fs_bounce_page_pool) mempool_destroy(f2fs_bounce_page_pool); f2fs_bounce_page_pool = NULL; - if (f2fs_read_workqueue) - destroy_workqueue(f2fs_read_workqueue); - f2fs_read_workqueue = NULL; - if (f2fs_crypto_ctx_cachep) - kmem_cache_destroy(f2fs_crypto_ctx_cachep); - f2fs_crypto_ctx_cachep = NULL; - if (f2fs_crypt_info_cachep) - kmem_cache_destroy(f2fs_crypt_info_cachep); - f2fs_crypt_info_cachep = NULL; } /** - * f2fs_init_crypto() - Set up for f2fs encryption. + * f2fs_crypto_initialize() - Set up for f2fs encryption. * * We only call this when we start accessing encrypted files, since it * results in memory getting allocated that wouldn't otherwise be used. * * Return: Zero on success, non-zero otherwise. */ -int f2fs_init_crypto(void) +int f2fs_crypto_initialize(void) { int i, res = -ENOMEM; + if (f2fs_bounce_page_pool) + return 0; + mutex_lock(&crypto_init); - if (f2fs_read_workqueue) + if (f2fs_bounce_page_pool) goto already_initialized; - f2fs_read_workqueue = alloc_workqueue("f2fs_crypto", WQ_HIGHPRI, 0); - if (!f2fs_read_workqueue) - goto fail; - - f2fs_crypto_ctx_cachep = KMEM_CACHE(f2fs_crypto_ctx, - SLAB_RECLAIM_ACCOUNT); - if (!f2fs_crypto_ctx_cachep) - goto fail; - - f2fs_crypt_info_cachep = KMEM_CACHE(f2fs_crypt_info, - SLAB_RECLAIM_ACCOUNT); - if (!f2fs_crypt_info_cachep) - goto fail; - for (i = 0; i < num_prealloc_crypto_ctxs; i++) { struct f2fs_crypto_ctx *ctx; ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_KERNEL); - if (!ctx) { - res = -ENOMEM; + if (!ctx) goto fail; - } list_add(&ctx->free_list, &f2fs_free_crypto_ctxs); } + /* must be allocated at the last step to avoid race condition above */ f2fs_bounce_page_pool = mempool_create_page_pool(num_prealloc_crypto_pages, 0); - if (!f2fs_bounce_page_pool) { - res = -ENOMEM; + if (!f2fs_bounce_page_pool) goto fail; - } + already_initialized: mutex_unlock(&crypto_init); return 0; fail: - f2fs_exit_crypto(); + f2fs_crypto_destroy(); mutex_unlock(&crypto_init); return res; } +/** + * f2fs_exit_crypto() - Shutdown the f2fs encryption system + */ +void f2fs_exit_crypto(void) +{ + f2fs_crypto_destroy(); + + if (f2fs_read_workqueue) + destroy_workqueue(f2fs_read_workqueue); + if (f2fs_crypto_ctx_cachep) + kmem_cache_destroy(f2fs_crypto_ctx_cachep); + if (f2fs_crypt_info_cachep) + kmem_cache_destroy(f2fs_crypt_info_cachep); +} + +int __init f2fs_init_crypto(void) +{ + int res = -ENOMEM; + + f2fs_read_workqueue = alloc_workqueue("f2fs_crypto", WQ_HIGHPRI, 0); + if (!f2fs_read_workqueue) + goto fail; + + f2fs_crypto_ctx_cachep = KMEM_CACHE(f2fs_crypto_ctx, + SLAB_RECLAIM_ACCOUNT); + if (!f2fs_crypto_ctx_cachep) + goto fail; + + f2fs_crypt_info_cachep = KMEM_CACHE(f2fs_crypt_info, + SLAB_RECLAIM_ACCOUNT); + if (!f2fs_crypt_info_cachep) + goto fail; + + return 0; +fail: + f2fs_exit_crypto(); + return res; +} + void f2fs_restore_and_release_control_page(struct page **page) { struct f2fs_crypto_ctx *ctx; diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c index 5ae9cc8fd52b44..8a10569a8b7cdd 100644 --- a/fs/f2fs/crypto_key.c +++ b/fs/f2fs/crypto_key.c @@ -115,11 +115,9 @@ int _f2fs_get_encryption_info(struct inode *inode) struct user_key_payload *ukp; int res; - if (!f2fs_read_workqueue) { - res = f2fs_init_crypto(); - if (res) - return res; - } + res = f2fs_crypto_initialize(); + if (res) + return res; if (fi->i_crypt_info) { if (!fi->i_crypt_info->ci_keyring_key || diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 59e2bc1955caa3..5119167685ed30 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2005,7 +2005,6 @@ int f2fs_get_policy(struct inode *, struct f2fs_encryption_policy *); /* crypt.c */ extern struct kmem_cache *f2fs_crypt_info_cachep; -extern struct workqueue_struct *f2fs_read_workqueue; bool f2fs_valid_contents_enc_mode(uint32_t); uint32_t f2fs_validate_encryption_key_size(uint32_t, uint32_t); struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *); @@ -2032,7 +2031,8 @@ int f2fs_fname_usr_to_disk(struct inode *, const struct qstr *, void f2fs_restore_and_release_control_page(struct page **); void f2fs_restore_control_page(struct page *); -int f2fs_init_crypto(void); +int __init f2fs_init_crypto(void); +int f2fs_crypto_initialize(void); void f2fs_exit_crypto(void); int f2fs_has_encryption_key(struct inode *); @@ -2059,7 +2059,7 @@ void f2fs_fname_free_filename(struct f2fs_filename *); static inline void f2fs_restore_and_release_control_page(struct page **p) { } static inline void f2fs_restore_control_page(struct page *p) { } -static inline int f2fs_init_crypto(void) { return 0; } +static inline int __init f2fs_init_crypto(void) { return 0; } static inline void f2fs_exit_crypto(void) { } static inline int f2fs_has_encryption_key(struct inode *i) { return 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bfc900e87fccc1..4f74fee2e2afd8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1359,13 +1359,18 @@ static int __init init_f2fs_fs(void) err = -ENOMEM; goto free_extent_cache; } - err = register_filesystem(&f2fs_fs_type); + err = f2fs_init_crypto(); if (err) goto free_kset; + err = register_filesystem(&f2fs_fs_type); + if (err) + goto free_crypto; f2fs_create_root_stats(); f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); return 0; +free_crypto: + f2fs_exit_crypto(); free_kset: kset_unregister(f2fs_kset); free_extent_cache: @@ -1387,6 +1392,7 @@ static void __exit exit_f2fs_fs(void) remove_proc_entry("fs/f2fs", NULL); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); + f2fs_exit_crypto(); destroy_extent_cache(); destroy_checkpoint_caches(); destroy_segment_manager_caches(); From cb5c94cf3a825a0b64ae84516e7dd5c8a59befe6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 18 May 2015 18:00:06 +0800 Subject: [PATCH 60/83] f2fs crypto: zero next free dnode block Now page cache of meta inode is used by garbage collection for encrypted page, it may contain random data, so we should zero it before issuing discard. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e0821d2c39e49b..7254c11344bee0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -519,7 +519,7 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) if (err) { struct page *page = grab_meta_page(sbi, blkaddr); - /* zero-filled page */ + memset(page_address(page), 0, F2FS_BLKSIZE); set_page_dirty(page); f2fs_put_page(page, 1); } From 381722d2ac0314f65fd98db9a0eb8bdd1d984925 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 19 May 2015 17:40:04 +0800 Subject: [PATCH 61/83] f2fs: introduce update_meta_page Add a help function update_meta_page() to update meta page with specified buffer. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 26 ++++++-------------------- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 25 +++++++++++++++---------- 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7b7a9d8bd6cad2..b70bbe1a6a8ca6 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -888,10 +888,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; nid_t last_nid = nm_i->next_scan_nid; block_t start_blk; - struct page *cp_page; unsigned int data_sum_blocks, orphan_blocks; __u32 crc32 = 0; - void *kaddr; int i; int cp_payload_blks = __cp_payload(sbi); @@ -988,19 +986,11 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk = __start_cp_addr(sbi); /* write out checkpoint buffer at block 0 */ - cp_page = grab_meta_page(sbi, start_blk++); - kaddr = page_address(cp_page); - memcpy(kaddr, ckpt, F2FS_BLKSIZE); - set_page_dirty(cp_page); - f2fs_put_page(cp_page, 1); - - for (i = 1; i < 1 + cp_payload_blks; i++) { - cp_page = grab_meta_page(sbi, start_blk++); - kaddr = page_address(cp_page); - memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, F2FS_BLKSIZE); - set_page_dirty(cp_page); - f2fs_put_page(cp_page, 1); - } + update_meta_page(sbi, ckpt, start_blk++); + + for (i = 1; i < 1 + cp_payload_blks; i++) + update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE, + start_blk++); if (orphan_num) { write_orphan_inodes(sbi, start_blk); @@ -1015,11 +1005,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) } /* writeout checkpoint block */ - cp_page = grab_meta_page(sbi, start_blk); - kaddr = page_address(cp_page); - memcpy(kaddr, ckpt, F2FS_BLKSIZE); - set_page_dirty(cp_page); - f2fs_put_page(cp_page, 1); + update_meta_page(sbi, ckpt, start_blk); /* wait for previous submitted node/meta pages writeback */ wait_on_all_pages_writeback(sbi); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5119167685ed30..7e93fcf2df788b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1690,6 +1690,7 @@ int npages_for_summary_flush(struct f2fs_sb_info *, bool); void allocate_new_segments(struct f2fs_sb_info *); int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); +void update_meta_page(struct f2fs_sb_info *, void *, block_t); void write_meta_page(struct f2fs_sb_info *, struct page *); void write_node_page(unsigned int, struct f2fs_io_info *); void write_data_page(struct dnode_of_data *, struct f2fs_io_info *); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7254c11344bee0..59566ae3f96407 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -517,12 +517,8 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) err = f2fs_issue_discard(sbi, blkaddr, 1); } - if (err) { - struct page *page = grab_meta_page(sbi, blkaddr); - memset(page_address(page), 0, F2FS_BLKSIZE); - set_page_dirty(page); - f2fs_put_page(page, 1); - } + if (err) + update_meta_page(sbi, NULL, blkaddr); } static void __add_discard_entry(struct f2fs_sb_info *sbi, @@ -801,16 +797,25 @@ struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); } -static void write_sum_page(struct f2fs_sb_info *sbi, - struct f2fs_summary_block *sum_blk, block_t blk_addr) +void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) { struct page *page = grab_meta_page(sbi, blk_addr); - void *kaddr = page_address(page); - memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE); + void *dst = page_address(page); + + if (src) + memcpy(dst, src, PAGE_CACHE_SIZE); + else + memset(dst, 0, PAGE_CACHE_SIZE); set_page_dirty(page); f2fs_put_page(page, 1); } +static void write_sum_page(struct f2fs_sb_info *sbi, + struct f2fs_summary_block *sum_blk, block_t blk_addr) +{ + update_meta_page(sbi, (void *)sum_blk, blk_addr); +} + static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); From 7e01e7ad746bc8198a8b46163ddc73a1c7d22339 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 19 May 2015 17:37:26 +0800 Subject: [PATCH 62/83] f2fs: support RENAME_WHITEOUT As the description of rename in manual, RENAME_WHITEOUT is a special operation that only makes sense for overlay/union type filesystem. When performing rename with RENAME_WHITEOUT, dst will be replace with src, and meanwhile, a 'whiteout' will be create with name of src. A "whiteout" is designed to be a char device with 0,0 device number, it has specially meaning for stackable filesystem. In these filesystems, there are multiple layers exist, and only top of these can be modified. So a whiteout in top layer is used to hide a corresponding file in lower layer, as well removal of whiteout will make the file appear. Now in overlayfs, when we rename a file which is exist in lower layer, it will be copied up to upper if it is not on upper layer yet, and then rename it on upper layer, source file will be whiteouted to hide corresponding file in lower layer at the same time. So in upper layer filesystem, implementation of RENAME_WHITEOUT provide a atomic operation for stackable filesystem to support rename operation. There are multiple ways to implement RENAME_WHITEOUT in log of this commit: 7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") which pointed out by Dave Chinner. For now, we just try to follow the way that xfs/ext4 use. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 153 +++++++++++++++++++++++++++++++----------------- 1 file changed, 100 insertions(+), 53 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 4ad7242f4829ca..6edad57d7b4768 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -510,14 +510,83 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, return err; } +static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, + umode_t mode, struct inode **whiteout) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct inode *inode; + int err; + + if (!whiteout) + f2fs_balance_fs(sbi); + + inode = f2fs_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + if (whiteout) { + init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); + inode->i_op = &f2fs_special_inode_operations; + } else { + inode->i_op = &f2fs_file_inode_operations; + inode->i_fop = &f2fs_file_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + } + + f2fs_lock_op(sbi); + err = acquire_orphan_inode(sbi); + if (err) + goto out; + + err = f2fs_do_tmpfile(inode, dir); + if (err) + goto release_out; + + /* + * add this non-linked tmpfile to orphan list, in this way we could + * remove all unused data of tmpfile after abnormal power-off. + */ + add_orphan_inode(sbi, inode->i_ino); + f2fs_unlock_op(sbi); + + alloc_nid_done(sbi, inode->i_ino); + + if (whiteout) { + inode_dec_link_count(inode); + *whiteout = inode; + } else { + d_tmpfile(dentry, inode); + } + unlock_new_inode(inode); + return 0; + +release_out: + release_orphan_inode(sbi); +out: + handle_failed_inode(inode); + return err; +} + +static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + return __f2fs_tmpfile(dir, dentry, mode, NULL); +} + +static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout) +{ + return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout); +} + static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) { struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); + struct inode *whiteout = NULL; struct page *old_dir_page; - struct page *old_page, *new_page; + struct page *old_page, *new_page = NULL; struct f2fs_dir_entry *old_dir_entry = NULL; struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; @@ -543,17 +612,23 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_old; } + if (flags & RENAME_WHITEOUT) { + err = f2fs_create_whiteout(old_dir, &whiteout); + if (err) + goto out_dir; + } + if (new_inode) { err = -ENOTEMPTY; if (old_dir_entry && !f2fs_empty_dir(new_inode)) - goto out_dir; + goto out_whiteout; err = -ENOENT; new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page); if (!new_entry) - goto out_dir; + goto out_whiteout; f2fs_lock_op(sbi); @@ -591,7 +666,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, err = f2fs_add_link(new_dentry, old_inode); if (err) { f2fs_unlock_op(sbi); - goto out_dir; + goto out_whiteout; } if (old_dir_entry) { @@ -611,8 +686,18 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_delete_entry(old_entry, old_page, old_dir, NULL); + if (whiteout) { + whiteout->i_state |= I_LINKABLE; + set_inode_flag(F2FS_I(whiteout), FI_INC_LINK); + err = f2fs_add_link(old_dentry, whiteout); + if (err) + goto put_out_dir; + whiteout->i_state &= ~I_LINKABLE; + iput(whiteout); + } + if (old_dir_entry) { - if (old_dir != new_dir) { + if (old_dir != new_dir && !whiteout) { f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); update_inode_page(old_inode); @@ -633,8 +718,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, put_out_dir: f2fs_unlock_op(sbi); - f2fs_dentry_kunmap(new_dir, new_page); - f2fs_put_page(new_page, 0); + if (new_page) { + f2fs_dentry_kunmap(new_dir, new_page); + f2fs_put_page(new_page, 0); + } +out_whiteout: + if (whiteout) + iput(whiteout); out_dir: if (old_dir_entry) { f2fs_dentry_kunmap(old_inode, old_dir_page); @@ -805,7 +895,7 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { - if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; if (flags & RENAME_EXCHANGE) { @@ -816,50 +906,7 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, * VFS has already handled the new dentry existence case, * here, we just deal with "RENAME_NOREPLACE" as regular rename. */ - return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry); -} - -static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct inode *inode; - int err; - - inode = f2fs_new_inode(dir, mode); - if (IS_ERR(inode)) - return PTR_ERR(inode); - - inode->i_op = &f2fs_file_inode_operations; - inode->i_fop = &f2fs_file_operations; - inode->i_mapping->a_ops = &f2fs_dblock_aops; - - f2fs_lock_op(sbi); - err = acquire_orphan_inode(sbi); - if (err) - goto out; - - err = f2fs_do_tmpfile(inode, dir); - if (err) - goto release_out; - - /* - * add this non-linked tmpfile to orphan list, in this way we could - * remove all unused data of tmpfile after abnormal power-off. - */ - add_orphan_inode(sbi, inode->i_ino); - f2fs_unlock_op(sbi); - - alloc_nid_done(sbi, inode->i_ino); - - d_tmpfile(dentry, inode); - unlock_new_inode(inode); - return 0; - -release_out: - release_orphan_inode(sbi); -out: - handle_failed_inode(inode); - return err; + return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry, flags); } #ifdef CONFIG_F2FS_FS_ENCRYPTION From 304eecc3462ed62006433d04e3ad945f92f90d52 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 19 May 2015 16:11:40 -0700 Subject: [PATCH 63/83] f2fs crypto: check encryption for tmpfile This patch adds to check encryption for tmpfile in early stage. Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6edad57d7b4768..a316783de8c9ca 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -569,6 +569,12 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { + if (f2fs_encrypted_inode(dir)) { + int err = f2fs_get_encryption_info(dir); + if (err) + return err; + } + return __f2fs_tmpfile(dir, dentry, mode, NULL); } From da554e48caab95a48afe0d7be8d9eae2bbdaa28d Mon Sep 17 00:00:00 2001 From: hujianyang Date: Thu, 21 May 2015 14:42:53 +0800 Subject: [PATCH 64/83] f2fs: recovering broken superblock during mount This patch recovers a broken superblock with the other valid one. Signed-off-by: hujianyang [Jaegeuk Kim: reinitialize local variables in f2fs_fill_super for retrial] Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 65 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4f74fee2e2afd8..d4cd04dc331497 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -970,29 +970,36 @@ static void init_sb_info(struct f2fs_sb_info *sbi) */ static int read_raw_super_block(struct super_block *sb, struct f2fs_super_block **raw_super, - struct buffer_head **raw_super_buf) + struct buffer_head **raw_super_buf, + int *recovery) { int block = 0; + struct buffer_head *buffer; + struct f2fs_super_block *super; + int err = 0; retry: - *raw_super_buf = sb_bread(sb, block); - if (!*raw_super_buf) { + buffer = sb_bread(sb, block); + if (!buffer) { + *recovery = 1; f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock", block + 1); if (block == 0) { block++; goto retry; } else { - return -EIO; + err = -EIO; + goto out; } } - *raw_super = (struct f2fs_super_block *) - ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET); + super = (struct f2fs_super_block *) + ((char *)(buffer)->b_data + F2FS_SUPER_OFFSET); /* sanity checking of raw super */ - if (sanity_check_raw_super(sb, *raw_super)) { - brelse(*raw_super_buf); + if (sanity_check_raw_super(sb, super)) { + brelse(buffer); + *recovery = 1; f2fs_msg(sb, KERN_ERR, "Can't find valid F2FS filesystem in %dth superblock", block + 1); @@ -1000,10 +1007,30 @@ static int read_raw_super_block(struct super_block *sb, block++; goto retry; } else { - return -EINVAL; + err = -EINVAL; + goto out; } } + if (!*raw_super) { + *raw_super_buf = buffer; + *raw_super = super; + } else { + /* already have a valid superblock */ + brelse(buffer); + } + + /* check the validity of the second superblock */ + if (block == 0) { + block++; + goto retry; + } + +out: + /* No valid superblock */ + if (!*raw_super) + return err; + return 0; } @@ -1034,15 +1061,20 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi) static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; - struct f2fs_super_block *raw_super = NULL; + struct f2fs_super_block *raw_super; struct buffer_head *raw_super_buf; struct inode *root; - long err = -EINVAL; + long err; bool retry = true, need_fsck = false; char *options = NULL; - int i; + int recovery, i; try_onemore: + err = -EINVAL; + raw_super = NULL; + raw_super_buf = NULL; + recovery = 0; + /* allocate memory for f2fs-specific super block info */ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); if (!sbi) @@ -1054,7 +1086,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; } - err = read_raw_super_block(sb, &raw_super, &raw_super_buf); + err = read_raw_super_block(sb, &raw_super, &raw_super_buf, &recovery); if (err) goto free_sbi; @@ -1252,6 +1284,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_kobj; } kfree(options); + + /* recover broken superblock */ + if (recovery && !f2fs_readonly(sb) && !bdev_read_only(sb->s_bdev)) { + f2fs_msg(sb, KERN_INFO, "Recover invalid superblock"); + f2fs_commit_super(sbi); + } + return 0; free_kobj: From 26bf3dc7e25b813ff5c92234f8165941fdc12a63 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 19 May 2015 22:26:54 -0700 Subject: [PATCH 65/83] f2fs crypto: use per-inode tfm structure This patch applies the following ext4 patch: ext4 crypto: use per-inode tfm structure As suggested by Herbert Xu, we shouldn't allocate a new tfm each time we read or write a page. Instead we can use a single tfm hanging off the inode's crypt_info structure for all of our encryption needs for that inode, since the tfm can be used by multiple crypto requests in parallel. Also use cmpxchg() to avoid races that could result in crypt_info structure getting doubly allocated or doubly freed. Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 82 +++---------------------------- fs/f2fs/crypto_fname.c | 48 +----------------- fs/f2fs/crypto_key.c | 107 ++++++++++++++++++++++++++++++----------- fs/f2fs/dir.c | 8 +-- fs/f2fs/f2fs.h | 5 +- fs/f2fs/f2fs_crypto.h | 4 -- fs/f2fs/inode.c | 2 +- fs/f2fs/namei.c | 4 +- fs/f2fs/super.c | 3 +- 9 files changed, 96 insertions(+), 167 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index c6d11229aee4d2..2c7819aebcc5e4 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -91,8 +91,6 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) } ctx->w.control_page = NULL; if (ctx->flags & F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { - if (ctx->tfm) - crypto_free_tfm(ctx->tfm); kmem_cache_free(f2fs_crypto_ctx_cachep, ctx); } else { spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags); @@ -113,7 +111,6 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) { struct f2fs_crypto_ctx *ctx = NULL; - int res = 0; unsigned long flags; struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; @@ -138,56 +135,13 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags); if (!ctx) { ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_NOFS); - if (!ctx) { - res = -ENOMEM; - goto out; - } + if (!ctx) + return ERR_PTR(-ENOMEM); ctx->flags |= F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; } else { ctx->flags &= ~F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; } ctx->flags &= ~F2FS_WRITE_PATH_FL; - - /* - * Allocate a new Crypto API context if we don't already have - * one or if it isn't the right mode. - */ - if (ctx->tfm && (ctx->mode != ci->ci_data_mode)) { - crypto_free_tfm(ctx->tfm); - ctx->tfm = NULL; - ctx->mode = F2FS_ENCRYPTION_MODE_INVALID; - } - if (!ctx->tfm) { - switch (ci->ci_data_mode) { - case F2FS_ENCRYPTION_MODE_AES_256_XTS: - ctx->tfm = crypto_ablkcipher_tfm( - crypto_alloc_ablkcipher("xts(aes)", 0, 0)); - break; - case F2FS_ENCRYPTION_MODE_AES_256_GCM: - /* - * TODO(mhalcrow): AEAD w/ gcm(aes); - * crypto_aead_setauthsize() - */ - ctx->tfm = ERR_PTR(-ENOTSUPP); - break; - default: - BUG(); - } - if (IS_ERR_OR_NULL(ctx->tfm)) { - res = PTR_ERR(ctx->tfm); - ctx->tfm = NULL; - goto out; - } - ctx->mode = ci->ci_data_mode; - } - BUG_ON(ci->ci_size != f2fs_encryption_key_size(ci->ci_data_mode)); - -out: - if (res) { - if (!IS_ERR_OR_NULL(ctx)) - f2fs_release_crypto_ctx(ctx); - ctx = ERR_PTR(res); - } return ctx; } @@ -229,11 +183,8 @@ static void f2fs_crypto_destroy(void) { struct f2fs_crypto_ctx *pos, *n; - list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list) { - if (pos->tfm) - crypto_free_tfm(pos->tfm); + list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list) kmem_cache_free(f2fs_crypto_ctx_cachep, pos); - } INIT_LIST_HEAD(&f2fs_free_crypto_ctxs); if (f2fs_bounce_page_pool) mempool_destroy(f2fs_bounce_page_pool); @@ -383,32 +334,11 @@ static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx, struct ablkcipher_request *req = NULL; DECLARE_F2FS_COMPLETION_RESULT(ecr); struct scatterlist dst, src; - struct f2fs_inode_info *fi = F2FS_I(inode); - struct crypto_ablkcipher *atfm = __crypto_ablkcipher_cast(ctx->tfm); + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; int res = 0; - BUG_ON(!ctx->tfm); - BUG_ON(ctx->mode != fi->i_crypt_info->ci_data_mode); - - if (ctx->mode != F2FS_ENCRYPTION_MODE_AES_256_XTS) { - printk_ratelimited(KERN_ERR - "%s: unsupported crypto algorithm: %d\n", - __func__, ctx->mode); - return -ENOTSUPP; - } - - crypto_ablkcipher_clear_flags(atfm, ~0); - crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_REQ_WEAK_KEY); - - res = crypto_ablkcipher_setkey(atfm, fi->i_crypt_info->ci_raw, - fi->i_crypt_info->ci_size); - if (res) { - printk_ratelimited(KERN_ERR - "%s: crypto_ablkcipher_setkey() failed\n", - __func__); - return res; - } - req = ablkcipher_request_alloc(atfm, GFP_NOFS); + req = ablkcipher_request_alloc(tfm, GFP_NOFS); if (!req) { printk_ratelimited(KERN_ERR "%s: crypto_request_alloc() failed\n", diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c index 016c4b63b53d87..81852cca7bbe39 100644 --- a/fs/f2fs/crypto_fname.c +++ b/fs/f2fs/crypto_fname.c @@ -249,52 +249,6 @@ static int digest_decode(const char *src, int len, char *dst) return cp - dst; } -int f2fs_setup_fname_crypto(struct inode *inode) -{ - struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_crypt_info *ci = fi->i_crypt_info; - struct crypto_ablkcipher *ctfm; - int res; - - /* Check if the crypto policy is set on the inode */ - res = f2fs_encrypted_inode(inode); - if (res == 0) - return 0; - - res = f2fs_get_encryption_info(inode); - if (res < 0) - return res; - ci = fi->i_crypt_info; - - if (!ci || ci->ci_ctfm) - return 0; - - if (ci->ci_filename_mode != F2FS_ENCRYPTION_MODE_AES_256_CTS) { - printk_once(KERN_WARNING "f2fs: unsupported key mode %d\n", - ci->ci_filename_mode); - return -ENOKEY; - } - - ctfm = crypto_alloc_ablkcipher("cts(cbc(aes))", 0, 0); - if (!ctfm || IS_ERR(ctfm)) { - res = ctfm ? PTR_ERR(ctfm) : -ENOMEM; - printk(KERN_DEBUG "%s: error (%d) allocating crypto tfm\n", - __func__, res); - return res; - } - crypto_ablkcipher_clear_flags(ctfm, ~0); - crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm), - CRYPTO_TFM_REQ_WEAK_KEY); - - res = crypto_ablkcipher_setkey(ctfm, ci->ci_raw, ci->ci_size); - if (res) { - crypto_free_ablkcipher(ctfm); - return -EIO; - } - ci->ci_ctfm = ctfm; - return 0; -} - /** * f2fs_fname_crypto_round_up() - * @@ -427,7 +381,7 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, fname->disk_name.len = iname->len; return 0; } - ret = f2fs_setup_fname_crypto(dir); + ret = f2fs_get_encryption_info(dir); if (ret) return ret; ci = F2FS_I(dir)->i_crypt_info; diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c index 8a10569a8b7cdd..95b8f936f00b64 100644 --- a/fs/f2fs/crypto_key.c +++ b/fs/f2fs/crypto_key.c @@ -87,20 +87,31 @@ static int f2fs_derive_key_aes(char deriving_key[F2FS_AES_128_ECB_KEY_SIZE], return res; } -void f2fs_free_encryption_info(struct inode *inode) +static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci) { - struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_crypt_info *ci = fi->i_crypt_info; - if (!ci) return; if (ci->ci_keyring_key) key_put(ci->ci_keyring_key); crypto_free_ablkcipher(ci->ci_ctfm); - memzero_explicit(&ci->ci_raw, sizeof(ci->ci_raw)); kmem_cache_free(f2fs_crypt_info_cachep, ci); - fi->i_crypt_info = NULL; +} + +void f2fs_free_encryption_info(struct inode *inode, struct f2fs_crypt_info *ci) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_crypt_info *prev; + + if (ci == NULL) + ci = ACCESS_ONCE(fi->i_crypt_info); + if (ci == NULL) + return; + prev = cmpxchg(&fi->i_crypt_info, ci, NULL); + if (prev != ci) + return; + + f2fs_free_crypt_info(ci); } int _f2fs_get_encryption_info(struct inode *inode) @@ -113,17 +124,23 @@ int _f2fs_get_encryption_info(struct inode *inode) struct f2fs_encryption_key *master_key; struct f2fs_encryption_context ctx; struct user_key_payload *ukp; + struct crypto_ablkcipher *ctfm; + const char *cipher_str; + char raw_key[F2FS_MAX_KEY_SIZE]; + char mode; int res; res = f2fs_crypto_initialize(); if (res) return res; - - if (fi->i_crypt_info) { - if (!fi->i_crypt_info->ci_keyring_key || - key_validate(fi->i_crypt_info->ci_keyring_key) == 0) +retry: + crypt_info = ACCESS_ONCE(fi->i_crypt_info); + if (crypt_info) { + if (!crypt_info->ci_keyring_key || + key_validate(crypt_info->ci_keyring_key) == 0) return 0; - f2fs_free_encryption_info(inode); + f2fs_free_encryption_info(inode, crypt_info); + goto retry; } res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, @@ -143,18 +160,30 @@ int _f2fs_get_encryption_info(struct inode *inode) crypt_info->ci_data_mode = ctx.contents_encryption_mode; crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; crypt_info->ci_ctfm = NULL; + crypt_info->ci_keyring_key = NULL; memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, sizeof(crypt_info->ci_master_key)); if (S_ISREG(inode->i_mode)) - crypt_info->ci_size = - f2fs_encryption_key_size(crypt_info->ci_data_mode); + mode = crypt_info->ci_data_mode; else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - crypt_info->ci_size = - f2fs_encryption_key_size(crypt_info->ci_filename_mode); + mode = crypt_info->ci_filename_mode; else BUG(); - BUG_ON(!crypt_info->ci_size); + switch (mode) { + case F2FS_ENCRYPTION_MODE_AES_256_XTS: + cipher_str = "xts(aes)"; + break; + case F2FS_ENCRYPTION_MODE_AES_256_CTS: + cipher_str = "cts(cbc(aes))"; + break; + default: + printk_once(KERN_WARNING + "f2fs: unsupported key mode %d (ino %u)\n", + mode, (unsigned) inode->i_ino); + res = -ENOKEY; + goto out; + } memcpy(full_key_descriptor, F2FS_KEY_DESC_PREFIX, F2FS_KEY_DESC_PREFIX_SIZE); @@ -169,6 +198,7 @@ int _f2fs_get_encryption_info(struct inode *inode) keyring_key = NULL; goto out; } + crypt_info->ci_keyring_key = keyring_key; BUG_ON(keyring_key->type != &key_type_logon); ukp = ((struct user_key_payload *)keyring_key->payload.data); if (ukp->datalen != sizeof(struct f2fs_encryption_key)) { @@ -180,19 +210,40 @@ int _f2fs_get_encryption_info(struct inode *inode) F2FS_KEY_DERIVATION_NONCE_SIZE); BUG_ON(master_key->size != F2FS_AES_256_XTS_KEY_SIZE); res = f2fs_derive_key_aes(ctx.nonce, master_key->raw, - crypt_info->ci_raw); -out: - if (res < 0) { - if (res == -ENOKEY) - res = 0; - kmem_cache_free(f2fs_crypt_info_cachep, crypt_info); - } else { - fi->i_crypt_info = crypt_info; - crypt_info->ci_keyring_key = keyring_key; - keyring_key = NULL; + raw_key); + if (res) + goto out; + + ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0); + if (!ctfm || IS_ERR(ctfm)) { + res = ctfm ? PTR_ERR(ctfm) : -ENOMEM; + printk(KERN_DEBUG + "%s: error %d (inode %u) allocating crypto tfm\n", + __func__, res, (unsigned) inode->i_ino); + goto out; } - if (keyring_key) - key_put(keyring_key); + crypt_info->ci_ctfm = ctfm; + crypto_ablkcipher_clear_flags(ctfm, ~0); + crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm), + CRYPTO_TFM_REQ_WEAK_KEY); + res = crypto_ablkcipher_setkey(ctfm, raw_key, + f2fs_encryption_key_size(mode)); + if (res) + goto out; + + memzero_explicit(raw_key, sizeof(raw_key)); + if (cmpxchg(&fi->i_crypt_info, NULL, crypt_info) != NULL) { + f2fs_free_crypt_info(crypt_info); + goto retry; + } + return 0; + +out: + if (res == -ENOKEY && !S_ISREG(inode->i_mode)) + res = 0; + + f2fs_free_crypt_info(crypt_info); + memzero_explicit(raw_key, sizeof(raw_key)); return res; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 3e923763daca0c..a34ebd8312ab89 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -825,11 +825,11 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) struct f2fs_str fstr = FSTR_INIT(NULL, 0); int err = 0; - err = f2fs_setup_fname_crypto(inode); - if (err) - return err; - if (f2fs_encrypted_inode(inode)) { + err = f2fs_get_encryption_info(inode); + if (err) + return err; + err = f2fs_fname_crypto_alloc_buffer(inode, F2FS_NAME_LEN, &fstr); if (err < 0) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7e93fcf2df788b..70cdf7b81e8067 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2016,7 +2016,7 @@ int f2fs_decrypt_one(struct inode *, struct page *); void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *); /* crypto_key.c */ -void f2fs_free_encryption_info(struct inode *); +void f2fs_free_encryption_info(struct inode *, struct f2fs_crypt_info *); int _f2fs_get_encryption_info(struct inode *inode); /* crypto_fname.c */ @@ -2051,7 +2051,6 @@ static inline int f2fs_get_encryption_info(struct inode *inode) return 0; } -int f2fs_setup_fname_crypto(struct inode *); void f2fs_fname_crypto_free_buffer(struct f2fs_str *); int f2fs_fname_setup_filename(struct inode *, const struct qstr *, int lookup, struct f2fs_filename *); @@ -2065,8 +2064,6 @@ static inline void f2fs_exit_crypto(void) { } static inline int f2fs_has_encryption_key(struct inode *i) { return 0; } static inline int f2fs_get_encryption_info(struct inode *i) { return 0; } - -static inline int f2fs_setup_fname_crypto(struct inode *i) { return 0; } static inline void f2fs_fname_crypto_free_buffer(struct f2fs_str *p) { } static inline int f2fs_fname_setup_filename(struct inode *dir, diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h index e33cec9c3f3612..be59d91928fdec 100644 --- a/fs/f2fs/f2fs_crypto.h +++ b/fs/f2fs/f2fs_crypto.h @@ -75,13 +75,11 @@ struct f2fs_encryption_key { } __attribute__((__packed__)); struct f2fs_crypt_info { - unsigned char ci_size; char ci_data_mode; char ci_filename_mode; char ci_flags; struct crypto_ablkcipher *ci_ctfm; struct key *ci_keyring_key; - char ci_raw[F2FS_MAX_KEY_SIZE]; char ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE]; }; @@ -90,7 +88,6 @@ struct f2fs_crypt_info { #define F2FS_WRITE_PATH_FL 0x00000004 struct f2fs_crypto_ctx { - struct crypto_tfm *tfm; /* Crypto API context */ union { struct { struct page *bounce_page; /* Ciphertext page */ @@ -103,7 +100,6 @@ struct f2fs_crypto_ctx { struct list_head free_list; /* Free list */ }; char flags; /* Flags */ - char mode; /* Encryption mode for tfm */ }; struct f2fs_completion_result { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ea6ba3bc84725e..2550868dc651ee 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -364,7 +364,7 @@ void f2fs_evict_inode(struct inode *inode) out_clear: #ifdef CONFIG_F2FS_FS_ENCRYPTION if (F2FS_I(inode)->i_crypt_info) - f2fs_free_encryption_info(inode); + f2fs_free_encryption_info(inode, F2FS_I(inode)->i_crypt_info); #endif clear_inode(inode); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a316783de8c9ca..55d0d27dfdf206 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -364,7 +364,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (err) goto err_out; - err = f2fs_setup_fname_crypto(inode); + err = f2fs_get_encryption_info(inode); if (err) goto err_out; @@ -929,7 +929,7 @@ static void *f2fs_encrypted_follow_link(struct dentry *dentry, u32 max_size = inode->i_sb->s_blocksize; int res; - res = f2fs_setup_fname_crypto(inode); + res = f2fs_get_encryption_info(inode); if (res) return ERR_PTR(res); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d4cd04dc331497..8b5f5fb8f5cb4e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -449,7 +449,8 @@ static int f2fs_drop_inode(struct inode *inode) #ifdef CONFIG_F2FS_FS_ENCRYPTION if (F2FS_I(inode)->i_crypt_info) - f2fs_free_encryption_info(inode); + f2fs_free_encryption_info(inode, + F2FS_I(inode)->i_crypt_info); #endif spin_lock(&inode->i_lock); } From 12377024719f08b7411afe9fc0169b13808dfefa Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 25 May 2015 18:03:38 +0800 Subject: [PATCH 66/83] f2fs: avoid duplicated code by reusing f2fs_read_end_io This patch tries to clean up code because part code of f2fs_read_end_io and mpage_end_io are the same, so it's better to merge and reuse them. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8d04e24a889ff3..9bedfa8dd3a530 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -34,29 +34,6 @@ static void f2fs_read_end_io(struct bio *bio, int err) struct bio_vec *bvec; int i; - bio_for_each_segment_all(bvec, bio, i) { - struct page *page = bvec->bv_page; - - if (!err) { - SetPageUptodate(page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - unlock_page(page); - } - bio_put(bio); -} - -/* - * I/O completion handler for multipage BIOs. - * copied from fs/mpage.c - */ -static void mpage_end_io(struct bio *bio, int err) -{ - struct bio_vec *bv; - int i; - if (f2fs_bio_encrypted(bio)) { if (err) { f2fs_release_crypto_ctx(bio->bi_private); @@ -66,8 +43,8 @@ static void mpage_end_io(struct bio *bio, int err) } } - bio_for_each_segment_all(bv, bio, i) { - struct page *page = bv->bv_page; + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; if (!err) { SetPageUptodate(page); @@ -77,7 +54,6 @@ static void mpage_end_io(struct bio *bio, int err) } unlock_page(page); } - bio_put(bio); } @@ -122,7 +98,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, bio->bi_bdev = sbi->sb->s_bdev; bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr); bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; - bio->bi_private = sbi; + bio->bi_private = is_read ? NULL : sbi; return bio; } @@ -1584,7 +1560,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, } bio->bi_bdev = bdev; bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr); - bio->bi_end_io = mpage_end_io; + bio->bi_end_io = f2fs_read_end_io; bio->bi_private = ctx; } From d3baf7c4725601d4689397b9f7dde9e3ddea032d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 25 May 2015 18:07:02 +0800 Subject: [PATCH 67/83] f2fs crypto: check context consistent for rename2 For exchange rename, we should check context consistent of encryption between new_dir and old_inode or old_dir and new_inode. Otherwise inheritance of parent's encryption context will be broken. Signed-off-by: Chao Yu [Jaegeuk Kim: sync with ext4 approach] Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 55d0d27dfdf206..1cc24a0cbc589b 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -756,6 +756,14 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, int old_nlink = 0, new_nlink = 0; int err = -ENOENT; + if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) && + (old_dir != new_dir) && + (!f2fs_is_child_context_consistent_with_parent(new_dir, + old_inode) || + !f2fs_is_child_context_consistent_with_parent(old_dir, + new_inode))) + return -EPERM; + f2fs_balance_fs(sbi); old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); From 81b0a8ffaa92012e63ea611e8aeb9e74acfcedac Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 25 May 2015 18:09:03 +0800 Subject: [PATCH 68/83] f2fs crypto: allow setting encryption policy once This patch add XATTR_CREATE flag in setxattr when setting encryption context for inode. Without this flag the context could be set more than once, this should never happen. So, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c index bef254b21815a5..30b0b73d420003 100644 --- a/fs/f2fs/crypto_policy.c +++ b/fs/f2fs/crypto_policy.c @@ -83,7 +83,7 @@ static int f2fs_create_encryption_context_from_policy( return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, - sizeof(ctx), NULL, 0); + sizeof(ctx), NULL, XATTR_CREATE); } int f2fs_process_policy(const struct f2fs_encryption_policy *policy, @@ -202,5 +202,5 @@ int f2fs_inherit_context(struct inode *parent, struct inode *child, get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE); return f2fs_setxattr(child, F2FS_XATTR_INDEX_ENCRYPTION, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, - sizeof(ctx), ipage, 0); + sizeof(ctx), ipage, XATTR_CREATE); } From 4637fd11fff7ddca1a1d7d27d2fbfc4dcb8e9791 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 27 May 2015 15:27:49 +0800 Subject: [PATCH 69/83] f2fs crypto: do not set encryption policy for non-directory by ioctl Encryption policy should only be set to an empty directory through ioctl, This patch add a judgement condition to verify type of the target inode to avoid incorrectly configuring for non-directory. Additionally, remove unneeded inline data conversion since regular or symlink file should not be processed here. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto_policy.c | 3 +++ fs/f2fs/file.c | 6 ------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c index 30b0b73d420003..d4a96af513c22f 100644 --- a/fs/f2fs/crypto_policy.c +++ b/fs/f2fs/crypto_policy.c @@ -92,6 +92,9 @@ int f2fs_process_policy(const struct f2fs_encryption_policy *policy, if (policy->version != 0) return -EINVAL; + if (!S_ISDIR(inode->i_mode)) + return -EINVAL; + if (!f2fs_inode_has_encryption_context(inode)) { if (!f2fs_empty_dir(inode)) return -ENOTEMPTY; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index cb0d6b68020a16..6b4ba7429dbaf2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1395,12 +1395,6 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) sizeof(policy))) return -EFAULT; - if (f2fs_has_inline_data(inode)) { - int ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; - } - return f2fs_process_policy(&policy, inode); #else return -EOPNOTSUPP; From e992e238ff93920da65f5aa83d3f3e257530ce8b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 27 May 2015 19:51:42 -0700 Subject: [PATCH 70/83] f2fs crypto: avoid f2fs_inherit_context for symlink This patch fixes to call f2fs_inherit_context twice for newly created symlink. The original one is called by f2fs_add_link(), which invokes f2fs_setxattr. If the second one is called again, f2fs_setxattr is triggered again with same encryption index. Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 1cc24a0cbc589b..83e21a17497745 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -360,10 +360,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (f2fs_encrypted_inode(dir)) { struct qstr istr = QSTR_INIT(symname, len); - err = f2fs_inherit_context(dir, inode, NULL); - if (err) - goto err_out; - err = f2fs_get_encryption_info(inode); if (err) goto err_out; From e5e0906b6b4c517a8622f2ff196c19cbd1068644 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 28 May 2015 17:06:40 -0700 Subject: [PATCH 71/83] f2fs crypto: clean up error handling in f2fs_fname_setup_filename Sync with: ext4 crypto: clean up error handling in ext4_fname_setup_filename Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto_fname.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c index 81852cca7bbe39..ab377d496a39ad 100644 --- a/fs/f2fs/crypto_fname.c +++ b/fs/f2fs/crypto_fname.c @@ -392,15 +392,13 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, return ret; ret = f2fs_fname_encrypt(dir, iname, &fname->crypto_buf); if (ret < 0) - goto out; + goto errout; fname->disk_name.name = fname->crypto_buf.name; fname->disk_name.len = fname->crypto_buf.len; return 0; } - if (!lookup) { - ret = -EACCES; - goto out; - } + if (!lookup) + return -EACCES; /* We don't have the key and we are doing a lookup; decode the * user-supplied name @@ -408,19 +406,17 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, if (iname->name[0] == '_') bigname = 1; if ((bigname && (iname->len != 33)) || - (!bigname && (iname->len > 43))) { - ret = -ENOENT; - } + (!bigname && (iname->len > 43))) + return -ENOENT; + fname->crypto_buf.name = kmalloc(32, GFP_KERNEL); - if (fname->crypto_buf.name == NULL) { - ret = -ENOMEM; - goto out; - } + if (fname->crypto_buf.name == NULL) + return -ENOMEM; ret = digest_decode(iname->name + bigname, iname->len - bigname, fname->crypto_buf.name); if (ret < 0) { ret = -ENOENT; - goto out; + goto errout; } fname->crypto_buf.len = ret; if (bigname) { @@ -430,7 +426,7 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, fname->disk_name.len = fname->crypto_buf.len; } return 0; -out: +errout: f2fs_fname_crypto_free_buffer(&fname->crypto_buf); return ret; } From 9236cac5666ea8b3a3b92b132a046c200b99dca8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 28 May 2015 18:19:17 -0700 Subject: [PATCH 72/83] f2fs: fix a deadlock for summary page lock vs. sentry_lock In f2fs_gc: In f2fs_replace_block: - lock_page(sum_page) - check_valid_map() - mutex_lock(sentry_lock) - mutex_lock(sentry_lock) - change_curseg() - lock_page(sum_page) This patch fixes the deadlock condition. Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 43354cb3ce9492..e1e73617d13b6c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -750,6 +750,15 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, sum = page_address(sum_page); + /* + * this is to avoid deadlock: + * - lock_page(sum_page) - f2fs_replace_block + * - check_valid_map() - mutex_lock(sentry_lock) + * - mutex_lock(sentry_lock) - change_curseg() + * - lock_page(sum_page) + */ + unlock_page(sum_page); + switch (GET_SUM_TYPE((&sum->footer))) { case SUM_TYPE_NODE: gc_node_segment(sbi, sum->entries, segno, gc_type); @@ -763,7 +772,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type); stat_inc_call_count(sbi->stat_info); - f2fs_put_page(sum_page, 1); + f2fs_put_page(sum_page, 0); } int f2fs_gc(struct f2fs_sb_info *sbi) From 4683ff837c2d000212f72ce72cab22f061e5a77c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 1 Jun 2015 12:39:30 -0700 Subject: [PATCH 73/83] f2fs crypto: remove alloc_page for bounce_page We don't need to call alloc_page() prior to mempool_alloc(), since the mempool_alloc() calls alloc_page() internally. And, if __GFP_WAIT is set, it never fails on page allocation, so let's give GFP_NOWAIT and handle ENOMEM by writepage(). Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 33 ++++++++++++--------------------- fs/f2fs/f2fs_crypto.h | 3 +-- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index 2c7819aebcc5e4..be6af181e0f288 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -83,10 +83,7 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) unsigned long flags; if (ctx->flags & F2FS_WRITE_PATH_FL && ctx->w.bounce_page) { - if (ctx->flags & F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) - __free_page(ctx->w.bounce_page); - else - mempool_free(ctx->w.bounce_page, f2fs_bounce_page_pool); + mempool_free(ctx->w.bounce_page, f2fs_bounce_page_pool); ctx->w.bounce_page = NULL; } ctx->w.control_page = NULL; @@ -408,34 +405,28 @@ struct page *f2fs_encrypt(struct inode *inode, return (struct page *)ctx; /* The encryption operation will require a bounce page. */ - ciphertext_page = alloc_page(GFP_NOFS); + ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOWAIT); if (!ciphertext_page) { - /* - * This is a potential bottleneck, but at least we'll have - * forward progress. - */ - ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, - GFP_NOFS); - if (WARN_ON_ONCE(!ciphertext_page)) - ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, - GFP_NOFS | __GFP_WAIT); - ctx->flags &= ~F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; - } else { - ctx->flags |= F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; + err = -ENOMEM; + goto err_out; } + ctx->flags |= F2FS_WRITE_PATH_FL; ctx->w.bounce_page = ciphertext_page; ctx->w.control_page = plaintext_page; err = f2fs_page_crypto(ctx, inode, F2FS_ENCRYPT, plaintext_page->index, plaintext_page, ciphertext_page); - if (err) { - f2fs_release_crypto_ctx(ctx); - return ERR_PTR(err); - } + if (err) + goto err_out; + SetPagePrivate(ciphertext_page); set_page_private(ciphertext_page, (unsigned long)ctx); lock_page(ciphertext_page); return ciphertext_page; + +err_out: + f2fs_release_crypto_ctx(ctx); + return ERR_PTR(err); } /** diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h index be59d91928fdec..c2c1c2b63b2552 100644 --- a/fs/f2fs/f2fs_crypto.h +++ b/fs/f2fs/f2fs_crypto.h @@ -84,8 +84,7 @@ struct f2fs_crypt_info { }; #define F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 -#define F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL 0x00000002 -#define F2FS_WRITE_PATH_FL 0x00000004 +#define F2FS_WRITE_PATH_FL 0x00000002 struct f2fs_crypto_ctx { union { From 96c6dd59bf1d8b30afd3681550b321f4183e1914 Mon Sep 17 00:00:00 2001 From: Chenxi Mao Date: Sat, 30 May 2015 22:56:46 +0800 Subject: [PATCH 74/83] f2fs: disable the discard option when device doesn't support Current f2fs check the whether the blk device can support discard. However, the code will cause the discard option cannot be enabled. Because the clear_opt(sbi, DISCARD) will be invoked forever. This patch can fix this issue. Jaegeuk Kim: The original patch was intended to disable the discard option when device does not support trim command. Rather than remaining the buggy patch, let's replace with this patch as an integrated one. Signed-off-by: Chenxi Mao Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8b5f5fb8f5cb4e..f0be6a62120ac3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1237,10 +1237,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (test_opt(sbi, DISCARD)) { struct request_queue *q = bdev_get_queue(sb->s_bdev); - if (!blk_queue_discard(q)) + if (!blk_queue_discard(q)) { f2fs_msg(sb, KERN_WARNING, "mounting with \"discard\" option, but " "the device does not support discard"); + clear_opt(sbi, DISCARD); + } } sbi->s_kobj.kset = f2fs_kset; From 528e34593d6eff11a289ef23452c66175a340f0b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 28 May 2015 19:15:35 +0800 Subject: [PATCH 75/83] f2fs: hide common code in f2fs_replace_block This patch clean up codes through: 1.rename f2fs_replace_block to __f2fs_replace_block(). 2.introduce new f2fs_replace_block() to include __f2fs_replace_block() and some common related codes around __f2fs_replace_block(). Then, newly introduced function f2fs_replace_block can be used by following patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/file.c | 12 ++---------- fs/f2fs/recovery.c | 9 ++------- fs/f2fs/segment.c | 18 +++++++++++++++++- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 70cdf7b81e8067..1a0716c369407a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1695,8 +1695,8 @@ void write_meta_page(struct f2fs_sb_info *, struct page *); void write_node_page(unsigned int, struct f2fs_io_info *); void write_data_page(struct dnode_of_data *, struct f2fs_io_info *); void rewrite_data_page(struct f2fs_io_info *); -void f2fs_replace_block(struct f2fs_sb_info *, struct f2fs_summary *, - block_t, block_t, bool); +void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *, + block_t, block_t, unsigned char, bool); void allocate_data_block(struct f2fs_sb_info *, struct page *, block_t, block_t *, struct f2fs_summary *, int); void f2fs_wait_on_page_writeback(struct page *, enum page_type); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6b4ba7429dbaf2..aec96d3f8222c3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -854,18 +854,10 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) set_data_blkaddr(&dn); } else if (new_addr != NEW_ADDR) { struct node_info ni; - struct f2fs_summary sum; get_node_info(sbi, dn.nid, &ni); - set_summary(&sum, dn.nid, dn.ofs_in_node, - ni.version); - - f2fs_replace_block(sbi, &sum, old_addr, - new_addr, true); - - dn.data_blkaddr = new_addr; - set_data_blkaddr(&dn); - f2fs_update_extent_cache(&dn); + f2fs_replace_block(sbi, &dn, old_addr, new_addr, + ni.version, true); } f2fs_put_dnode(&dn); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9de25878db2ba0..24a8c1d4f45f28 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -360,7 +360,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct f2fs_inode_info *fi = F2FS_I(inode); unsigned int start, end; struct dnode_of_data dn; - struct f2fs_summary sum; struct node_info ni; int err = 0, recovered = 0; @@ -420,13 +419,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (err) goto err; - set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); - /* write dummy data page */ - f2fs_replace_block(sbi, &sum, src, dest, false); - dn.data_blkaddr = dest; - set_data_blkaddr(&dn); - f2fs_update_extent_cache(&dn); + f2fs_replace_block(sbi, &dn, src, dest, + ni.version, false); recovered++; } dn.ofs_in_node++; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 59566ae3f96407..46283a153acfcd 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1302,7 +1302,8 @@ void rewrite_data_page(struct f2fs_io_info *fio) f2fs_submit_page_mbio(fio); } -void f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +static void __f2fs_replace_block(struct f2fs_sb_info *sbi, + struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, bool recover_curseg) { @@ -1362,6 +1363,21 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, mutex_unlock(&curseg->curseg_mutex); } +void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, + block_t old_addr, block_t new_addr, + unsigned char version, bool recover_curseg) +{ + struct f2fs_summary sum; + + set_summary(&sum, dn->nid, dn->ofs_in_node, version); + + __f2fs_replace_block(sbi, &sum, old_addr, new_addr, recover_curseg); + + dn->data_blkaddr = new_addr; + set_data_blkaddr(dn); + f2fs_update_extent_cache(dn); +} + static inline bool is_merged_page(struct f2fs_sb_info *sbi, struct page *page, enum page_type type) { From f62185d0e283e9d311e3ac1020f159d95f0aab39 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 28 May 2015 19:16:57 +0800 Subject: [PATCH 76/83] f2fs: support FALLOC_FL_INSERT_RANGE FALLOC_FL_INSERT_RANGE flag for ->fallocate was introduced in commit dd46c787788d ("fs: Add support FALLOC_FL_INSERT_RANGE for fallocate"). The effect of FALLOC_FL_INSERT_RANGE command is the opposite of FALLOC_FL_COLLAPSE_RANGE, if this command was performed, all data from offset to EOF in our file will be shifted to right as given length, and then range [offset, offset + length] becomes a hole. This command is useful for our user who wants to add some data in the middle of the file, for example: video/music editor will insert a keyframe in specified position of media file, with this command we can easily create a hole for inserting without removing original data. This patch introduces f2fs_insert_range() to support FALLOC_FL_INSERT_RANGE. Signed-off-by: Chao Yu Signed-off-by: Yuan Zhong Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index aec96d3f8222c3..4d42d66acd96cf 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1010,6 +1010,100 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, return ret; } +static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + pgoff_t pg_start, pg_end, delta, nrpages, idx; + loff_t new_size; + int ret; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + new_size = i_size_read(inode) + len; + if (new_size > inode->i_sb->s_maxbytes) + return -EFBIG; + + if (offset >= i_size_read(inode)) + return -EINVAL; + + /* insert range should be aligned to block size of f2fs. */ + if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) + return -EINVAL; + + f2fs_balance_fs(sbi); + + ret = truncate_blocks(inode, i_size_read(inode), true); + if (ret) + return ret; + + /* write out all dirty pages from offset */ + ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); + if (ret) + return ret; + + truncate_pagecache(inode, offset); + + pg_start = offset >> PAGE_CACHE_SHIFT; + pg_end = (offset + len) >> PAGE_CACHE_SHIFT; + delta = pg_end - pg_start; + nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + + for (idx = nrpages - 1; idx >= pg_start && idx != -1; idx--) { + struct dnode_of_data dn; + struct page *ipage; + block_t new_addr, old_addr; + + f2fs_lock_op(sbi); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, idx, LOOKUP_NODE_RA); + if (ret && ret != -ENOENT) { + goto out; + } else if (ret == -ENOENT) { + goto next; + } else if (dn.data_blkaddr == NULL_ADDR) { + f2fs_put_dnode(&dn); + goto next; + } else { + new_addr = dn.data_blkaddr; + truncate_data_blocks_range(&dn, 1); + f2fs_put_dnode(&dn); + } + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + ret = PTR_ERR(ipage); + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); + ret = f2fs_reserve_block(&dn, idx + delta); + if (ret) + goto out; + + old_addr = dn.data_blkaddr; + f2fs_bug_on(sbi, old_addr != NEW_ADDR); + + if (new_addr != NEW_ADDR) { + struct node_info ni; + + get_node_info(sbi, dn.nid, &ni); + f2fs_replace_block(sbi, &dn, old_addr, new_addr, + ni.version, true); + } + f2fs_put_dnode(&dn); +next: + f2fs_unlock_op(sbi); + } + + i_size_write(inode, new_size); + return 0; +out: + f2fs_unlock_op(sbi); + return ret; +} + static int expand_inode_data(struct inode *inode, loff_t offset, loff_t len, int mode) { @@ -1077,11 +1171,13 @@ static long f2fs_fallocate(struct file *file, int mode, struct inode *inode = file_inode(file); long ret = 0; - if (f2fs_encrypted_inode(inode) && (mode & FALLOC_FL_COLLAPSE_RANGE)) + if (f2fs_encrypted_inode(inode) && + (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | + FALLOC_FL_INSERT_RANGE)) return -EOPNOTSUPP; mutex_lock(&inode->i_mutex); @@ -1095,6 +1191,8 @@ static long f2fs_fallocate(struct file *file, int mode, ret = f2fs_collapse_range(inode, offset, len); } else if (mode & FALLOC_FL_ZERO_RANGE) { ret = f2fs_zero_range(inode, offset, len, mode); + } else if (mode & FALLOC_FL_INSERT_RANGE) { + ret = f2fs_insert_range(inode, offset, len); } else { ret = expand_inode_data(inode, offset, len, mode); } From f56aa1c57ea921e45bb6cf5d83a04cbc8f38fae5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 2 Jun 2015 15:48:20 -0700 Subject: [PATCH 77/83] f2fs: fix to return exact trimmed size Now, we add all the candidates for trim commands and then finally issue discard commands. So, we should count the trimmed size in back-end. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 46283a153acfcd..1eb343768781f3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -544,7 +544,6 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi, list_add_tail(&new->list, head); done: SM_I(sbi)->nr_discards += end - start; - cpc->trimmed += end - start; } static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) @@ -646,6 +645,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen) goto skip; f2fs_issue_discard(sbi, entry->blkaddr, entry->len); + cpc->trimmed += entry->len; skip: list_del(&entry->list); SM_I(sbi)->nr_discards -= entry->len; From 09d54cdd207bb3b07e00983f2f50bd68c354ce31 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Jun 2015 13:20:10 +0800 Subject: [PATCH 78/83] f2fs: setting discard option in parse_options() For the first mount of f2fs image with realtime discard option, we will disable discard option if device is not supported, but for remount operation, our discard option can still be set, this should be avoided. This patch moves configuring of discard option to parse_options() to fix this issue. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f0be6a62120ac3..d54c54f6a49d0a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -258,6 +258,7 @@ static void init_once(void *foo) static int parse_options(struct super_block *sb, char *options) { struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct request_queue *q; substring_t args[MAX_OPT_ARGS]; char *p, *name; int arg = 0; @@ -302,7 +303,14 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; break; case Opt_discard: - set_opt(sbi, DISCARD); + q = bdev_get_queue(sb->s_bdev); + if (blk_queue_discard(q)) { + set_opt(sbi, DISCARD); + } else { + f2fs_msg(sb, KERN_WARNING, + "mounting with \"discard\" option, but " + "the device does not support discard"); + } break; case Opt_noheap: set_opt(sbi, NOHEAP); @@ -1235,16 +1243,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) proc_create_data("segment_info", S_IRUGO, sbi->s_proc, &f2fs_seq_segment_info_fops, sb); - if (test_opt(sbi, DISCARD)) { - struct request_queue *q = bdev_get_queue(sb->s_bdev); - if (!blk_queue_discard(q)) { - f2fs_msg(sb, KERN_WARNING, - "mounting with \"discard\" option, but " - "the device does not support discard"); - clear_opt(sbi, DISCARD); - } - } - sbi->s_kobj.kset = f2fs_kset; init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, From c5bda1c8b13ad7840ae0e8c9a2926df686ba6e06 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Jun 2015 13:28:03 +0800 Subject: [PATCH 79/83] f2fs: skip committing valid superblock In recovery procedure for superblock, we try to write data of valid superblock into invalid one for recovery, work should be finished here, but then still we will write the valid one with its original data. This operation is not needed. Let's skip doing this unnecessary work. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/super.c | 8 +++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1a0716c369407a..a8327ed7389873 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1624,7 +1624,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) /* * super.c */ -int f2fs_commit_super(struct f2fs_sb_info *); +int f2fs_commit_super(struct f2fs_sb_info *, bool); int f2fs_sync_fs(struct super_block *, int); extern __printf(3, 4) void f2fs_msg(struct super_block *, const char *, const char *, ...); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4d42d66acd96cf..096e08ca394586 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1530,7 +1530,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) /* update superblock with uuid */ generate_random_uuid(sbi->raw_super->encrypt_pw_salt); - err = f2fs_commit_super(sbi); + err = f2fs_commit_super(sbi, false); mnt_drop_write_file(filp); if (err) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d54c54f6a49d0a..a06b0b46fe6952 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1043,7 +1043,7 @@ static int read_raw_super_block(struct super_block *sb, return 0; } -int f2fs_commit_super(struct f2fs_sb_info *sbi) +int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { struct buffer_head *sbh = sbi->raw_super_buf; sector_t block = sbh->b_blocknr; @@ -1055,7 +1055,9 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi) err = sync_dirty_buffer(sbh); sbh->b_blocknr = block; - if (err) + + /* if we are in recovery path, skip writing valid superblock */ + if (recover || err) goto out; /* write current valid superblock */ @@ -1289,7 +1291,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* recover broken superblock */ if (recovery && !f2fs_readonly(sb) && !bdev_read_only(sb->s_bdev)) { f2fs_msg(sb, KERN_INFO, "Recover invalid superblock"); - f2fs_commit_super(sbi); + f2fs_commit_super(sbi, true); } return 0; From de6a8ec9822dbe2ce1f5a273cfac9e16032387e2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 8 Jun 2015 17:51:10 -0700 Subject: [PATCH 80/83] f2fs: drop the volatile_write flag only When aborting volatile_writes, let's drop its flag and give up any further volatile_writes. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 096e08ca394586..53c1b743754b08 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1391,11 +1391,9 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); } - if (f2fs_is_volatile_file(inode)) { + if (f2fs_is_volatile_file(inode)) clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); - filemap_fdatawrite(inode->i_mapping); - set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); - } + mnt_drop_write_file(filp); return ret; } From 7e8e754a4b722216d18418b36c4b8976e7418f2b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 11 Jun 2015 14:23:05 -0700 Subject: [PATCH 81/83] f2fs crypto: fix to handle errors likewise ext4 This patch makes some error handling policies same with ext4. Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index be6af181e0f288..75d62fff896051 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -112,7 +112,7 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; if (ci == NULL) - return ERR_PTR(-EACCES); + return ERR_PTR(-ENOKEY); /* * We first try getting the ctx from a free list because in @@ -457,8 +457,8 @@ int f2fs_decrypt_one(struct inode *inode, struct page *page) struct f2fs_crypto_ctx *ctx = f2fs_get_crypto_ctx(inode); int ret; - if (!ctx) - return -ENOMEM; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); ret = f2fs_decrypt(ctx, page); f2fs_release_crypto_ctx(ctx); return ret; From 43f54cd52f6eea2017505d2a3ac82d372c33749b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 11 Jun 2015 14:33:29 -0700 Subject: [PATCH 82/83] f2fs crypto: add alloc_bounce_page This patch adds alloc_bounce_page likewise ext4. Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index 75d62fff896051..4a62ef14e93275 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -376,6 +376,15 @@ static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx, return 0; } +static struct page *alloc_bounce_page(struct f2fs_crypto_ctx *ctx) +{ + ctx->w.bounce_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOWAIT); + if (ctx->w.bounce_page == NULL) + return ERR_PTR(-ENOMEM); + ctx->flags |= F2FS_WRITE_PATH_FL; + return ctx->w.bounce_page; +} + /** * f2fs_encrypt() - Encrypts a page * @inode: The inode for which the encryption should take place @@ -405,19 +414,17 @@ struct page *f2fs_encrypt(struct inode *inode, return (struct page *)ctx; /* The encryption operation will require a bounce page. */ - ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOWAIT); - if (!ciphertext_page) { - err = -ENOMEM; + ciphertext_page = alloc_bounce_page(ctx); + if (IS_ERR(ciphertext_page)) goto err_out; - } - ctx->flags |= F2FS_WRITE_PATH_FL; - ctx->w.bounce_page = ciphertext_page; ctx->w.control_page = plaintext_page; err = f2fs_page_crypto(ctx, inode, F2FS_ENCRYPT, plaintext_page->index, plaintext_page, ciphertext_page); - if (err) + if (err) { + ciphertext_page = ERR_PTR(err); goto err_out; + } SetPagePrivate(ciphertext_page); set_page_private(ciphertext_page, (unsigned long)ctx); @@ -426,7 +433,7 @@ struct page *f2fs_encrypt(struct inode *inode, err_out: f2fs_release_crypto_ctx(ctx); - return ERR_PTR(err); + return ciphertext_page; } /** From 3c45414527487549f469484337a4c5ae5d84dc80 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 5 Jun 2015 18:34:02 +0800 Subject: [PATCH 83/83] f2fs: do not trim preallocated blocks when truncating after i_size When we perform generic/092 in xfstests, output is like below: XXX Bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) 0: [0..10239]: data 0: [0..10239]: data -1: [10240..20479]: unwritten +1: [10240..14335]: unwritten This is because with this testcase, we redefine the regulation for truncate in perallocated space past i_size as below: "There was some confused about what the fs was supposed to do when you truncate at i_size with preallocated space past i_size. We decided on the following things. 1) truncate(i_size) will trim all blocks past i_size. 2) truncate(x) where x > i_size will not trim all blocks past i_size. " This method is used in xfs, and then ext4/btrfs will follow the rule. This patch fixes to follow the new rule for f2fs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 53c1b743754b08..ada2a3dd701ad5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -651,16 +651,16 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) f2fs_get_encryption_info(inode)) return -EACCES; - if (attr->ia_size != i_size_read(inode)) { + if (attr->ia_size <= i_size_read(inode)) { truncate_setsize(inode, attr->ia_size); f2fs_truncate(inode); f2fs_balance_fs(F2FS_I_SB(inode)); } else { /* - * giving a chance to truncate blocks past EOF which - * are fallocated with FALLOC_FL_KEEP_SIZE. + * do not trim all blocks after i_size if target size is + * larger than i_size. */ - f2fs_truncate(inode); + truncate_setsize(inode, attr->ia_size); } }