From d2dcd9083f101584e029cbd4f0e1a4e573170d43 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Fri, 9 Mar 2012 06:27:12 +0530 Subject: [PATCH 1/5] logfs: destroy the reserved inodes while unmounting We were assuming that the evict_inode() would never be called on reserved inodes. However, (after the commit 8e22c1a4e logfs: get rid of magical inodes) while unmounting the file system, in put_super, we call iput() on all of the reserved inodes. The following simple test used to cause a kernel panic on LogFS: 1. Mount a LogFS file system on /mnt 2. Create a file $ touch /mnt/a 3. Try to unmount the FS $ umount /mnt The simple fix would be to drop the assumption and properly destroy the reserved inodes. Signed-off-by: Prasad Joshi --- fs/logfs/inode.c | 16 ++++++++++++++++ fs/logfs/readwrite.c | 1 - fs/logfs/segment.c | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index a422f42238b250..df093d9e4da188 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -156,10 +156,26 @@ static void __logfs_destroy_inode(struct inode *inode) call_rcu(&inode->i_rcu, logfs_i_callback); } +static void __logfs_destroy_meta_inode(struct inode *inode) +{ + struct logfs_inode *li = logfs_inode(inode); + BUG_ON(li->li_block); + call_rcu(&inode->i_rcu, logfs_i_callback); +} + static void logfs_destroy_inode(struct inode *inode) { struct logfs_inode *li = logfs_inode(inode); + if (inode->i_ino < LOGFS_RESERVED_INOS) { + /* + * The reserved inodes are never destroyed unless we are in + * unmont path. + */ + __logfs_destroy_meta_inode(inode); + return; + } + BUG_ON(list_empty(&li->li_freeing_list)); spin_lock(&logfs_inode_lock); li->li_refcount--; diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index e3ab5e5a904c23..c8ea8664699c22 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -2189,7 +2189,6 @@ void logfs_evict_inode(struct inode *inode) return; } - BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS); page = inode_to_page(inode); BUG_ON(!page); /* FIXME: Use emergency page */ logfs_put_write_page(page); diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index e28d090c98d6bb..038da0991794a3 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -886,7 +886,7 @@ static struct logfs_area *alloc_area(struct super_block *sb) static void map_invalidatepage(struct page *page, unsigned long l) { - BUG(); + return; } static int map_releasepage(struct page *page, gfp_t g) From cd8bfa9c8a13cf3facc5731da17e10188b3795d1 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Mon, 2 Apr 2012 09:23:04 +0530 Subject: [PATCH 2/5] logfs: initialize the number of iovecs in bio This fixes the following crash when a LogFS file system, created on a encrypted LVM volume, was mounted [ 526.548034] BUG: unable to handle kernel NULL pointer dereference at [ 526.550106] IP: [] memcpy+0xb/0x120 [ 526.551008] PGD bd60067 PUD 1778d067 PMD 0 [ 526.551783] Oops: 0000 [#1] SMP Pid: 2043, comm: mount RIP: 0010:[] [] memcpy+0xb/0x120 Call Trace: kcryptd_io_read+0xdb/0x100 crypt_map+0xfd/0x190 __map_bio+0x48/0x150 __split_and_process_bio+0x51b/0x630 dm_request+0x138/0x230 generic_make_request+0xca/0x100 submit_bio+0x87/0x110 sync_request+0xdd/0x120 [logfs] bdev_readpage+0x2e/0x70 [logfs] do_read_cache_page+0x82/0x180 logfs_mount+0x2ad/0x770 [logfs] mount_fs+0x47/0x1c0 vfs_kern_mount+0x72/0x110 do_kern_mount+0x54/0x110 do_mount+0x520/0x7f0 sys_mount+0x90/0xe0 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=42292 Reported-by: Witold Baryluk Signed-off-by: Prasad Joshi --- fs/logfs/dev_bdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index df0de27c273349..ea29df36893d3b 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -26,6 +26,7 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw) struct completion complete; bio_init(&bio); + bio.bi_max_vecs = 1; bio.bi_io_vec = &bio_vec; bio_vec.bv_page = page; bio_vec.bv_len = PAGE_SIZE; From ddb24bbac3681b87ae0638aacb702d9273e3c025 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Mon, 23 Jul 2012 09:18:14 +0530 Subject: [PATCH 3/5] logfs: create a pagecache page if it is not present While writing the partial journal entries we assumed that the page associated with the journal would always in locatable. This incorrect assumption resulted in the following BUG kernel BUG at /home/benixon/WD_SMR/kernels/linux-3.3.7-logfs/fs/logfs/journal.c:569! EIP is at logfs_write_area+0xb6/0x109 [logfs] EAX: 00000000 EBX: 00000000 ECX: ef6efea4 EDX: 00000000 ESI: 001b9000 EDI: f009e000 EBP: c3c13f14 ESP: c3c13ef0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process sync (pid: 1799, ti=c3c12000 task=f07825b0 task.ti=c3c12000) Stack: 01001000 c3c13f26 781b9000 00000000 f009e000 f7286000 f1f83400 f8445071 f1f83400 c3c13f30 f8445ae9 c3c13f20 0000100a 000ee000 f009e000 00000001 c3c13f5c f8445d17 c05eb0ee 00000000 f1f83400 ef718000 f009e25c ea9c3d80 Call Trace: [] ? account_shadow+0x16d/0x16d [logfs] [] logfs_write_je+0x2a/0x44 [logfs] [] logfs_write_anchor+0x114/0x228 [logfs] [] ? empty+0x5/0x5 [] logfs_sync_fs+0x1e/0x31 [logfs] [] __sync_filesystem+0x5d/0x6f [] sync_one_sb+0x15/0x17 [] iterate_supers+0x59/0x9a [] ? __sync_filesystem+0x6f/0x6f [] sys_sync+0x29/0x4f [] sysenter_do_call+0x12/0x28 EIP: [] logfs_write_area+0xb6/0x109 [logfs] SS:ESP 0068:c3c13ef0 ---[ end trace ef6e9ef52601a945 ]--- The fix is to create the pagecache page if it is not locatable. Reported-and-tested-by: Benixon Dhas Signed-off-by: Prasad Joshi --- fs/logfs/journal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 1e1c369df22bb0..2a09b8d7398953 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c @@ -565,7 +565,7 @@ static void write_wbuf(struct super_block *sb, struct logfs_area *area, index = ofs >> PAGE_SHIFT; page_ofs = ofs & (PAGE_SIZE - 1); - page = find_lock_page(mapping, index); + page = find_or_create_page(mapping, index, GFP_NOFS); BUG_ON(!page); memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize); unlock_page(page); From 41b93bc1ee7e7276db698dd66afa7b740cda517a Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Mon, 23 Jul 2012 09:35:52 +0530 Subject: [PATCH 4/5] logfs: maintain the ordering of meta-inode destruction LogFS does not use a specialized area to maintain the inodes. The inodes information is kept in a specialized file called inode file. Similarly, the segment information is kept in a segment file. Since the segment file also has an inode which is kept in the inode file, the inode for segment file must be evicted before the inode for inode file. The change fixes the following BUG during unmount Pid: 2057, comm: umount Not tainted 3.5.0-rc6+ #25 Bochs Bochs RIP: 0010:[] [] move_page_to_btree+0x32/0x1f0 [logfs] Process umount (pid: 2057, threadinfo ...) Call Trace: [] ? find_get_pages+0x2a/0x180 [] logfs_invalidatepage+0x85/0x90 [logfs] [] truncate_inode_page+0xb1/0xd0 [] truncate_inode_pages_range+0x15f/0x490 [] ? printk+0x78/0x7a [] truncate_inode_pages+0x15/0x20 [] logfs_evict_inode+0x6c/0x190 [logfs] [] ? _raw_spin_unlock+0x2b/0x40 [] evict+0xa7/0x1b0 [] dispose_list+0x3e/0x60 [] evict_inodes+0xf4/0x110 [] generic_shutdown_super+0x53/0xf0 [] logfs_kill_sb+0x52/0xf0 [logfs] [] deactivate_locked_super+0x45/0x80 [] deactivate_super+0x4a/0x70 [] mntput_no_expire+0xde/0x140 [] sys_umount+0x6f/0x3a0 [] system_call_fastpath+0x16/0x1b ---[ end trace 45f7752082cefafd ]--- Signed-off-by: Prasad Joshi --- fs/logfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index df093d9e4da188..6984562738d36b 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -389,8 +389,8 @@ static void logfs_put_super(struct super_block *sb) { struct logfs_super *super = logfs_super(sb); /* kill the meta-inodes */ - iput(super->s_master_inode); iput(super->s_segfile_inode); + iput(super->s_master_inode); iput(super->s_mapping_inode); } From 9f0bbd8ca7905fcc0602c038013b095322fec939 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Mon, 23 Jul 2012 10:32:11 +0530 Subject: [PATCH 5/5] logfs: query block device for number of pages to send with bio The block device driver puts a limit on maximum number of pages that can be sent with the bio. Not all block devices can handle BIO_MAX_PAGES number of pages in bio. Specifically the virtio-blk diriver limits it to 126. When the LogFS file system was excersized in KVM, the following bug from do_virtblk_request() was observed static void do_virtblk_request(struct request_queue *q) { .... .... while ((req = blk_peek_request(q)) != NULL) { BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); .... .... } .... } The patch fixes the problem by querring the maximum number of pages in bio allowed from block device driver and then using those many pages during submit_bio. Signed-off-by: Prasad Joshi --- fs/logfs/dev_bdev.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index ea29df36893d3b..e784a217b50067 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -96,12 +96,11 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, struct address_space *mapping = super->s_mapping_inode->i_mapping; struct bio *bio; struct page *page; - struct request_queue *q = bdev_get_queue(sb->s_bdev); - unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); + unsigned int max_pages; int i; - if (max_pages > BIO_MAX_PAGES) - max_pages = BIO_MAX_PAGES; + max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev)); + bio = bio_alloc(GFP_NOFS, max_pages); BUG_ON(!bio); @@ -191,12 +190,11 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, { struct logfs_super *super = logfs_super(sb); struct bio *bio; - struct request_queue *q = bdev_get_queue(sb->s_bdev); - unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); + unsigned int max_pages; int i; - if (max_pages > BIO_MAX_PAGES) - max_pages = BIO_MAX_PAGES; + max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev)); + bio = bio_alloc(GFP_NOFS, max_pages); BUG_ON(!bio);