Skip to content

Commit

Permalink
Merge tag 'nfs-for-3.3-2' of git://git.linux-nfs.org/projects/trondmy…
Browse files Browse the repository at this point in the history
…/linux-nfs

NFS client bugfixes and cleanups for Linux 3.3 (pull 2)

* tag 'nfs-for-3.3-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs:
  pnfsblock: alloc short extent before submit bio
  pnfsblock: remove rpc_call_ops from struct parallel_io
  pnfsblock: move find lock page logic out of bl_write_pagelist
  pnfsblock: cleanup bl_mark_sectors_init
  pnfsblock: limit bio page count
  pnfsblock: don't spinlock when freeing block_dev
  pnfsblock: clean up _add_entry
  pnfsblock: set read/write tk_status to pnfs_error
  pnfsblock: acquire im_lock in _preload_range
  NFS4: fix compile warnings in nfs4proc.c
  nfs: check for integer overflow in decode_devicenotify_args()
  NFS: cleanup endian type in decode_ds_addr()
  NFS: add an endian notation
  • Loading branch information
torvalds committed Jan 16, 2012
2 parents adfeb6e + 7c5465d commit a12587b
Show file tree
Hide file tree
Showing 7 changed files with 222 additions and 178 deletions.
202 changes: 131 additions & 71 deletions fs/nfs/blocklayout/blocklayout.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect)
*/
struct parallel_io {
struct kref refcnt;
struct rpc_call_ops call_ops;
void (*pnfs_callback) (void *data);
void (*pnfs_callback) (void *data, int num_se);
void *data;
int bse_count;
};

static inline struct parallel_io *alloc_parallel(void *data)
Expand All @@ -103,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data)
if (rv) {
rv->data = data;
kref_init(&rv->refcnt);
rv->bse_count = 0;
}
return rv;
}
Expand All @@ -117,7 +118,7 @@ static void destroy_parallel(struct kref *kref)
struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);

dprintk("%s enter\n", __func__);
p->pnfs_callback(p->data);
p->pnfs_callback(p->data, p->bse_count);
kfree(p);
}

Expand Down Expand Up @@ -146,14 +147,19 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
{
struct bio *bio;

npg = min(npg, BIO_MAX_PAGES);
bio = bio_alloc(GFP_NOIO, npg);
if (!bio)
return NULL;
if (!bio && (current->flags & PF_MEMALLOC)) {
while (!bio && (npg /= 2))
bio = bio_alloc(GFP_NOIO, npg);
}

bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
bio->bi_bdev = be->be_mdev;
bio->bi_end_io = end_io;
bio->bi_private = par;
if (bio) {
bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
bio->bi_bdev = be->be_mdev;
bio->bi_end_io = end_io;
bio->bi_private = par;
}
return bio;
}

Expand Down Expand Up @@ -212,22 +218,15 @@ static void bl_read_cleanup(struct work_struct *work)
}

static void
bl_end_par_io_read(void *data)
bl_end_par_io_read(void *data, int unused)
{
struct nfs_read_data *rdata = data;

rdata->task.tk_status = rdata->pnfs_error;
INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
schedule_work(&rdata->task.u.tk_work);
}

/* We don't want normal .rpc_call_done callback used, so we replace it
* with this stub.
*/
static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
{
return;
}

static enum pnfs_try_status
bl_read_pagelist(struct nfs_read_data *rdata)
{
Expand All @@ -247,8 +246,6 @@ bl_read_pagelist(struct nfs_read_data *rdata)
par = alloc_parallel(rdata);
if (!par)
goto use_mds;
par->call_ops = *rdata->mds_ops;
par->call_ops.rpc_call_done = bl_rpc_do_nothing;
par->pnfs_callback = bl_end_par_io_read;
/* At this point, we can no longer jump to use_mds */

Expand Down Expand Up @@ -322,6 +319,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
{
sector_t isect, end;
struct pnfs_block_extent *be;
struct pnfs_block_short_extent *se;

dprintk("%s(%llu, %u)\n", __func__, offset, count);
if (count == 0)
Expand All @@ -334,8 +332,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
be = bl_find_get_extent(bl, isect, NULL);
BUG_ON(!be); /* FIXME */
len = min(end, be->be_f_offset + be->be_length) - isect;
if (be->be_state == PNFS_BLOCK_INVALID_DATA)
bl_mark_for_commit(be, isect, len); /* What if fails? */
if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
se = bl_pop_one_short_extent(be->be_inval);
BUG_ON(!se);
bl_mark_for_commit(be, isect, len, se);
}
isect += len;
bl_put_extent(be);
}
Expand All @@ -357,7 +358,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
end_page_writeback(page);
page_cache_release(page);
} while (bvec >= bio->bi_io_vec);
if (!uptodate) {

if (unlikely(!uptodate)) {
if (!wdata->pnfs_error)
wdata->pnfs_error = -EIO;
pnfs_set_lo_fail(wdata->lseg);
Expand All @@ -366,7 +368,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
put_parallel(par);
}

/* This is basically copied from mpage_end_io_read */
static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
Expand All @@ -392,7 +393,7 @@ static void bl_write_cleanup(struct work_struct *work)
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
wdata = container_of(task, struct nfs_write_data, task);
if (!wdata->pnfs_error) {
if (likely(!wdata->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */
mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
wdata->args.offset, wdata->args.count);
Expand All @@ -401,11 +402,16 @@ static void bl_write_cleanup(struct work_struct *work)
}

/* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data)
static void bl_end_par_io_write(void *data, int num_se)
{
struct nfs_write_data *wdata = data;

wdata->task.tk_status = 0;
if (unlikely(wdata->pnfs_error)) {
bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
num_se);
}

wdata->task.tk_status = wdata->pnfs_error;
wdata->verf.committed = NFS_FILE_SYNC;
INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
schedule_work(&wdata->task.u.tk_work);
Expand Down Expand Up @@ -484,6 +490,55 @@ init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read)
return ret;
}

/* Find or create a zeroing page marked being writeback.
* Return ERR_PTR on error, NULL to indicate skip this page and page itself
* to indicate write out.
*/
static struct page *
bl_find_get_zeroing_page(struct inode *inode, pgoff_t index,
struct pnfs_block_extent *cow_read)
{
struct page *page;
int locked = 0;
page = find_get_page(inode->i_mapping, index);
if (page)
goto check_page;

page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
if (unlikely(!page)) {
dprintk("%s oom\n", __func__);
return ERR_PTR(-ENOMEM);
}
locked = 1;

check_page:
/* PageDirty: Other will write this out
* PageWriteback: Other is writing this out
* PageUptodate: It was read before
*/
if (PageDirty(page) || PageWriteback(page)) {
print_page(page);
if (locked)
unlock_page(page);
page_cache_release(page);
return NULL;
}

if (!locked) {
lock_page(page);
locked = 1;
goto check_page;
}
if (!PageUptodate(page)) {
/* New page, readin or zero it */
init_page_for_write(page, cow_read);
}
set_page_writeback(page);
unlock_page(page);

return page;
}

static enum pnfs_try_status
bl_write_pagelist(struct nfs_write_data *wdata, int sync)
{
Expand All @@ -508,22 +563,23 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
*/
par = alloc_parallel(wdata);
if (!par)
return PNFS_NOT_ATTEMPTED;
par->call_ops = *wdata->mds_ops;
par->call_ops.rpc_call_done = bl_rpc_do_nothing;
goto out_mds;
par->pnfs_callback = bl_end_par_io_write;
/* At this point, have to be more careful with error handling */

isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
if (!be || !is_writable(be, isect)) {
dprintk("%s no matching extents!\n", __func__);
wdata->pnfs_error = -EINVAL;
goto out;
goto out_mds;
}

/* First page inside INVALID extent */
if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
if (likely(!bl_push_one_short_extent(be->be_inval)))
par->bse_count++;
else
goto out_mds;
temp = offset >> PAGE_CACHE_SHIFT;
npg_zero = do_div(temp, npg_per_block);
isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
Expand All @@ -543,36 +599,16 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
dprintk("%s zero %dth page: index %lu isect %llu\n",
__func__, npg_zero, index,
(unsigned long long)isect);
page =
find_or_create_page(wdata->inode->i_mapping, index,
GFP_NOFS);
if (!page) {
dprintk("%s oom\n", __func__);
wdata->pnfs_error = -ENOMEM;
page = bl_find_get_zeroing_page(wdata->inode, index,
cow_read);
if (unlikely(IS_ERR(page))) {
wdata->pnfs_error = PTR_ERR(page);
goto out;
}

/* PageDirty: Other will write this out
* PageWriteback: Other is writing this out
* PageUptodate: It was read before
* sector_initialized: already written out
*/
if (PageDirty(page) || PageWriteback(page)) {
print_page(page);
unlock_page(page);
page_cache_release(page);
} else if (page == NULL)
goto next_page;
}
if (!PageUptodate(page)) {
/* New page, readin or zero it */
init_page_for_write(page, cow_read);
}
set_page_writeback(page);
unlock_page(page);

ret = bl_mark_sectors_init(be->be_inval, isect,
PAGE_CACHE_SECTORS,
NULL);
PAGE_CACHE_SECTORS);
if (unlikely(ret)) {
dprintk("%s bl_mark_sectors_init fail %d\n",
__func__, ret);
Expand All @@ -581,6 +617,19 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
wdata->pnfs_error = ret;
goto out;
}
if (likely(!bl_push_one_short_extent(be->be_inval)))
par->bse_count++;
else {
end_page_writeback(page);
page_cache_release(page);
wdata->pnfs_error = -ENOMEM;
goto out;
}
/* FIXME: This should be done in bi_end_io */
mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
page->index << PAGE_CACHE_SHIFT,
PAGE_CACHE_SIZE);

bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
isect, page, be,
bl_end_io_write_zero, par);
Expand All @@ -589,10 +638,6 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
bio = NULL;
goto out;
}
/* FIXME: This should be done in bi_end_io */
mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
page->index << PAGE_CACHE_SHIFT,
PAGE_CACHE_SIZE);
next_page:
isect += PAGE_CACHE_SECTORS;
extent_length -= PAGE_CACHE_SECTORS;
Expand All @@ -616,13 +661,21 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
wdata->pnfs_error = -EINVAL;
goto out;
}
if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
if (likely(!bl_push_one_short_extent(
be->be_inval)))
par->bse_count++;
else {
wdata->pnfs_error = -ENOMEM;
goto out;
}
}
extent_length = be->be_length -
(isect - be->be_f_offset);
}
if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
ret = bl_mark_sectors_init(be->be_inval, isect,
PAGE_CACHE_SECTORS,
NULL);
PAGE_CACHE_SECTORS);
if (unlikely(ret)) {
dprintk("%s bl_mark_sectors_init fail %d\n",
__func__, ret);
Expand Down Expand Up @@ -664,6 +717,10 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
bl_submit_bio(WRITE, bio);
put_parallel(par);
return PNFS_ATTEMPTED;
out_mds:
bl_put_extent(be);
kfree(par);
return PNFS_NOT_ATTEMPTED;
}

/* FIXME - range ignored */
Expand All @@ -690,11 +747,17 @@ static void
release_inval_marks(struct pnfs_inval_markings *marks)
{
struct pnfs_inval_tracking *pos, *temp;
struct pnfs_block_short_extent *se, *stemp;

list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
list_del(&pos->it_link);
kfree(pos);
}

list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) {
list_del(&se->bse_node);
kfree(se);
}
return;
}

Expand Down Expand Up @@ -779,16 +842,13 @@ bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
static void free_blk_mountid(struct block_mount_id *mid)
{
if (mid) {
struct pnfs_block_dev *dev;
spin_lock(&mid->bm_lock);
while (!list_empty(&mid->bm_devlist)) {
dev = list_first_entry(&mid->bm_devlist,
struct pnfs_block_dev,
bm_node);
struct pnfs_block_dev *dev, *tmp;

/* No need to take bm_lock as we are last user freeing bm_devlist */
list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) {
list_del(&dev->bm_node);
bl_free_block_dev(dev);
}
spin_unlock(&mid->bm_lock);
kfree(mid);
}
}
Expand Down
Loading

0 comments on commit a12587b

Please sign in to comment.