Skip to content

Commit

Permalink
NFS4.1 Fix data server connection race
Browse files Browse the repository at this point in the history
Unlike metadata server mounts, which support multiple mount points to
the same server via struct nfs_server, data servers support a single connection.

Concurrent calls to set up the data server connection can race: the first
call allocates the nfs_client struct, and before the cached struct nfs_client
pointer can be set, a second call also tries to set up the connection, finds the
already allocated nfs_client, bumps the reference count, re-initializes the
session, etc. This results in a hanging data server session after umount.

Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
  • Loading branch information
androsadamson authored and Trond Myklebust committed May 8, 2013
1 parent d497ab9 commit c23266d
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 2 deletions.
2 changes: 2 additions & 0 deletions fs/nfs/nfs4filelayout.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ struct nfs4_pnfs_ds {
struct list_head ds_addrs;
struct nfs_client *ds_clp;
atomic_t ds_count;
unsigned long ds_state;
#define NFS4DS_CONNECTING 0 /* ds is establishing connection */
};

struct nfs4_file_layout_dsaddr {
Expand Down
26 changes: 24 additions & 2 deletions fs/nfs/nfs4filelayoutdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,22 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
return flseg->fh_array[i];
}

/*
 * Wait on the NFS4DS_CONNECTING bit of @ds->ds_state.
 *
 * The wait is issued with nfs_wait_bit_killable in TASK_KILLABLE state.
 * The result of wait_on_bit() is not examined, so the caller cannot tell
 * from this helper how the wait ended.
 */
static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
{
	unsigned long *state_word = &ds->ds_state;

	/* This path may block; annotate it before waiting. */
	might_sleep();
	wait_on_bit(state_word, NFS4DS_CONNECTING,
		    nfs_wait_bit_killable, TASK_KILLABLE);
}

/*
 * Clear the NFS4DS_CONNECTING bit in @ds->ds_state and wake up anything
 * waiting on that bit.
 *
 * @ds is dereferenced unconditionally, so it must not be NULL.
 */
static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
{
/* Barriers are placed on both sides of the clear_bit() call. */
smp_mb__before_clear_bit();
clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
smp_mb__after_clear_bit();
/* Wake waiters only after the bit has been cleared. */
wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
}


struct nfs4_pnfs_ds *
nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
{
Expand All @@ -791,16 +807,22 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
filelayout_mark_devid_invalid(devid);
return NULL;
}
if (ds->ds_clp)
return ds;

if (!ds->ds_clp) {
if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
int err;

err = nfs4_ds_connect(s, ds);
if (err) {
nfs4_mark_deviceid_unavailable(devid);
return NULL;
ds = NULL;
}
nfs4_clear_ds_conn_bit(ds);
} else {
/* Either ds is connected, or ds is NULL */
nfs4_wait_ds_connect(ds);
}
return ds;
}
Expand Down

0 comments on commit c23266d

Please sign in to comment.