Skip to content

Commit

Permalink
Merge tag 'nfs-for-4.3-1' of git://git.linux-nfs.org/projects/trondmy…
Browse files Browse the repository at this point in the history
…/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable patches:
   - Fix atomicity of pNFS commit list updates
   - Fix NFSv4 handling of open(O_CREAT|O_EXCL|O_RDONLY)
   - nfs_set_pgio_error sometimes misses errors
   - Fix a thinko in xs_connect()
   - Fix borkage in _same_data_server_addrs_locked()
   - Fix a NULL pointer dereference of migration recovery ops for v4.2
     client
   - Don't let the ctime override attribute barriers.
   - Revert "NFSv4: Remove incorrect check in can_open_delegated()"
   - Ensure flexfiles pNFS driver updates the inode after write finishes
   - flexfiles must not pollute the attribute cache with attrbutes from
     the DS
   - Fix a protocol error in layoutreturn
   - Fix a protocol issue with NFSv4.1 CLOSE stateids

  Bugfixes + cleanups
   - pNFS blocks bugfixes from Christoph
   - Various cleanups from Anna
   - More fixes for delegation corner cases
   - Don't fsync twice for O_SYNC/IS_SYNC files
   - Fix pNFS and flexfiles layoutstats bugs
   - pnfs/flexfiles: avoid duplicate tracking of mirror data
   - pnfs: Fix layoutget/layoutreturn/return-on-close serialisation
     issues
   - pnfs/flexfiles: error handling retries a layoutget before fallback
     to MDS

  Features:
   - Full support for the OPEN NFS4_CREATE_EXCLUSIVE4_1 mode from
     Kinglong
   - More RDMA client transport improvements from Chuck
   - Removal of the deprecated ib_reg_phys_mr() and ib_rereg_phys_mr()
     verbs from the SUNRPC, Lustre and core infiniband tree.
   - Optimise away the close-to-open getattr if there is no cached data"

* tag 'nfs-for-4.3-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (108 commits)
  NFSv4: Respect the server imposed limit on how many changes we may cache
  NFSv4: Express delegation limit in units of pages
  Revert "NFS: Make close(2) asynchronous when closing NFS O_DIRECT files"
  NFS: Optimise away the close-to-open getattr if there is no cached data
  NFSv4.1/flexfiles: Clean up ff_layout_write_done_cb/ff_layout_commit_done_cb
  NFSv4.1/flexfiles: Mark the layout for return in ff_layout_io_track_ds_error()
  nfs: Remove unneeded checking of the return value from scnprintf
  nfs: Fix truncated client owner id without proto type
  NFSv4.1/flexfiles: Mark layout for return if the mirrors are invalid
  NFSv4.1/flexfiles: RW layouts are valid only if all mirrors are valid
  NFSv4.1/flexfiles: Fix incorrect usage of pnfs_generic_mark_devid_invalid()
  NFSv4.1/flexfiles: Fix freeing of mirrors
  NFSv4.1/pNFS: Don't request a minimal read layout beyond the end of file
  NFSv4.1/pnfs: Handle LAYOUTGET return values correctly
  NFSv4.1/pnfs: Don't ask for a read layout for an empty file.
  NFSv4.1: Fix a protocol issue with CLOSE stateids
  NFSv4.1/flexfiles: Don't mark the entire deviceid as bad for file errors
  SUNRPC: Prevent SYN+SYNACK+RST storms
  SUNRPC: xs_reset_transport must mark the connection as disconnected
  NFSv4.1/pnfs: Ensure layoutreturn reserves space for the opaque payload
  ...
  • Loading branch information
torvalds committed Sep 7, 2015
2 parents 77a7880 + 5445b1f commit 4e4adb2
Show file tree
Hide file tree
Showing 54 changed files with 1,321 additions and 1,089 deletions.
9 changes: 9 additions & 0 deletions Documentation/kernel-parameters.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2285,6 +2285,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
The default parameter value of '0' causes the kernel
not to attempt recovery of lost locks.

nfs4.layoutstats_timer =
[NFSv4.2] Change the rate at which the kernel sends
layoutstats to the pNFS metadata server.

Setting this to value to 0 causes the kernel to use
whatever value is the default set by the layout
driver. A non-zero value sets the minimum interval
in seconds between layoutstats transmissions.

nfsd.nfs4_disable_idmapping=
[NFSv4] When set to the default of '1', the NFSv4
server will return only numeric uids and gids to
Expand Down
67 changes: 0 additions & 67 deletions drivers/infiniband/core/verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1144,73 +1144,6 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
}
EXPORT_SYMBOL(ib_get_dma_mr);

struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
struct ib_phys_buf *phys_buf_array,
int num_phys_buf,
int mr_access_flags,
u64 *iova_start)
{
struct ib_mr *mr;
int err;

err = ib_check_mr_access(mr_access_flags);
if (err)
return ERR_PTR(err);

if (!pd->device->reg_phys_mr)
return ERR_PTR(-ENOSYS);

mr = pd->device->reg_phys_mr(pd, phys_buf_array, num_phys_buf,
mr_access_flags, iova_start);

if (!IS_ERR(mr)) {
mr->device = pd->device;
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0);
}

return mr;
}
EXPORT_SYMBOL(ib_reg_phys_mr);

int ib_rereg_phys_mr(struct ib_mr *mr,
int mr_rereg_mask,
struct ib_pd *pd,
struct ib_phys_buf *phys_buf_array,
int num_phys_buf,
int mr_access_flags,
u64 *iova_start)
{
struct ib_pd *old_pd;
int ret;

ret = ib_check_mr_access(mr_access_flags);
if (ret)
return ret;

if (!mr->device->rereg_phys_mr)
return -ENOSYS;

if (atomic_read(&mr->usecnt))
return -EBUSY;

old_pd = mr->pd;

ret = mr->device->rereg_phys_mr(mr, mr_rereg_mask, pd,
phys_buf_array, num_phys_buf,
mr_access_flags, iova_start);

if (!ret && (mr_rereg_mask & IB_MR_REREG_PD)) {
atomic_dec(&old_pd->usecnt);
atomic_inc(&pd->usecnt);
}

return ret;
}
EXPORT_SYMBOL(ib_rereg_phys_mr);

int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
{
return mr->device->query_mr ?
Expand Down
19 changes: 1 addition & 18 deletions fs/nfs/blocklayout/blocklayout.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,6 @@

struct pnfs_block_dev;

enum pnfs_block_volume_type {
PNFS_BLOCK_VOLUME_SIMPLE = 0,
PNFS_BLOCK_VOLUME_SLICE = 1,
PNFS_BLOCK_VOLUME_CONCAT = 2,
PNFS_BLOCK_VOLUME_STRIPE = 3,
};

#define PNFS_BLOCK_MAX_UUIDS 4
#define PNFS_BLOCK_MAX_DEVICES 64

Expand Down Expand Up @@ -117,13 +110,6 @@ struct pnfs_block_dev {
struct pnfs_block_dev_map *map);
};

enum exstate4 {
PNFS_BLOCK_READWRITE_DATA = 0,
PNFS_BLOCK_READ_DATA = 1,
PNFS_BLOCK_INVALID_DATA = 2, /* mapped, but data is invalid */
PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */
};

/* sector_t fields are all in 512-byte sectors */
struct pnfs_block_extent {
union {
Expand All @@ -134,15 +120,12 @@ struct pnfs_block_extent {
sector_t be_f_offset; /* the starting offset in the file */
sector_t be_length; /* the size of the extent */
sector_t be_v_offset; /* the starting offset in the volume */
enum exstate4 be_state; /* the state of this extent */
enum pnfs_block_extent_state be_state; /* the state of this extent */
#define EXTENT_WRITTEN 1
#define EXTENT_COMMITTING 2
unsigned int be_tag;
};

/* on the wire size of the extent */
#define BL_EXTENT_SIZE (7 * sizeof(__be32) + NFS4_DEVICEID4_SIZE)

struct pnfs_block_layout {
struct pnfs_layout_hdr bl_layout;
struct rb_root bl_ext_rw;
Expand Down
9 changes: 7 additions & 2 deletions fs/nfs/blocklayout/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ bl_free_device(struct pnfs_block_dev *dev)
kfree(dev->children);
} else {
if (dev->bdev)
blkdev_put(dev->bdev, FMODE_READ);
blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
}
}

Expand Down Expand Up @@ -65,6 +65,11 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
return -EIO;
p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
b->simple.sigs[i].sig_len = be32_to_cpup(p++);
if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) {
pr_info("signature too long: %d\n",
b->simple.sigs[i].sig_len);
return -EIO;
}

p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
if (!p)
Expand Down Expand Up @@ -195,7 +200,7 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
if (!dev)
return -EIO;

d->bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL);
d->bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
if (IS_ERR(d->bdev)) {
printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
Expand Down
19 changes: 11 additions & 8 deletions fs/nfs/blocklayout/extent_tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,12 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
return err;
}

static size_t ext_tree_layoutupdate_size(size_t count)
{
return sizeof(__be32) /* number of entries */ +
PNFS_BLOCK_EXTENT_SIZE * count;
}

static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
size_t buffer_size)
{
Expand Down Expand Up @@ -489,7 +495,7 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
continue;

(*count)++;
if (*count * BL_EXTENT_SIZE > buffer_size) {
if (ext_tree_layoutupdate_size(*count) > buffer_size) {
/* keep counting.. */
ret = -ENOSPC;
continue;
Expand Down Expand Up @@ -530,7 +536,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
if (unlikely(ret)) {
ext_tree_free_commitdata(arg, buffer_size);

buffer_size = sizeof(__be32) + BL_EXTENT_SIZE * count;
buffer_size = ext_tree_layoutupdate_size(count);
count = 0;

arg->layoutupdate_pages =
Expand All @@ -549,17 +555,14 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
}

*start_p = cpu_to_be32(count);
arg->layoutupdate_len = sizeof(__be32) + BL_EXTENT_SIZE * count;
arg->layoutupdate_len = ext_tree_layoutupdate_size(count);

if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
__be32 *p = start_p;
void *p = start_p, *end = p + arg->layoutupdate_len;
int i = 0;

for (p = start_p;
p < start_p + arg->layoutupdate_len;
p += PAGE_SIZE) {
for ( ; p < end; p += PAGE_SIZE)
arg->layoutupdate_pages[i++] = vmalloc_to_page(p);
}
}

dprintk("%s found %zu ranges\n", __func__, count);
Expand Down
4 changes: 0 additions & 4 deletions fs/nfs/callback.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,6 @@ nfs41_callback_up(struct svc_serv *serv)
spin_lock_init(&serv->sv_cb_lock);
init_waitqueue_head(&serv->sv_cb_waitq);
rqstp = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
if (IS_ERR(rqstp)) {
svc_xprt_put(serv->sv_bc_xprt);
serv->sv_bc_xprt = NULL;
}
dprintk("--> %s return %d\n", __func__, PTR_ERR_OR_ZERO(rqstp));
return rqstp;
}
Expand Down
9 changes: 7 additions & 2 deletions fs/nfs/callback_proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,11 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));

inode = nfs_delegation_find_inode(cps->clp, &args->fh);
if (inode == NULL)
if (inode == NULL) {
trace_nfs4_cb_getattr(cps->clp, &args->fh, NULL,
-ntohl(res->status));
goto out;
}
nfsi = NFS_I(inode);
rcu_read_lock();
delegation = rcu_dereference(nfsi->delegation);
Expand All @@ -60,6 +63,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
res->status = 0;
out_iput:
rcu_read_unlock();
trace_nfs4_cb_getattr(cps->clp, &args->fh, inode, -ntohl(res->status));
iput(inode);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status));
Expand Down Expand Up @@ -194,6 +198,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&free_me_list);
pnfs_put_layout_hdr(lo);
trace_nfs4_cb_layoutrecall_inode(clp, &args->cbl_fh, ino, -rv);
iput(ino);
out:
return rv;
Expand Down Expand Up @@ -554,7 +559,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
status = htonl(NFS4_OK);

nfs41_set_target_slotid(fc_tbl, args->crsa_target_highest_slotid);
nfs41_server_notify_target_slotid_update(cps->clp);
nfs41_notify_server(cps->clp);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
Expand Down
113 changes: 2 additions & 111 deletions fs/nfs/client.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/unistd.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/metrics.h>
Expand Down Expand Up @@ -285,116 +286,6 @@ void nfs_put_client(struct nfs_client *clp)
}
EXPORT_SYMBOL_GPL(nfs_put_client);

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/*
* Test if two ip6 socket addresses refer to the same socket by
* comparing relevant fields. The padding bytes specifically, are not
* compared. sin6_flowinfo is not compared because it only affects QoS
* and sin6_scope_id is only compared if the address is "link local"
* because "link local" addresses need only be unique to a specific
* link. Conversely, ordinary unicast addresses might have different
* sin6_scope_id.
*
* The caller should ensure both socket addresses are AF_INET6.
*/
static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
const struct sockaddr *sa2)
{
const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;

if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr))
return 0;
else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL)
return sin1->sin6_scope_id == sin2->sin6_scope_id;

return 1;
}
#else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
const struct sockaddr *sa2)
{
return 0;
}
#endif

/*
* Test if two ip4 socket addresses refer to the same socket, by
* comparing relevant fields. The padding bytes specifically, are
* not compared.
*
* The caller should ensure both socket addresses are AF_INET.
*/
static int nfs_sockaddr_match_ipaddr4(const struct sockaddr *sa1,
const struct sockaddr *sa2)
{
const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;

return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
}

static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
const struct sockaddr *sa2)
{
const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;

return nfs_sockaddr_match_ipaddr6(sa1, sa2) &&
(sin1->sin6_port == sin2->sin6_port);
}

static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
const struct sockaddr *sa2)
{
const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;

return nfs_sockaddr_match_ipaddr4(sa1, sa2) &&
(sin1->sin_port == sin2->sin_port);
}

#if defined(CONFIG_NFS_V4_1)
/*
* Test if two socket addresses represent the same actual socket,
* by comparing (only) relevant fields, excluding the port number.
*/
int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
const struct sockaddr *sa2)
{
if (sa1->sa_family != sa2->sa_family)
return 0;

switch (sa1->sa_family) {
case AF_INET:
return nfs_sockaddr_match_ipaddr4(sa1, sa2);
case AF_INET6:
return nfs_sockaddr_match_ipaddr6(sa1, sa2);
}
return 0;
}
EXPORT_SYMBOL_GPL(nfs_sockaddr_match_ipaddr);
#endif /* CONFIG_NFS_V4_1 */

/*
* Test if two socket addresses represent the same actual socket,
* by comparing (only) relevant fields, including the port number.
*/
static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
const struct sockaddr *sa2)
{
if (sa1->sa_family != sa2->sa_family)
return 0;

switch (sa1->sa_family) {
case AF_INET:
return nfs_sockaddr_cmp_ip4(sa1, sa2);
case AF_INET6:
return nfs_sockaddr_cmp_ip6(sa1, sa2);
}
return 0;
}

/*
* Find an nfs_client on the list that matches the initialisation data
* that is supplied.
Expand All @@ -421,7 +312,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
if (clp->cl_minorversion != data->minorversion)
continue;
/* Match the full socket address */
if (!nfs_sockaddr_cmp(sap, clap))
if (!rpc_cmp_addr_port(sap, clap))
continue;

atomic_inc(&clp->cl_count);
Expand Down
Loading

0 comments on commit 4e4adb2

Please sign in to comment.