From af76fc6c158d5e70764c9cc277aefe7a134436fd Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 5 Jun 2019 13:46:29 -0400 Subject: [PATCH 01/55] NFSD fill-in netloc4 structure nfs.4 defines nfs42_netaddr structure that represents netloc4. Populate needed fields from the sockaddr structure. This will be used by flexfiles and 4.2 inter copy Signed-off-by: Olga Kornievskaia --- fs/nfsd/nfsd.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 57b93d95fa5c8..0ff6ef964a486 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -387,6 +388,37 @@ void nfsd_lockd_shutdown(void); extern const u32 nfsd_suppattrs[3][3]; +static inline u32 nfsd4_set_netaddr(struct sockaddr *addr, + struct nfs42_netaddr *netaddr) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; + unsigned int port; + size_t ret_addr, ret_port; + + switch (addr->sa_family) { + case AF_INET: + port = ntohs(sin->sin_port); + sprintf(netaddr->netid, "tcp"); + netaddr->netid_len = 3; + break; + case AF_INET6: + port = ntohs(sin6->sin6_port); + sprintf(netaddr->netid, "tcp6"); + netaddr->netid_len = 4; + break; + default: + return nfserr_inval; + } + ret_addr = rpc_ntop(addr, netaddr->addr, sizeof(netaddr->addr)); + ret_port = snprintf(netaddr->addr + ret_addr, + RPCBIND_MAXUADDRLEN + 1 - ret_addr, + ".%u.%u", port >> 8, port & 0xff); + WARN_ON(ret_port >= RPCBIND_MAXUADDRLEN + 1 - ret_addr); + netaddr->addr_len = ret_addr + ret_port; + return 0; +} + static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2) { return !((bm1[0] & ~bm2[0]) || From 84e1b21d5ec4cc1b005586f32c67c046ea4ffb8a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 13 Sep 2019 14:00:57 -0400 Subject: [PATCH 02/55] NFSD add ca_source_server<> to COPY Decode the ca_source_server list that's sent but only use the first one. Presence of non-zero list indicates an "inter" copy. Signed-off-by: Andy Adamson Signed-off-by: Olga Kornievskaia --- fs/nfsd/nfs4xdr.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++- fs/nfsd/xdr4.h | 12 +++++---- 2 files changed, 71 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index d2dc4c0e22e89..a1d839406dc1e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "idmap.h" #include "acl.h" @@ -1744,10 +1745,47 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) DECODE_TAIL; } +static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, + struct nl4_server *ns) +{ + DECODE_HEAD; + struct nfs42_netaddr *naddr; + + READ_BUF(4); + ns->nl4_type = be32_to_cpup(p++); + + /* currently support for 1 inter-server source server */ + switch (ns->nl4_type) { + case NL4_NETADDR: + naddr = &ns->u.nl4_addr; + + READ_BUF(4); + naddr->netid_len = be32_to_cpup(p++); + if (naddr->netid_len > RPCBIND_MAXNETIDLEN) + goto xdr_error; + + READ_BUF(naddr->netid_len + 4); /* 4 for uaddr len */ + COPYMEM(naddr->netid, naddr->netid_len); + + naddr->addr_len = be32_to_cpup(p++); + if (naddr->addr_len > RPCBIND_MAXUADDRLEN) + goto xdr_error; + + READ_BUF(naddr->addr_len); + COPYMEM(naddr->addr, naddr->addr_len); + break; + default: + goto xdr_error; + } + DECODE_TAIL; +} + static __be32 nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) { DECODE_HEAD; + struct nl4_server *ns_dummy; + int i, count; status = nfsd4_decode_stateid(argp, ©->cp_src_stateid); if (status) @@ -1762,7 +1800,32 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) p = xdr_decode_hyper(p, ©->cp_count); p++; /* ca_consecutive: we always do consecutive copies */ copy->cp_synchronous = be32_to_cpup(p++); - /* tmp = be32_to_cpup(p); Source server list not supported */ + + count = be32_to_cpup(p++); + + copy->cp_intra = false; + if (count == 0) { /* intra-server copy */ + copy->cp_intra = true; + goto intra; + } + + /* decode all the supplied server addresses but use first */ + status = nfsd4_decode_nl4_server(argp, ©->cp_src); + if (status) + return status; + + ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL); + if (ns_dummy == NULL) + return nfserrno(-ENOMEM); + for (i = 0; i < count - 1; i++) { + status = nfsd4_decode_nl4_server(argp, ns_dummy); + if (status) { + kfree(ns_dummy); + return status; + } + } + kfree(ns_dummy); +intra: DECODE_TAIL; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index f4737d66ee984..e815a9cc3b05a 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -518,11 +518,13 @@ struct nfsd42_write_res { struct nfsd4_copy { /* request */ - stateid_t cp_src_stateid; - stateid_t cp_dst_stateid; - u64 cp_src_pos; - u64 cp_dst_pos; - u64 cp_count; + stateid_t cp_src_stateid; + stateid_t cp_dst_stateid; + u64 cp_src_pos; + u64 cp_dst_pos; + u64 cp_count; + struct nl4_server cp_src; + bool cp_intra; /* both */ bool cp_synchronous; From 51911868fc62f6b1bc460ea2d8bddece6c72e467 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 8 Aug 2019 11:14:59 -0400 Subject: [PATCH 03/55] NFSD COPY_NOTIFY xdr Signed-off-by: Olga Kornievskaia --- fs/nfsd/nfs4proc.c | 28 +++++++++++++++ fs/nfsd/nfs4xdr.c | 90 ++++++++++++++++++++++++++++++++++++++++++++-- fs/nfsd/xdr4.h | 13 +++++++ 3 files changed, 129 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4798667af647c..d3f60562ac1d6 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1339,6 +1339,13 @@ nfsd4_offload_cancel(struct svc_rqst *rqstp, return status; } +static __be32 +nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + return nfserr_notsupp; +} + static __be32 nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_fallocate *fallocate, int flags) @@ -2292,6 +2299,21 @@ static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp, 1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32); } +static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + return (op_encode_hdr_size + + 3 /* cnr_lease_time */ + + 1 /* We support one cnr_source_server */ + + 1 /* cnr_stateid seq */ + + op_encode_stateid_maxsz /* cnr_stateid */ + + 1 /* num cnr_source_server*/ + + 1 /* nl4_type */ + + 1 /* nl4 size */ + + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) /*nl4_loc + nl4_loc_sz */) + * sizeof(__be32); +} + #ifdef CONFIG_NFSD_PNFS static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { @@ -2716,6 +2738,12 @@ static const struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_OFFLOAD_CANCEL", .op_rsize_bop = nfsd4_only_status_rsize, }, + [OP_COPY_NOTIFY] = { + .op_func = nfsd4_copy_notify, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_COPY_NOTIFY", + .op_rsize_bop = nfsd4_copy_notify_rsize, + }, }; /** diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a1d839406dc1e..1d8a08b84e32f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1837,6 +1837,18 @@ nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, return nfsd4_decode_stateid(argp, &os->stateid); } +static __be32 +nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, + struct nfsd4_copy_notify *cn) +{ + int status; + + status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid); + if (status) + return status; + return nfsd4_decode_nl4_server(argp, &cn->cpn_dst); +} + static __be32 nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) { @@ -1938,7 +1950,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = { /* new operations for NFSv4.2 */ [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy, - [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify, [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, @@ -4306,6 +4318,46 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, return nfs_ok; } +static __be32 +nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) +{ + struct xdr_stream *xdr = &resp->xdr; + struct nfs42_netaddr *addr; + __be32 *p; + + p = xdr_reserve_space(xdr, 4); + *p++ = cpu_to_be32(ns->nl4_type); + + switch (ns->nl4_type) { + case NL4_NETADDR: + addr = &ns->u.nl4_addr; + + /* netid_len, netid, uaddr_len, uaddr (port included + * in RPCBIND_MAXUADDRLEN) + */ + p = xdr_reserve_space(xdr, + 4 /* netid len */ + + (XDR_QUADLEN(addr->netid_len) * 4) + + 4 /* uaddr len */ + + (XDR_QUADLEN(addr->addr_len) * 4)); + if (!p) + return nfserr_resource; + + *p++ = cpu_to_be32(addr->netid_len); + p = xdr_encode_opaque_fixed(p, addr->netid, + addr->netid_len); + *p++ = cpu_to_be32(addr->addr_len); + p = xdr_encode_opaque_fixed(p, addr->addr, + addr->addr_len); + break; + default: + WARN_ON_ONCE(ns->nl4_type != NL4_NETADDR); + return nfserr_inval; + } + + return 0; +} + static __be32 nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_copy *copy) @@ -4339,6 +4391,40 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr; } +static __be32 +nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_copy_notify *cn) +{ + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; + + if (nfserr) + return nfserr; + + /* 8 sec, 4 nsec */ + p = xdr_reserve_space(xdr, 12); + if (!p) + return nfserr_resource; + + /* cnr_lease_time */ + p = xdr_encode_hyper(p, cn->cpn_sec); + *p++ = cpu_to_be32(cn->cpn_nsec); + + /* cnr_stateid */ + nfserr = nfsd4_encode_stateid(xdr, &cn->cpn_cnr_stateid); + if (nfserr) + return nfserr; + + /* cnr_src.nl_nsvr */ + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + + *p++ = cpu_to_be32(1); + + return nfsd42_encode_nl4_server(resp, &cn->cpn_src); +} + static __be32 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_seek *seek) @@ -4436,7 +4522,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = { /* NFSv4.2 operations */ [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy, - [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop, + [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify, [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index e815a9cc3b05a..8231fe04bec0f 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -570,6 +570,18 @@ struct nfsd4_offload_status { u32 status; }; +struct nfsd4_copy_notify { + /* request */ + stateid_t cpn_src_stateid; + struct nl4_server cpn_dst; + + /* response */ + stateid_t cpn_cnr_stateid; + u64 cpn_sec; + u32 cpn_nsec; + struct nl4_server cpn_src; +}; + struct nfsd4_op { int opnum; const struct nfsd4_operation * opdesc; @@ -629,6 +641,7 @@ struct nfsd4_op { struct nfsd4_clone clone; struct nfsd4_copy copy; struct nfsd4_offload_status offload_status; + struct nfsd4_copy_notify copy_notify; struct nfsd4_seek seek; } u; struct nfs4_replay * replay; From 624322f1adc58acd0b69f77a6ddc764207e97241 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 4 Oct 2019 16:34:26 -0400 Subject: [PATCH 04/55] NFSD add COPY_NOTIFY operation Introducing the COPY_NOTIFY operation. Create a new unique stateid that will keep track of the copy state and the upcoming READs that will use that stateid. Each associated parent stateid has a list of copy notify stateids. A copy notify structure makes a copy of the parent stateid and a clientid and will use it to look up the parent stateid during the READ request (suggested by Trond Myklebust ). At nfs4_put_stid() time, we walk the list of the associated copy notify stateids and delete them. Laundromat thread will traverse globally stored copy notify stateid in idr and notice if any haven't been referenced in the lease period, if so, it'll remove them. Return single netaddr to advertise to the copy. Suggested-by: Trond Myklebust Signed-off-by: Olga Kornievskaia Signed-off-by: Andy Adamson --- fs/nfsd/nfs4proc.c | 58 +++++++++++++++++++----- fs/nfsd/nfs4state.c | 106 +++++++++++++++++++++++++++++++++++++++----- fs/nfsd/state.h | 31 +++++++++++-- fs/nfsd/xdr4.h | 2 +- 4 files changed, 173 insertions(+), 24 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d3f60562ac1d6..dee5aec098fd9 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "idmap.h" #include "cache.h" @@ -776,7 +777,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &read->rd_stateid, RD_STATE, - &read->rd_nf); + &read->rd_nf, NULL); if (status) { dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; @@ -948,7 +949,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &setattr->sa_stateid, - WR_STATE, NULL); + WR_STATE, NULL, NULL); if (status) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); return status; @@ -999,7 +1000,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, trace_nfsd_write_start(rqstp, &cstate->current_fh, write->wr_offset, cnt); status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, - stateid, WR_STATE, &nf); + stateid, WR_STATE, &nf, NULL); if (status) { dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; @@ -1034,14 +1035,14 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_nofilehandle; status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, - src_stateid, RD_STATE, src); + src_stateid, RD_STATE, src, NULL); if (status) { dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); goto out; } status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, - dst_stateid, WR_STATE, dst); + dst_stateid, WR_STATE, dst, NULL); if (status) { dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); goto out_put_src; @@ -1221,7 +1222,7 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) static void cleanup_async_copy(struct nfsd4_copy *copy) { - nfs4_free_cp_state(copy); + nfs4_free_copy_state(copy); nfsd_file_put(copy->nf_dst); nfsd_file_put(copy->nf_src); spin_lock(©->cp_clp->async_lock); @@ -1275,7 +1276,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!async_copy) goto out; - if (!nfs4_init_cp_state(nn, copy)) { + if (!nfs4_init_copy_state(nn, copy)) { kfree(async_copy); goto out; } @@ -1343,7 +1344,44 @@ static __be32 nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { - return nfserr_notsupp; + struct nfsd4_copy_notify *cn = &u->copy_notify; + __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_stid *stid; + struct nfs4_cpntf_state *cps; + struct nfs4_client *clp = cstate->clp; + + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + &cn->cpn_src_stateid, RD_STATE, NULL, + &stid); + if (status) + return status; + + cn->cpn_sec = nn->nfsd4_lease; + cn->cpn_nsec = 0; + + status = nfserrno(-ENOMEM); + cps = nfs4_alloc_init_cpntf_state(nn, stid); + if (!cps) + goto out; + memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t)); + memcpy(&cps->cp_p_stateid, &stid->sc_stateid, sizeof(stateid_t)); + memcpy(&cps->cp_p_clid, &clp->cl_clientid, sizeof(clientid_t)); + + /* For now, only return one server address in cpn_src, the + * address used by the client to connect to this server. + */ + cn->cpn_src.nl4_type = NL4_NETADDR; + status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr, + &cn->cpn_src.u.nl4_addr); + WARN_ON_ONCE(status); + if (status) { + nfs4_put_cpntf_state(nn, cps); + goto out; + } +out: + nfs4_put_stid(stid); + return status; } static __be32 @@ -1355,7 +1393,7 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &fallocate->falloc_stateid, - WR_STATE, &nf); + WR_STATE, &nf, NULL); if (status != nfs_ok) { dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); return status; @@ -1414,7 +1452,7 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &seek->seek_stateid, - RD_STATE, &nf); + RD_STATE, &nf, NULL); if (status) { dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); return status; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 369e574c5092a..ae3dff2101085 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -80,6 +80,7 @@ static u64 current_sessionid = 1; static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); static void nfs4_free_ol_stateid(struct nfs4_stid *stid); void nfsd4_end_grace(struct nfsd_net *nn); +static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); /* Locking: */ @@ -722,6 +723,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla /* Will be incremented before return to client: */ refcount_set(&stid->sc_count, 1); spin_lock_init(&stid->sc_lock); + INIT_LIST_HEAD(&stid->sc_cp_list); /* * It shouldn't be a problem to reuse an opaque stateid value. @@ -741,30 +743,76 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla /* * Create a unique stateid_t to represent each COPY. */ -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy) +static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, + unsigned char sc_type) { int new_id; + stid->stid.si_opaque.so_clid.cl_boot = nn->boot_time; + stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; + stid->sc_type = sc_type; + idr_preload(GFP_KERNEL); spin_lock(&nn->s2s_cp_lock); - new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT); + new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT); + stid->stid.si_opaque.so_id = new_id; spin_unlock(&nn->s2s_cp_lock); idr_preload_end(); if (new_id < 0) return 0; - copy->cp_stateid.si_opaque.so_id = new_id; - copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time; - copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; return 1; } -void nfs4_free_cp_state(struct nfsd4_copy *copy) +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy) +{ + return nfs4_init_cp_state(nn, ©->cp_stateid, NFS4_COPY_STID); +} + +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, + struct nfs4_stid *p_stid) +{ + struct nfs4_cpntf_state *cps; + + cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL); + if (!cps) + return NULL; + cps->cpntf_time = get_seconds(); + refcount_set(&cps->cp_stateid.sc_count, 1); + if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID)) + goto out_free; + spin_lock(&nn->s2s_cp_lock); + list_add(&cps->cp_list, &p_stid->sc_cp_list); + spin_unlock(&nn->s2s_cp_lock); + return cps; +out_free: + kfree(cps); + return NULL; +} + +void nfs4_free_copy_state(struct nfsd4_copy *copy) { struct nfsd_net *nn; + WARN_ON_ONCE(copy->cp_stateid.sc_type != NFS4_COPY_STID); nn = net_generic(copy->cp_clp->net, nfsd_net_id); spin_lock(&nn->s2s_cp_lock); - idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id); + idr_remove(&nn->s2s_cp_stateids, + copy->cp_stateid.stid.si_opaque.so_id); + spin_unlock(&nn->s2s_cp_lock); +} + +static void nfs4_free_cpntf_statelist(struct net *net, struct nfs4_stid *stid) +{ + struct nfs4_cpntf_state *cps; + struct nfsd_net *nn; + + nn = net_generic(net, nfsd_net_id); + spin_lock(&nn->s2s_cp_lock); + while (!list_empty(&stid->sc_cp_list)) { + cps = list_first_entry(&stid->sc_cp_list, + struct nfs4_cpntf_state, cp_list); + _free_cpntf_state_locked(nn, cps); + } spin_unlock(&nn->s2s_cp_lock); } @@ -915,6 +963,7 @@ nfs4_put_stid(struct nfs4_stid *s) return; } idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); + nfs4_free_cpntf_statelist(clp->net, s); spin_unlock(&clp->cl_lock); s->sc_free(s); if (fp) @@ -5215,6 +5264,9 @@ nfs4_laundromat(struct nfsd_net *nn) struct list_head *pos, *next, reaplist; time_t cutoff = get_seconds() - nn->nfsd4_lease; time_t t, new_timeo = nn->nfsd4_lease; + struct nfs4_cpntf_state *cps; + copy_stateid_t *cps_t; + int i; dprintk("NFSD: laundromat service - starting\n"); @@ -5225,6 +5277,17 @@ nfs4_laundromat(struct nfsd_net *nn) dprintk("NFSD: end of grace period\n"); nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); + + spin_lock(&nn->s2s_cp_lock); + idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { + cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); + if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID && + !time_after((unsigned long)cps->cpntf_time, + (unsigned long)cutoff)) + _free_cpntf_state_locked(nn, cps); + } + spin_unlock(&nn->s2s_cp_lock); + spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); @@ -5600,6 +5663,24 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s, out: return status; } +static void +_free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) +{ + WARN_ON_ONCE(cps->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID); + if (!refcount_dec_and_test(&cps->cp_stateid.sc_count)) + return; + list_del(&cps->cp_list); + idr_remove(&nn->s2s_cp_stateids, + cps->cp_stateid.stid.si_opaque.so_id); + kfree(cps); +} + +void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) +{ + spin_lock(&nn->s2s_cp_lock); + _free_cpntf_state_locked(nn, cps); + spin_unlock(&nn->s2s_cp_lock); +} /* * Checks for stateid operations @@ -5607,7 +5688,8 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s, __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct svc_fh *fhp, - stateid_t *stateid, int flags, struct nfsd_file **nfp) + stateid_t *stateid, int flags, struct nfsd_file **nfp, + struct nfs4_stid **cstid) { struct inode *ino = d_inode(fhp->fh_dentry); struct net *net = SVC_NET(rqstp); @@ -5656,8 +5738,12 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, if (status == nfs_ok && nfp) status = nfs4_check_file(rqstp, fhp, s, nfp, flags); out: - if (s) - nfs4_put_stid(s); + if (s) { + if (!status && cstid) + *cstid = s; + else + nfs4_put_stid(s); + } return status; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index d61b83b9654c5..35eb7170aefcb 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -56,6 +56,14 @@ typedef struct { stateid_opaque_t si_opaque; } stateid_t; +typedef struct { + stateid_t stid; +#define NFS4_COPY_STID 1 +#define NFS4_COPYNOTIFY_STID 2 + unsigned char sc_type; + refcount_t sc_count; +} copy_stateid_t; + #define STATEID_FMT "(%08x/%08x/%08x/%08x)" #define STATEID_VAL(s) \ (s)->si_opaque.so_clid.cl_boot, \ @@ -96,6 +104,7 @@ struct nfs4_stid { #define NFS4_REVOKED_DELEG_STID 16 #define NFS4_CLOSED_DELEG_STID 32 #define NFS4_LAYOUT_STID 64 + struct list_head sc_cp_list; unsigned char sc_type; stateid_t sc_stateid; spinlock_t sc_lock; @@ -104,6 +113,17 @@ struct nfs4_stid { void (*sc_free)(struct nfs4_stid *); }; +/* Keep a list of stateids issued by the COPY_NOTIFY, associate it with the + * parent OPEN/LOCK/DELEG stateid. + */ +struct nfs4_cpntf_state { + copy_stateid_t cp_stateid; + struct list_head cp_list; /* per parent nfs4_stid */ + stateid_t cp_p_stateid; /* copy of parent's stateid */ + clientid_t cp_p_clid; /* copy of parent's clid */ + time_t cpntf_time; /* last time stateid used */ +}; + /* * Represents a delegation stateid. The nfs4_client holds references to these * and they are put when it is being destroyed or when the delegation is @@ -618,14 +638,17 @@ struct nfsd4_copy; extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct svc_fh *fhp, - stateid_t *stateid, int flags, struct nfsd_file **filp); + stateid_t *stateid, int flags, struct nfsd_file **filp, + struct nfs4_stid **cstid); __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, struct nfsd_net *nn); struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, void (*sc_free)(struct nfs4_stid *)); -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy); -void nfs4_free_cp_state(struct nfsd4_copy *copy); +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy); +void nfs4_free_copy_state(struct nfsd4_copy *copy); +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, + struct nfs4_stid *p_stid); void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); @@ -655,6 +678,8 @@ void put_nfs4_file(struct nfs4_file *fi); extern void nfs4_put_copy(struct nfsd4_copy *copy); extern struct nfsd4_copy * find_async_copy(struct nfs4_client *clp, stateid_t *staetid); +extern void nfs4_put_cpntf_state(struct nfsd_net *nn, + struct nfs4_cpntf_state *cps); static inline void get_nfs4_file(struct nfs4_file *fi) { refcount_inc(&fi->fi_ref); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 8231fe04bec0f..2937e06f35548 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -542,7 +542,7 @@ struct nfsd4_copy { struct nfsd_file *nf_src; struct nfsd_file *nf_dst; - stateid_t cp_stateid; + copy_stateid_t cp_stateid; struct list_head copies; struct task_struct *copy_task; From b7342204253aaa1ce8351e0d94b43f98c8706cee Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 6 Sep 2019 15:17:21 -0400 Subject: [PATCH 05/55] NFSD check stateids against copy stateids Incoming stateid (used by a READ) could be a saved copy stateid. Using the provided stateid, look it up in the list of copy_notify stateids. If found, use the parent's stateid and parent's clid to look up the parent's stid to do the appropriate checks. Update the copy notify timestamp (cpntf_time) with current time this making it 'active' so that laundromat thread will not delete copy notify state. Signed-off-by: Olga Kornievskaia --- fs/nfsd/nfs4state.c | 74 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ae3dff2101085..31b71aed4230b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4539,7 +4539,8 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4 static __be32 lookup_clientid(clientid_t *clid, struct nfsd4_compound_state *cstate, - struct nfsd_net *nn) + struct nfsd_net *nn, + bool sessions) { struct nfs4_client *found; @@ -4560,7 +4561,7 @@ static __be32 lookup_clientid(clientid_t *clid, */ WARN_ON_ONCE(cstate->session); spin_lock(&nn->client_lock); - found = find_confirmed_client(clid, false, nn); + found = find_confirmed_client(clid, sessions, nn); if (!found) { spin_unlock(&nn->client_lock); return nfserr_expired; @@ -4593,7 +4594,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, if (open->op_file == NULL) return nfserr_jukebox; - status = lookup_clientid(clientid, cstate, nn); + status = lookup_clientid(clientid, cstate, nn, false); if (status) return status; clp = cstate->clp; @@ -5182,7 +5183,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("process_renew(%08x/%08x): starting\n", clid->cl_boot, clid->cl_id); - status = lookup_clientid(clid, cstate, nn); + status = lookup_clientid(clid, cstate, nn, false); if (status) goto out; clp = cstate->clp; @@ -5584,7 +5585,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) return nfserr_bad_stateid; - status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn); + status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn, + false); if (status == nfserr_stale_clientid) { if (cstate->session) return nfserr_bad_stateid; @@ -5674,6 +5676,59 @@ _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) cps->cp_stateid.stid.si_opaque.so_id); kfree(cps); } +/* + * A READ from an inter server to server COPY will have a + * copy stateid. Look up the copy notify stateid from the + * idr structure and take a reference on it. + */ +static __be32 _find_cpntf_state(struct nfsd_net *nn, stateid_t *st, + struct nfs4_cpntf_state **cps) +{ + copy_stateid_t *cps_t; + struct nfs4_cpntf_state *state = NULL; + + if (st->si_opaque.so_clid.cl_id != nn->s2s_cp_cl_id) + return nfserr_bad_stateid; + spin_lock(&nn->s2s_cp_lock); + cps_t = idr_find(&nn->s2s_cp_stateids, st->si_opaque.so_id); + if (cps_t) { + state = container_of(cps_t, struct nfs4_cpntf_state, + cp_stateid); + if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) + return nfserr_bad_stateid; + refcount_inc(&state->cp_stateid.sc_count); + } + spin_unlock(&nn->s2s_cp_lock); + if (!state) + return nfserr_bad_stateid; + *cps = state; + return 0; +} + +static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st, + struct nfs4_stid **stid) +{ + __be32 status; + struct nfs4_cpntf_state *cps = NULL; + struct nfsd4_compound_state cstate; + + status = _find_cpntf_state(nn, st, &cps); + if (status) + return status; + + cps->cpntf_time = get_seconds(); + memset(&cstate, 0, sizeof(cstate)); + status = lookup_clientid(&cps->cp_p_clid, &cstate, nn, true); + if (status) + goto out; + status = nfsd4_lookup_stateid(&cstate, &cps->cp_p_stateid, + NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, + stid, nn); + put_client_renew(cstate.clp); +out: + nfs4_put_cpntf_state(nn, cps); + return status; +} void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) { @@ -5711,6 +5766,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, nn); + if (status == nfserr_bad_stateid) + status = find_cpntf_state(nn, stateid, &s); if (status) return status; status = nfsd4_stid_check_stateid_generation(stateid, s, @@ -6743,7 +6800,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_inval; if (!nfsd4_has_session(cstate)) { - status = lookup_clientid(&lockt->lt_clientid, cstate, nn); + status = lookup_clientid(&lockt->lt_clientid, cstate, nn, + false); if (status) goto out; } @@ -6927,7 +6985,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); - status = lookup_clientid(clid, cstate, nn); + status = lookup_clientid(clid, cstate, nn, false); if (status) return status; @@ -7074,7 +7132,7 @@ nfs4_check_open_reclaim(clientid_t *clid, __be32 status; /* find clientid in conf_id_hashtbl */ - status = lookup_clientid(clid, cstate, nn); + status = lookup_clientid(clid, cstate, nn, false); if (status) return nfserr_reclaim_bad; From 51100d2b87cba12b09db79fa6577adccc0c2d14f Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 13 Sep 2018 13:58:24 -0400 Subject: [PATCH 06/55] NFSD generalize nfsd4_compound_state flag names Allow for sid_flag field non-stateid use. Signed-off-by: Andy Adamson --- fs/nfsd/nfs4proc.c | 8 ++++---- fs/nfsd/nfs4state.c | 7 ++++--- fs/nfsd/xdr4.h | 6 +++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index dee5aec098fd9..8cc9f6e91874c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -531,9 +531,9 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_restorefh; fh_dup2(&cstate->current_fh, &cstate->save_fh); - if (HAS_STATE_ID(cstate, SAVED_STATE_ID_FLAG)) { + if (HAS_CSTATE_FLAG(cstate, SAVED_STATE_ID_FLAG)) { memcpy(&cstate->current_stateid, &cstate->save_stateid, sizeof(stateid_t)); - SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG); } return nfs_ok; } @@ -543,9 +543,9 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { fh_dup2(&cstate->save_fh, &cstate->current_fh); - if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) { + if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG)) { memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t)); - SET_STATE_ID(cstate, SAVED_STATE_ID_FLAG); + SET_CSTATE_FLAG(cstate, SAVED_STATE_ID_FLAG); } return nfs_ok; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 31b71aed4230b..ed5e80b3ac1f7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7930,7 +7930,8 @@ nfs4_state_shutdown(void) static void get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) { - if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid)) + if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG) && + CURRENT_STATEID(stateid)) memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t)); } @@ -7939,14 +7940,14 @@ put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) { if (cstate->minorversion) { memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t)); - SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG); } } void clear_current_stateid(struct nfsd4_compound_state *cstate) { - CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + CLEAR_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG); } /* diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 2937e06f35548..0b4fe0795b720 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -46,9 +46,9 @@ #define CURRENT_STATE_ID_FLAG (1<<0) #define SAVED_STATE_ID_FLAG (1<<1) -#define SET_STATE_ID(c, f) ((c)->sid_flags |= (f)) -#define HAS_STATE_ID(c, f) ((c)->sid_flags & (f)) -#define CLEAR_STATE_ID(c, f) ((c)->sid_flags &= ~(f)) +#define SET_CSTATE_FLAG(c, f) ((c)->sid_flags |= (f)) +#define HAS_CSTATE_FLAG(c, f) ((c)->sid_flags & (f)) +#define CLEAR_CSTATE_FLAG(c, f) ((c)->sid_flags &= ~(f)) struct nfsd4_compound_state { struct svc_fh current_fh; From b9e8638e3d9ed8334f1f7071e081860aac37e83e Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 7 Oct 2019 10:56:48 -0400 Subject: [PATCH 07/55] NFSD: allow inter server COPY to have a STALE source server fh The inter server to server COPY source server filehandle is a foreign filehandle as the COPY is sent to the destination server. Signed-off-by: Olga Kornievskaia --- fs/nfsd/Kconfig | 10 ++++++++ fs/nfsd/nfs4proc.c | 57 ++++++++++++++++++++++++++++++++++++++++++---- fs/nfsd/nfsfh.h | 5 +++- fs/nfsd/xdr4.h | 1 + 4 files changed, 68 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index f2f81561ebb65..f368f3215f88f 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -134,6 +134,16 @@ config NFSD_FLEXFILELAYOUT If unsure, say N. +config NFSD_V4_2_INTER_SSC + bool "NFSv4.2 inter server to server COPY" + depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2 + help + This option enables support for NFSv4.2 inter server to + server copy where the destination server calls the NFSv4.2 + client to read the data to copy from the source server. + + If unsure, say N. + config NFSD_V4_SECURITY_LABEL bool "Provide Security Label support for NFSv4 server" depends on NFSD_V4 && SECURITY diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8cc9f6e91874c..816777c12bc78 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -504,12 +504,20 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_putfh *putfh = &u->putfh; + __be32 ret; fh_put(&cstate->current_fh); cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, putfh->pf_fhlen); - return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); + ret = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + if (ret == nfserr_stale && putfh->no_verify) { + SET_FH_FLAG(&cstate->current_fh, NFSD4_FH_FOREIGN); + ret = 0; + } +#endif + return ret; } static __be32 @@ -1957,6 +1965,45 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, - rqstp->rq_auth_slack; } +#ifdef CONFIG_NFSD_V4_2_INTER_SSC +static void +check_if_stalefh_allowed(struct nfsd4_compoundargs *args) +{ + struct nfsd4_op *op, *current_op = NULL, *saved_op = NULL; + struct nfsd4_copy *copy; + struct nfsd4_putfh *putfh; + int i; + + /* traverse all operation and if it's a COPY compound, mark the + * source filehandle to skip verification + */ + for (i = 0; i < args->opcnt; i++) { + op = &args->ops[i]; + if (op->opnum == OP_PUTFH) + current_op = op; + else if (op->opnum == OP_SAVEFH) + saved_op = current_op; + else if (op->opnum == OP_RESTOREFH) + current_op = saved_op; + else if (op->opnum == OP_COPY) { + copy = (struct nfsd4_copy *)&op->u; + if (!saved_op) { + op->status = nfserr_nofilehandle; + return; + } + putfh = (struct nfsd4_putfh *)&saved_op->u; + if (!copy->cp_intra) + putfh->no_verify = true; + } + } +} +#else +static void +check_if_stalefh_allowed(struct nfsd4_compoundargs *args) +{ +} +#endif + /* * COMPOUND call. */ @@ -2005,6 +2052,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) resp->opcnt = 1; goto encode_op; } + check_if_stalefh_allowed(args); trace_nfsd_compound(rqstp, args->opcnt); while (!status && resp->opcnt < args->opcnt) { @@ -2020,13 +2068,14 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) op->status = nfsd4_open_omfg(rqstp, cstate, op); goto encode_op; } - - if (!current_fh->fh_dentry) { + if (!current_fh->fh_dentry && + !HAS_FH_FLAG(current_fh, NFSD4_FH_FOREIGN)) { if (!(op->opdesc->op_flags & ALLOWED_WITHOUT_FH)) { op->status = nfserr_nofilehandle; goto encode_op; } - } else if (current_fh->fh_export->ex_fslocs.migrated && + } else if (current_fh->fh_export && + current_fh->fh_export->ex_fslocs.migrated && !(op->opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { op->status = nfserr_moved; goto encode_op; diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 755e256a91039..b9c75680bc316 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -35,7 +35,7 @@ typedef struct svc_fh { bool fh_locked; /* inode locked by us */ bool fh_want_write; /* remount protection taken */ - + int fh_flags; /* FH flags */ #ifdef CONFIG_NFSD_V3 bool fh_post_saved; /* post-op attrs saved */ bool fh_pre_saved; /* pre-op attrs saved */ @@ -56,6 +56,9 @@ typedef struct svc_fh { #endif /* CONFIG_NFSD_V3 */ } svc_fh; +#define NFSD4_FH_FOREIGN (1<<0) +#define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f)) +#define HAS_FH_FLAG(c, f) ((c)->fh_flags & (f)) enum nfsd_fsid { FSID_DEV = 0, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 0b4fe0795b720..b16f602af8f1b 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -221,6 +221,7 @@ struct nfsd4_lookup { struct nfsd4_putfh { u32 pf_fhlen; /* request */ char *pf_fhval; /* request */ + bool no_verify; /* represents foreigh fh */ }; struct nfsd4_open { From ce0887ac96d35c7105090e166bb0807dc0a0e838 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 9 Oct 2019 11:50:48 -0400 Subject: [PATCH 08/55] NFSD add nfs4 inter ssc to nfsd4_copy Given a universal address, mount the source server from the destination server. Use an internal mount. Call the NFS client nfs42_ssc_open to obtain the NFS struct file suitable for nfsd_copy_range. Ability to do "inter" server-to-server depends on the an nfsd kernel parameter "inter_copy_offload_enable". Signed-off-by: Olga Kornievskaia --- fs/nfsd/nfs4proc.c | 293 ++++++++++++++++++++++++++++++++++++++++---- fs/nfsd/nfs4state.c | 15 ++- fs/nfsd/nfssvc.c | 6 + fs/nfsd/state.h | 3 + fs/nfsd/xdr4.h | 5 + 5 files changed, 295 insertions(+), 27 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 816777c12bc78..fc72f57297327 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1144,6 +1144,208 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp) while ((copy = nfsd4_get_copy(clp)) != NULL) nfsd4_stop_copy(copy); } +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + +extern struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, + struct nfs_fh *src_fh, + nfs4_stateid *stateid); +extern void nfs42_ssc_close(struct file *filep); + +extern void nfs_sb_deactive(struct super_block *sb); + +#define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys" + +/** + * Support one copy source server for now. + */ +static __be32 +nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, + struct vfsmount **mount) +{ + struct file_system_type *type; + struct vfsmount *ss_mnt; + struct nfs42_netaddr *naddr; + struct sockaddr_storage tmp_addr; + size_t tmp_addrlen, match_netid_len = 3; + char *startsep = "", *endsep = "", *match_netid = "tcp"; + char *ipaddr, *dev_name, *raw_data; + int len, raw_len, status = -EINVAL; + + naddr = &nss->u.nl4_addr; + tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr, + naddr->addr_len, + (struct sockaddr *)&tmp_addr, + sizeof(tmp_addr)); + if (tmp_addrlen == 0) + goto out_err; + + if (tmp_addr.ss_family == AF_INET6) { + startsep = "["; + endsep = "]"; + match_netid = "tcp6"; + match_netid_len = 4; + } + + if (naddr->netid_len != match_netid_len || + strncmp(naddr->netid, match_netid, naddr->netid_len)) + goto out_err; + + /* Construct the raw data for the vfs_kern_mount call */ + len = RPC_MAX_ADDRBUFLEN + 1; + ipaddr = kzalloc(len, GFP_KERNEL); + if (!ipaddr) + goto out_err; + + rpc_ntop((struct sockaddr *)&tmp_addr, ipaddr, len); + + /* 2 for ipv6 endsep and startsep. 3 for ":/" and trailing '/0'*/ + + raw_len = strlen(NFSD42_INTERSSC_MOUNTOPS) + strlen(ipaddr); + raw_data = kzalloc(raw_len, GFP_KERNEL); + if (!raw_data) + goto out_free_ipaddr; + + snprintf(raw_data, raw_len, NFSD42_INTERSSC_MOUNTOPS, ipaddr); + + status = -ENODEV; + type = get_fs_type("nfs"); + if (!type) + goto out_free_rawdata; + + /* Set the server: for the vfs_kern_mount call */ + dev_name = kzalloc(len + 5, GFP_KERNEL); + if (!dev_name) + goto out_free_rawdata; + snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep); + + /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */ + ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data); + module_put(type->owner); + if (IS_ERR(ss_mnt)) + goto out_free_devname; + + status = 0; + *mount = ss_mnt; + +out_free_devname: + kfree(dev_name); +out_free_rawdata: + kfree(raw_data); +out_free_ipaddr: + kfree(ipaddr); +out_err: + return status; +} + +static void +nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) +{ + nfs_sb_deactive(ss_mnt->mnt_sb); + mntput(ss_mnt); +} + +/** + * nfsd4_setup_inter_ssc + * + * Verify COPY destination stateid. + * Connect to the source server with NFSv4.1. + * Create the source struct file for nfsd_copy_range. + * Called with COPY cstate: + * SAVED_FH: source filehandle + * CURRENT_FH: destination filehandle + * + * Returns errno (not nfserrxxx) + */ +static __be32 +nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_copy *copy, struct vfsmount **mount) +{ + struct svc_fh *s_fh = NULL; + stateid_t *s_stid = ©->cp_src_stateid; + __be32 status = -EINVAL; + + /* Verify the destination stateid and set dst struct file*/ + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + ©->cp_dst_stateid, + WR_STATE, ©->nf_dst, NULL); + if (status) + goto out; + + status = nfsd4_interssc_connect(©->cp_src, rqstp, mount); + if (status) + goto out; + + s_fh = &cstate->save_fh; + + copy->c_fh.size = s_fh->fh_handle.fh_size; + memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size); + copy->stateid.seqid = s_stid->si_generation; + memcpy(copy->stateid.other, (void *)&s_stid->si_opaque, + sizeof(stateid_opaque_t)); + + status = 0; +out: + return status; +} + +static void +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, + struct nfsd_file *dst) +{ + nfs42_ssc_close(src->nf_file); + nfsd_file_put(src); + nfsd_file_put(dst); + mntput(ss_mnt); +} + +#else /* CONFIG_NFSD_V4_2_INTER_SSC */ + +static __be32 +nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_copy *copy, + struct vfsmount **mount) +{ + *mount = NULL; + return -EINVAL; +} + +static void +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, + struct nfsd_file *dst) +{ +} + +static void +nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) +{ +} + +static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, + struct nfs_fh *src_fh, + nfs4_stateid *stateid) +{ + return NULL; +} +#endif /* CONFIG_NFSD_V4_2_INTER_SSC */ + +static __be32 +nfsd4_setup_intra_ssc(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_copy *copy) +{ + return nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, + ©->nf_src, ©->cp_dst_stateid, + ©->nf_dst); +} + +static void +nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst) +{ + nfsd_file_put(src); + nfsd_file_put(dst); +} static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) { @@ -1209,12 +1411,16 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) status = nfs_ok; } - nfsd_file_put(copy->nf_src); - nfsd_file_put(copy->nf_dst); + if (!copy->cp_intra) /* Inter server SSC */ + nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src, + copy->nf_dst); + else + nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); + return status; } -static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) +static int dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) { dst->cp_src_pos = src->cp_src_pos; dst->cp_dst_pos = src->cp_dst_pos; @@ -1224,8 +1430,17 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) memcpy(&dst->fh, &src->fh, sizeof(src->fh)); dst->cp_clp = src->cp_clp; dst->nf_dst = nfsd_file_get(src->nf_dst); - dst->nf_src = nfsd_file_get(src->nf_src); + dst->cp_intra = src->cp_intra; + if (src->cp_intra) /* for inter, file_src doesn't exist yet */ + dst->nf_src = nfsd_file_get(src->nf_src); + memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); + memcpy(&dst->cp_src, &src->cp_src, sizeof(struct nl4_server)); + memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid)); + memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh)); + dst->ss_mnt = src->ss_mnt; + + return 0; } static void cleanup_async_copy(struct nfsd4_copy *copy) @@ -1244,7 +1459,25 @@ static int nfsd4_do_async_copy(void *data) struct nfsd4_copy *copy = (struct nfsd4_copy *)data; struct nfsd4_copy *cb_copy; + if (!copy->cp_intra) { /* Inter server SSC */ + copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL); + if (!copy->nf_src) { + copy->nfserr = nfserr_serverfault; + nfsd4_interssc_disconnect(copy->ss_mnt); + goto do_callback; + } + copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, + ©->stateid); + if (IS_ERR(copy->nf_src->nf_file)) { + kfree(copy->nf_src); + copy->nfserr = nfserr_offload_denied; + nfsd4_interssc_disconnect(copy->ss_mnt); + goto do_callback; + } + } + copy->nfserr = nfsd4_do_copy(copy, 0); +do_callback: cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!cb_copy) goto out; @@ -1256,6 +1489,8 @@ static int nfsd4_do_async_copy(void *data) &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); nfsd4_run_cb(&cb_copy->cp_cb); out: + if (!copy->cp_intra) + kfree(copy->nf_src); cleanup_async_copy(copy); return 0; } @@ -1268,11 +1503,20 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd4_copy *async_copy = NULL; - status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, - ©->nf_src, ©->cp_dst_stateid, - ©->nf_dst); - if (status) - goto out; + if (!copy->cp_intra) { /* Inter server SSC */ + if (!inter_copy_offload_enable || copy->cp_synchronous) { + status = nfserr_notsupp; + goto out; + } + status = nfsd4_setup_inter_ssc(rqstp, cstate, copy, + ©->ss_mnt); + if (status) + return nfserr_offload_denied; + } else { + status = nfsd4_setup_intra_ssc(rqstp, cstate, copy); + if (status) + return status; + } copy->cp_clp = cstate->clp; memcpy(©->fh, &cstate->current_fh.fh_handle, @@ -1283,15 +1527,15 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserrno(-ENOMEM); async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!async_copy) - goto out; - if (!nfs4_init_copy_state(nn, copy)) { - kfree(async_copy); - goto out; - } + goto out_err; + if (!nfs4_init_copy_state(nn, copy)) + goto out_err; refcount_set(&async_copy->refcount, 1); memcpy(©->cp_res.cb_stateid, ©->cp_stateid, sizeof(copy->cp_stateid)); - dup_copy_fields(copy, async_copy); + status = dup_copy_fields(copy, async_copy); + if (status) + goto out_err; async_copy->copy_task = kthread_create(nfsd4_do_async_copy, async_copy, "%s", "copy thread"); if (IS_ERR(async_copy->copy_task)) @@ -1302,13 +1546,17 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, spin_unlock(&async_copy->cp_clp->async_lock); wake_up_process(async_copy->copy_task); status = nfs_ok; - } else + } else { status = nfsd4_do_copy(copy, 1); + } out: return status; out_err: if (async_copy) cleanup_async_copy(async_copy); + status = nfserrno(-ENOMEM); + if (!copy->cp_intra) + nfsd4_interssc_disconnect(copy->ss_mnt); goto out; } @@ -1319,7 +1567,7 @@ find_async_copy(struct nfs4_client *clp, stateid_t *stateid) spin_lock(&clp->async_lock); list_for_each_entry(copy, &clp->async_copies, copies) { - if (memcmp(©->cp_stateid, stateid, NFS4_STATEID_SIZE)) + if (memcmp(©->cp_stateid.stid, stateid, NFS4_STATEID_SIZE)) continue; refcount_inc(©->refcount); spin_unlock(&clp->async_lock); @@ -1335,17 +1583,18 @@ nfsd4_offload_cancel(struct svc_rqst *rqstp, union nfsd4_op_u *u) { struct nfsd4_offload_status *os = &u->offload_status; - __be32 status = 0; struct nfsd4_copy *copy; struct nfs4_client *clp = cstate->clp; copy = find_async_copy(clp, &os->stateid); - if (copy) + if (!copy) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + + return manage_cpntf_state(nn, &os->stateid, clp, NULL); + } else nfsd4_stop_copy(copy); - else - status = nfserr_bad_stateid; - return status; + return nfs_ok; } static __be32 diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ed5e80b3ac1f7..296765e693d00 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5681,8 +5681,9 @@ _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) * copy stateid. Look up the copy notify stateid from the * idr structure and take a reference on it. */ -static __be32 _find_cpntf_state(struct nfsd_net *nn, stateid_t *st, - struct nfs4_cpntf_state **cps) +__be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, + struct nfs4_client *clp, + struct nfs4_cpntf_state **cps) { copy_stateid_t *cps_t; struct nfs4_cpntf_state *state = NULL; @@ -5696,12 +5697,16 @@ static __be32 _find_cpntf_state(struct nfsd_net *nn, stateid_t *st, cp_stateid); if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) return nfserr_bad_stateid; - refcount_inc(&state->cp_stateid.sc_count); + if (!clp) + refcount_inc(&state->cp_stateid.sc_count); + else + _free_cpntf_state_locked(nn, state); } spin_unlock(&nn->s2s_cp_lock); if (!state) return nfserr_bad_stateid; - *cps = state; + if (!clp && state) + *cps = state; return 0; } @@ -5712,7 +5717,7 @@ static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st, struct nfs4_cpntf_state *cps = NULL; struct nfsd4_compound_state cstate; - status = _find_cpntf_state(nn, st, &cps); + status = manage_cpntf_state(nn, st, NULL, &cps); if (status) return status; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index e8bee8ff30c59..c720097a91490 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -31,6 +31,12 @@ #define NFSDDBG_FACILITY NFSDDBG_SVC +bool inter_copy_offload_enable; +EXPORT_SYMBOL_GPL(inter_copy_offload_enable); +module_param(inter_copy_offload_enable, bool, 0644); +MODULE_PARM_DESC(inter_copy_offload_enable, + "Enable inter server to server copy offload. Default: false"); + extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 35eb7170aefcb..ffc590de016b3 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -680,6 +680,9 @@ extern struct nfsd4_copy * find_async_copy(struct nfs4_client *clp, stateid_t *staetid); extern void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); +extern __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, + struct nfs4_client *clp, + struct nfs4_cpntf_state **cps); static inline void get_nfs4_file(struct nfs4_file *fi) { refcount_inc(&fi->fi_ref); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index b16f602af8f1b..db63d39b1507c 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -549,7 +549,12 @@ struct nfsd4_copy { struct task_struct *copy_task; refcount_t refcount; bool stopped; + + struct vfsmount *ss_mnt; + struct nfs_fh c_fh; + nfs4_stateid stateid; }; +extern bool inter_copy_offload_enable; struct nfsd4_seek { /* request */ From 5277a79e2dee458f0185e4ebde1cd4e128f014e9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Dec 2019 10:59:36 +0300 Subject: [PATCH 09/55] nfsd: unlock on error in manage_cpntf_state() We are holding the "nn->s2s_cp_lock" so we can't return directly without unlocking first. Fixes: f3dee17721a0 ("NFSD check stateids against copy stateids") Signed-off-by: Dan Carpenter Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 296765e693d00..390ad454a229c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5695,13 +5695,16 @@ __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, if (cps_t) { state = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); - if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) - return nfserr_bad_stateid; + if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) { + state = NULL; + goto unlock; + } if (!clp) refcount_inc(&state->cp_stateid.sc_count); else _free_cpntf_state_locked(nn, state); } +unlock: spin_unlock(&nn->s2s_cp_lock); if (!state) return nfserr_bad_stateid; From 10db651210b2c618eb90a7fea4d6c5132bba7982 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 4 Dec 2019 15:13:52 -0500 Subject: [PATCH 10/55] NFSD fix mismatching type in nfsd4_set_netaddr Fix __be32 and u32 mismatch in return and assignment. Reported-by: kbuild test robot Fixes: dbd4c2dd8f13 ("NFSD add COPY_NOTIFY operation") Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 0ff6ef964a486..c679afd9fee9b 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -388,7 +388,7 @@ void nfsd_lockd_shutdown(void); extern const u32 nfsd_suppattrs[3][3]; -static inline u32 nfsd4_set_netaddr(struct sockaddr *addr, +static inline __be32 nfsd4_set_netaddr(struct sockaddr *addr, struct nfs42_netaddr *netaddr) { struct sockaddr_in *sin = (struct sockaddr_in *)addr; From 3f9544ca62bc13cf1c145d1deae7bf3270730e0a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 4 Dec 2019 15:13:53 -0500 Subject: [PATCH 11/55] NFSD: fix seqid in copy stateid s_stid->si_generation is a u32, copy->stateid.seqid is a __be32, so we should be byte-swapping here if necessary. This effectively undoes the byte-swap performed when reading s_stid->s_generation in nfsd4_decode_copy(). Without this second swap, the stateid we sent to the source in READ could be different from the one the client provided us in the COPY. We didn't spot this in testing since our implementation always uses a 0 in the seqid field. But other implementations might not do that. You'd think we should just skip the byte-swapping entirely, but the s_stid field can be used for either our own stateids (in the intra-server case) or foreign stateids (in the inter-server case), and the former are interpreted by us and need byte-swapping. Reported-by: kbuild test robot Fixes: d5e54eeb0e3d ("NFSD add nfs4 inter ssc to nfsd4_copy") Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index fc72f57297327..42fee1f94a84b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1280,7 +1280,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, copy->c_fh.size = s_fh->fh_handle.fh_size; memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size); - copy->stateid.seqid = s_stid->si_generation; + copy->stateid.seqid = cpu_to_be32(s_stid->si_generation); memcpy(copy->stateid.other, (void *)&s_stid->si_opaque, sizeof(stateid_opaque_t)); From b8290ca250fb773aeaf76995af84825e7509d0d3 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 4 Dec 2019 15:13:53 -0500 Subject: [PATCH 12/55] NFSD fix nfserro errno mismatch There is mismatch between __be32 and u32 in nfserr and errno. Reported-by: kbuild test robot Fixes: d5e54eeb0e3d ("NFSD add nfs4 inter ssc to nfsd4_copy") Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 42fee1f94a84b..d012f0894fabe 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1169,7 +1169,8 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, size_t tmp_addrlen, match_netid_len = 3; char *startsep = "", *endsep = "", *match_netid = "tcp"; char *ipaddr, *dev_name, *raw_data; - int len, raw_len, status = -EINVAL; + int len, raw_len; + __be32 status = nfserr_inval; naddr = &nss->u.nl4_addr; tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr, @@ -1207,7 +1208,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, snprintf(raw_data, raw_len, NFSD42_INTERSSC_MOUNTOPS, ipaddr); - status = -ENODEV; + status = nfserr_nodev; type = get_fs_type("nfs"); if (!type) goto out_free_rawdata; @@ -1253,8 +1254,6 @@ nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) * Called with COPY cstate: * SAVED_FH: source filehandle * CURRENT_FH: destination filehandle - * - * Returns errno (not nfserrxxx) */ static __be32 nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, @@ -1263,7 +1262,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, { struct svc_fh *s_fh = NULL; stateid_t *s_stid = ©->cp_src_stateid; - __be32 status = -EINVAL; + __be32 status = nfserr_inval; /* Verify the destination stateid and set dst struct file*/ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, @@ -1308,7 +1307,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, struct vfsmount **mount) { *mount = NULL; - return -EINVAL; + return nfserr_inval; } static void From 2e577f0faca4640348c398cb85d60a1eedac4b1e Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 4 Dec 2019 15:13:54 -0500 Subject: [PATCH 13/55] NFSD fixing possible null pointer derefering in copy offload Static checker revealed possible error path leading to possible NULL pointer dereferencing. Reported-by: Dan Carpenter Fixes: e0639dc5805a: ("NFSD introduce async copy feature") Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d012f0894fabe..d33c39c18cdd7 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1446,7 +1446,8 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) { nfs4_free_copy_state(copy); nfsd_file_put(copy->nf_dst); - nfsd_file_put(copy->nf_src); + if (copy->cp_intra) + nfsd_file_put(copy->nf_src); spin_lock(©->cp_clp->async_lock); list_del(©->copies); spin_unlock(©->cp_clp->async_lock); From d781e3df710745fbbaee4eb07fd5b64331a1b175 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 6 Dec 2019 16:07:32 -0500 Subject: [PATCH 14/55] nfsd4: avoid NULL deference on strange COPY compounds With cross-server COPY we've introduced the possibility that the current or saved filehandle might not have fh_dentry/fh_export filled in, but we missed a place that assumed it was. I think this could be triggered by a compound like: PUTFH(foreign filehandle) GETATTR SAVEFH COPY First, check_if_stalefh_allowed sets no_verify on the first (PUTFH) op. Then op_func = nfsd4_putfh runs and leaves current_fh->fh_export NULL. need_wrongsec_check returns true, since this PUTFH has OP_IS_PUTFH_LIKE set and GETATTR does not have OP_HANDLES_WRONGSEC set. We should probably also consider tightening the checks in check_if_stalefh_allowed and double-checking that we don't assume the filehandle is verified elsewhere in the compound. But I think this fixes the immediate issue. Reported-by: Dan Carpenter Fixes: 4e48f1cccab3 "NFSD: allow inter server COPY to have... " Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d33c39c18cdd7..30c75b961d68d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2368,7 +2368,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (op->opdesc->op_flags & OP_CLEAR_STATEID) clear_current_stateid(cstate); - if (need_wrongsec_check(rqstp)) + if (current_fh->fh_export && + need_wrongsec_check(rqstp)) op->status = check_nfsd_access(current_fh->fh_export, rqstp); } encode_op: From 09a80f2aef06b7c86143f5c14efd3485e0d2c139 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Dec 2019 12:33:33 -0500 Subject: [PATCH 15/55] nfsd: Return the correct number of bytes written to the file We must allow for the fact that iov_iter_write() could have returned a short write (e.g. if there was an ENOSPC issue). Fixes: d890be159a71 "nfsd: Add I/O trace points in the NFSv4 write path" Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c0dc491537a69..f0bca0e87d0c4 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -975,6 +975,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, host_err = vfs_iter_write(file, &iter, &pos, flags); if (host_err < 0) goto out_nfserr; + *cnt = host_err; nfsdstats.io_write += *cnt; fsnotify_modify(file); From fc5fc5d7cc9e0c7d2ca5c751cf5450c6bb635e57 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Thu, 19 Dec 2019 17:29:20 +0800 Subject: [PATCH 16/55] nfsd4: Remove unneeded semicolon Fixes coccicheck warning: fs/nfsd/nfs4state.c:3376:2-3: Unneeded semicolon Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 390ad454a229c..be141136aec25 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3422,7 +3422,7 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir) case NFS4_CDFC4_BACK_OR_BOTH: *dir = NFS4_CDFC4_BOTH; return nfs_ok; - }; + } return nfserr_inval; } From 57f64034966fb945fc958f95f0c51e47af590344 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 18 Dec 2019 14:57:23 -0500 Subject: [PATCH 17/55] nfsd: Clone should commit src file metadata too vfs_clone_file_range() can modify the metadata on the source file too, so we need to commit that to stable storage as well. Reported-by: Dave Chinner Signed-off-by: Trond Myklebust Acked-by: Dave Chinner Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f0bca0e87d0c4..82cf80dde5c71 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -280,19 +280,25 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, * Commit metadata changes to stable storage. */ static int -commit_metadata(struct svc_fh *fhp) +commit_inode_metadata(struct inode *inode) { - struct inode *inode = d_inode(fhp->fh_dentry); const struct export_operations *export_ops = inode->i_sb->s_export_op; - if (!EX_ISSYNC(fhp->fh_export)) - return 0; - if (export_ops->commit_metadata) return export_ops->commit_metadata(inode); return sync_inode_metadata(inode, 1); } +static int +commit_metadata(struct svc_fh *fhp) +{ + struct inode *inode = d_inode(fhp->fh_dentry); + + if (!EX_ISSYNC(fhp->fh_export)) + return 0; + return commit_inode_metadata(inode); +} + /* * Go over the attributes and take care of the small differences between * NFS semantics and what Linux expects. @@ -537,6 +543,9 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, if (sync) { loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX; int status = vfs_fsync_range(dst, dst_pos, dst_end, 0); + + if (!status) + status = commit_inode_metadata(file_inode(src)); if (status < 0) return nfserrno(status); } From fc1b20659597015a30e8ea032f168e97848c1d39 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Tue, 17 Dec 2019 16:50:47 -0600 Subject: [PATCH 18/55] nfsd: remove unnecessary assertion in nfsd4_encode_replay The replay variable is set in the only caller of nfsd4_encode_replay. The assertion is unnecessary and the patch removes this check. Signed-off-by: Aditya Pakki Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1d8a08b84e32f..85949fd6ae7fc 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4649,8 +4649,6 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) __be32 *p; struct nfs4_replay *rp = op->replay; - BUG_ON(!rp); - p = xdr_reserve_space(xdr, 8 + rp->rp_buflen); if (!p) { WARN_ON_ONCE(1); From b3f255ef6bffc18a28c3b6295357f2a3380c033f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Oct 2017 11:25:34 +0200 Subject: [PATCH 19/55] nfsd: use ktime_get_seconds() for timestamps The delegation logic in nfsd uses the somewhat inefficient seconds_since_boot() function to record time intervals. Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index be141136aec25..7bba1e150a4a9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -854,7 +854,7 @@ static void nfs4_free_deleg(struct nfs4_stid *stid) static DEFINE_SPINLOCK(blocked_delegations_lock); static struct bloom_pair { int entries, old_entries; - time_t swap_time; + time64_t swap_time; int new; /* index into 'set' */ DECLARE_BITMAP(set[2], 256); } blocked_delegations; @@ -866,15 +866,15 @@ static int delegation_blocked(struct knfsd_fh *fh) if (bd->entries == 0) return 0; - if (seconds_since_boot() - bd->swap_time > 30) { + if (ktime_get_seconds() - bd->swap_time > 30) { spin_lock(&blocked_delegations_lock); - if (seconds_since_boot() - bd->swap_time > 30) { + if (ktime_get_seconds() - bd->swap_time > 30) { bd->entries -= bd->old_entries; bd->old_entries = bd->entries; memset(bd->set[bd->new], 0, sizeof(bd->set[0])); bd->new = 1-bd->new; - bd->swap_time = seconds_since_boot(); + bd->swap_time = ktime_get_seconds(); } spin_unlock(&blocked_delegations_lock); } @@ -904,7 +904,7 @@ static void block_delegations(struct knfsd_fh *fh) __set_bit((hash>>8)&255, bd->set[bd->new]); __set_bit((hash>>16)&255, bd->set[bd->new]); if (bd->entries == 0) - bd->swap_time = seconds_since_boot(); + bd->swap_time = ktime_get_seconds(); bd->entries += 1; spin_unlock(&blocked_delegations_lock); } From e29f470396a7447764e73289fd63a4591a0b443b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 31 Oct 2019 14:52:43 +0100 Subject: [PATCH 20/55] nfsd: print 64-bit timestamps in client_info_show The nii_time field gets truncated to 'time_t' on 32-bit architectures before printing. Remove the use of 'struct timespec' to product the correct output beyond 2038. Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 5 ++--- fs/nfsd/state.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7bba1e150a4a9..27e1c12618b8b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2341,7 +2341,7 @@ static int client_info_show(struct seq_file *m, void *v) clp->cl_nii_domain.len); seq_printf(m, "\nImplementation name: "); seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len); - seq_printf(m, "\nImplementation time: [%ld, %ld]\n", + seq_printf(m, "\nImplementation time: [%lld, %ld]\n", clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec); } drop_client(clp); @@ -2995,8 +2995,7 @@ static __be32 copy_impl_id(struct nfs4_client *clp, xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL); if (!clp->cl_nii_name.data) return nfserr_jukebox; - clp->cl_nii_time.tv_sec = exid->nii_time.tv_sec; - clp->cl_nii_time.tv_nsec = exid->nii_time.tv_nsec; + clp->cl_nii_time = exid->nii_time; return 0; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index ffc590de016b3..6cf4420337865 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -340,7 +340,7 @@ struct nfs4_client { /* NFSv4.1 client implementation id: */ struct xdr_netobj cl_nii_domain; struct xdr_netobj cl_nii_name; - struct timespec cl_nii_time; + struct timespec64 cl_nii_time; /* for v4.0 and v4.1 callbacks: */ struct nfs4_cb_conn cl_cb_conn; From 92c5e46911c0c1b20fa2294298aaf008c3afe975 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 31 Oct 2019 14:55:32 +0100 Subject: [PATCH 21/55] nfsd: handle nfs3 timestamps as unsigned The decode_time3 function behaves differently on 32-bit and 64-bit architectures: on the former, a 32-bit timestamp gets converted into an signed number and then into a timestamp between 1902 and 2038, while on the latter it is interpreted as unsigned in the range 1970-2106. Change all the remaining 'timespec' in nfsd to 'timespec64' to make the behavior the same, and use the current interpretation of the dominant 64-bit architectures. Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3xdr.c | 20 ++++++++------------ fs/nfsd/nfsfh.h | 4 ++-- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 195ab7a0fc891..c997b710af273 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -32,14 +32,14 @@ static u32 nfs3_ftypes[] = { * XDR functions for basic NFS types */ static __be32 * -encode_time3(__be32 *p, struct timespec *time) +encode_time3(__be32 *p, struct timespec64 *time) { *p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec); return p; } static __be32 * -decode_time3(__be32 *p, struct timespec *time) +decode_time3(__be32 *p, struct timespec64 *time) { time->tv_sec = ntohl(*p++); time->tv_nsec = ntohl(*p++); @@ -167,7 +167,6 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { struct user_namespace *userns = nfsd_user_namespace(rqstp); - struct timespec ts; *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); *p++ = htonl((u32) (stat->mode & S_IALLUGO)); *p++ = htonl((u32) stat->nlink); @@ -183,12 +182,9 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, *p++ = htonl((u32) MINOR(stat->rdev)); p = encode_fsid(p, fhp); p = xdr_encode_hyper(p, stat->ino); - ts = timespec64_to_timespec(stat->atime); - p = encode_time3(p, &ts); - ts = timespec64_to_timespec(stat->mtime); - p = encode_time3(p, &ts); - ts = timespec64_to_timespec(stat->ctime); - p = encode_time3(p, &ts); + p = encode_time3(p, &stat->atime); + p = encode_time3(p, &stat->mtime); + p = encode_time3(p, &stat->ctime); return p; } @@ -277,8 +273,8 @@ void fill_pre_wcc(struct svc_fh *fhp) stat.size = inode->i_size; } - fhp->fh_pre_mtime = timespec64_to_timespec(stat.mtime); - fhp->fh_pre_ctime = timespec64_to_timespec(stat.ctime); + fhp->fh_pre_mtime = stat.mtime; + fhp->fh_pre_ctime = stat.ctime; fhp->fh_pre_size = stat.size; fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); fhp->fh_pre_saved = true; @@ -330,7 +326,7 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); if ((args->check_guard = ntohl(*p++)) != 0) { - struct timespec time; + struct timespec64 time; p = decode_time3(p, &time); args->guardtime = time.tv_sec; } diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index b9c75680bc316..56cfbc3615618 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -42,8 +42,8 @@ typedef struct svc_fh { /* Pre-op attributes saved during fh_lock */ __u64 fh_pre_size; /* size before operation */ - struct timespec fh_pre_mtime; /* mtime before oper */ - struct timespec fh_pre_ctime; /* ctime before oper */ + struct timespec64 fh_pre_mtime; /* mtime before oper */ + struct timespec64 fh_pre_ctime; /* ctime before oper */ /* * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode) * to find out if it is valid. From e4598e38ee7e708d751081799f27d3f11a3e687d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 31 Oct 2019 15:14:03 +0100 Subject: [PATCH 22/55] nfsd: use timespec64 in encode_time_delta The values in encode_time_delta are always small and don't overflow the range of 'struct timespec', so changing it has no effect. Change it to timespec64 as a prerequisite for removing the timespec definition later. Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 85949fd6ae7fc..9761512674a09 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2099,11 +2099,11 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, */ static __be32 *encode_time_delta(__be32 *p, struct inode *inode) { - struct timespec ts; + struct timespec64 ts; u32 ns; ns = max_t(u32, NSEC_PER_SEC/HZ, inode->i_sb->s_time_gran); - ts = ns_to_timespec(ns); + ts = ns_to_timespec64(ns); p = xdr_encode_hyper(p, ts.tv_sec); *p++ = cpu_to_be32(ts.tv_nsec); From 9cc7680149b238223bbb8bcf3272f6bd47f1fbfb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 31 Oct 2019 15:53:13 +0100 Subject: [PATCH 23/55] nfsd: make 'boot_time' 64-bit wide The local boot time variable gets truncated to time_t at the moment, which can lead to slightly odd behavior on 32-bit architectures. Use ktime_get_real_seconds() instead of get_seconds() to always get a 64-bit result, and keep it that way wherever possible. It still gets truncated in a few places: - When assigning to cl_clientid.cl_boot, this is already documented and is only used as a unique identifier. - In clients_still_reclaiming(), the truncation is to 'unsigned long' in order to use the 'time_before() helper. Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/netns.h | 2 +- fs/nfsd/nfs4recover.c | 8 ++++---- fs/nfsd/nfs4state.c | 14 +++++++------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 9a4ef815fb8c1..29bbe28eda535 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -40,7 +40,7 @@ struct nfsd_net { struct lock_manager nfsd4_manager; bool grace_ended; - time_t boot_time; + time64_t boot_time; /* internal mount of the "nfsd" pseudofilesystem: */ struct vfsmount *nfsd_mnt; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 2481e7662128f..a8fb18609146a 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -1445,7 +1445,7 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn) } cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone; - cup->cu_u.cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time; + cup->cu_u.cu_msg.cm_u.cm_gracetime = nn->boot_time; ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); if (!ret) ret = cup->cu_u.cu_msg.cm_status; @@ -1782,7 +1782,7 @@ nfsd4_cltrack_client_has_session(struct nfs4_client *clp) } static char * -nfsd4_cltrack_grace_start(time_t grace_start) +nfsd4_cltrack_grace_start(time64_t grace_start) { int copied; size_t len; @@ -1795,7 +1795,7 @@ nfsd4_cltrack_grace_start(time_t grace_start) if (!result) return result; - copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld", + copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%lld", grace_start); if (copied >= len) { /* just return nothing if output was truncated */ @@ -2004,7 +2004,7 @@ nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn) char *legacy; char timestr[22]; /* FIXME: better way to determine max size? */ - sprintf(timestr, "%ld", nn->boot_time); + sprintf(timestr, "%lld", nn->boot_time); legacy = nfsd4_cltrack_legacy_topdir(); nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL); kfree(legacy); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 27e1c12618b8b..a178374bacd5d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -748,7 +748,7 @@ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, { int new_id; - stid->stid.si_opaque.so_clid.cl_boot = nn->boot_time; + stid->stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time; stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; stid->sc_type = sc_type; @@ -1911,7 +1911,7 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) */ if (clid->cl_boot == (u32)nn->boot_time) return 0; - dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", + dprintk("NFSD stale clientid (%08x/%08x) boot_time %08llx\n", clid->cl_boot, clid->cl_id, nn->boot_time); return 1; } @@ -2271,7 +2271,7 @@ static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn) static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) { - clp->cl_clientid.cl_boot = nn->boot_time; + clp->cl_clientid.cl_boot = (u32)nn->boot_time; clp->cl_clientid.cl_id = nn->clientid_counter++; gen_confirm(clp, nn); } @@ -5233,9 +5233,9 @@ nfsd4_end_grace(struct nfsd_net *nn) */ static bool clients_still_reclaiming(struct nfsd_net *nn) { - unsigned long now = get_seconds(); - unsigned long double_grace_period_end = nn->boot_time + - 2 * nn->nfsd4_lease; + unsigned long now = (unsigned long) ktime_get_real_seconds(); + unsigned long double_grace_period_end = (unsigned long)nn->boot_time + + 2 * (unsigned long)nn->nfsd4_lease; if (nn->track_reclaim_completes && atomic_read(&nn->nr_reclaim_complete) == @@ -7792,7 +7792,7 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; nn->unconf_name_tree = RB_ROOT; - nn->boot_time = get_seconds(); + nn->boot_time = ktime_get_real_seconds(); nn->grace_ended = false; nn->nfsd4_manager.block_opens = true; INIT_LIST_HEAD(&nn->nfsd4_manager.list); From 2a1aa48929af78bfe8c4d4fe4b1ed8996af47cee Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Nov 2019 17:50:18 +0100 Subject: [PATCH 24/55] nfsd: pass a 64-bit guardtime to nfsd_setattr() Guardtime handling in nfs3 differs between 32-bit and 64-bit architectures, and uses the deprecated time_t type. Change it to using time64_t, which behaves the same way on 64-bit and 32-bit architectures, treating the number as an unsigned 32-bit entity with a range of year 1970 to 2106 consistently, and avoiding the y2038 overflow. Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4state.c | 2 +- fs/nfsd/nfsproc.c | 4 ++-- fs/nfsd/vfs.c | 4 ++-- fs/nfsd/vfs.h | 2 +- fs/nfsd/xdr3.h | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 30c75b961d68d..f39cac6e08db4 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -984,7 +984,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, - 0, (time_t)0); + 0, (time64_t)0); out: fh_drop_write(&cstate->current_fh); return status; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a178374bacd5d..36bbb18abf775 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4721,7 +4721,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, return 0; if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) return nfserr_inval; - return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0); + return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0); } static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index c83ddac22f38f..aa013b736073c 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -113,7 +113,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) } } - nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time_t)0); + nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time64_t)0); done: return nfsd_return_attrs(nfserr, resp); } @@ -380,7 +380,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) */ attr->ia_valid &= ATTR_SIZE; if (attr->ia_valid) - nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time_t)0); + nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time64_t)0); } out_unlock: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 82cf80dde5c71..fe8c5d0e8c5f8 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -364,7 +364,7 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, */ __be32 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, - int check_guard, time_t guardtime) + int check_guard, time64_t guardtime) { struct dentry *dentry; struct inode *inode; @@ -1133,7 +1133,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) iap->ia_valid &= ~(ATTR_UID|ATTR_GID); if (iap->ia_valid) - return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); + return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0); /* Callers expect file metadata to be committed here */ return nfserrno(commit_metadata(resfhp)); } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index cc110a10bfe8b..bbb485177b25f 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -48,7 +48,7 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, const char *, unsigned int, struct svc_export **, struct dentry **); __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, - struct iattr *, int, time_t); + struct iattr *, int, time64_t); int nfsd_mountpoint(struct dentry *, struct svc_export *); #ifdef CONFIG_NFSD_V4 __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 99ff9f403ff18..0fa12988fb6a4 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -14,7 +14,7 @@ struct nfsd3_sattrargs { struct svc_fh fh; struct iattr attrs; int check_guard; - time_t guardtime; + time64_t guardtime; }; struct nfsd3_diropargs { From b6356d420296e4fd5045ab3b404bfeb9f659ce08 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Nov 2019 18:06:52 +0100 Subject: [PATCH 25/55] nfsd: use time64_t in nfsd_proc_setattr() check Change to time64_t and ktime_get_real_seconds() to make the logic work correctly on 32-bit architectures beyond 2038. Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index aa013b736073c..b25c90be29fbd 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -94,7 +94,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) * Solaris, at least, doesn't seem to care what the time * request is. We require it be within 30 minutes of now. */ - time_t delta = iap->ia_atime.tv_sec - get_seconds(); + time64_t delta = iap->ia_atime.tv_sec - ktime_get_real_seconds(); nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); if (nfserr) From 2561c92b12f4f4e386d453556685f75775c0938b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Nov 2019 22:32:20 +0100 Subject: [PATCH 26/55] nfsd: fix delay timer on 32-bit architectures The nfsd4_cb_layout_done() function takes a 'time_t' value, multiplied by NSEC_PER_SEC*2 to get a nanosecond value. This works fine on 64-bit architectures, but on 32-bit, any value over 1 second results in a signed integer overflow with unexpected results. Cast one input to a 64-bit type in order to produce the same result that we have on 64-bit architectures, regarless of the type of nfsd4_lease. Fixes: 6b9b21073d3b ("nfsd: give up on CB_LAYOUTRECALLs after two lease periods") Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4layouts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 2681c70283ce2..e12409eca7cc4 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -675,7 +675,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) /* Client gets 2 lease periods to return it */ cutoff = ktime_add_ns(task->tk_start, - nn->nfsd4_lease * NSEC_PER_SEC * 2); + (u64)nn->nfsd4_lease * NSEC_PER_SEC * 2); if (ktime_before(now, cutoff)) { rpc_delay(task, HZ/100); /* 10 mili-seconds */ From 9594497f2c78993cb66b696122f7c65528ace985 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Nov 2019 14:43:17 +0100 Subject: [PATCH 27/55] nfsd: fix jiffies/time_t mixup in LRU list The nfsd4_blocked_lock->nbl_time timestamp is recorded in jiffies, but then compared to a CLOCK_REALTIME timestamp later on, which makes no sense. For consistency with the other timestamps, change this to use a time_t. This is a change in behavior, which may cause regressions, but the current code is not sensible. On a system with CONFIG_HZ=1000, the 'time_after((unsigned long)nbl->nbl_time, (unsigned long)cutoff))' check is false for roughly the first 18 days of uptime and then true for the next 49 days. Fixes: 7919d0a27f1e ("nfsd: add a LRU list for blocked locks") Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- fs/nfsd/state.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 36bbb18abf775..f680b1530f384 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6700,7 +6700,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } if (fl_flags & FL_SLEEP) { - nbl->nbl_time = jiffies; + nbl->nbl_time = get_seconds(); spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 6cf4420337865..df6bd2cb257f8 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -626,7 +626,7 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b) struct nfsd4_blocked_lock { struct list_head nbl_list; struct list_head nbl_lru; - unsigned long nbl_time; + time_t nbl_time; struct file_lock nbl_lock; struct knfsd_fh nbl_fh; struct nfsd4_callback nbl_cb; From 20b7d86f29d39e8ae19bb29c24ffee70dc385ddf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Nov 2019 16:31:52 +0100 Subject: [PATCH 28/55] nfsd: use boottime for lease expiry calculation A couple of time_t variables are only used to track the state of the lease time and its expiration. The code correctly uses the 'time_after()' macro to make this work on 32-bit architectures even beyond year 2038, but the get_seconds() function and the time_t type itself are deprecated as they behave inconsistently between 32-bit and 64-bit architectures and often lead to code that is not y2038 safe. As a minor issue, using get_seconds() leads to problems with concurrent settimeofday() or clock_settime() calls, in the worst case timeout never triggering after the time has been set backwards. Change nfsd to use time64_t and ktime_get_boottime_seconds() here. This is clearly excessive, as boottime by itself means we never go beyond 32 bits, but it does mean we handle this correctly and consistently without having to worry about corner cases and should be no more expensive than the previous implementation on 64-bit architectures. The max_cb_time() function gets changed in order to avoid an expensive 64-bit division operation, but as the lease time is at most one hour, there is no change in behavior. Also do the same for server-to-server copy expiration time. Signed-off-by: Arnd Bergmann [bfields@redhat.com: fix up copy expiration] Signed-off-by: J. Bruce Fields --- fs/nfsd/netns.h | 4 ++-- fs/nfsd/nfs4callback.c | 11 +++++++++- fs/nfsd/nfs4state.c | 48 +++++++++++++++++++----------------------- fs/nfsd/nfsctl.c | 6 +++--- fs/nfsd/state.h | 10 ++++----- 5 files changed, 42 insertions(+), 37 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 29bbe28eda535..2baf32311e007 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -92,8 +92,8 @@ struct nfsd_net { bool in_grace; const struct nfsd4_client_tracking_ops *client_tracking_ops; - time_t nfsd4_lease; - time_t nfsd4_grace; + time64_t nfsd4_lease; + time64_t nfsd4_grace; bool somebody_reclaimed; bool track_reclaim_completes; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 24534db87e860..c3b11a7150829 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -823,7 +823,16 @@ static const struct rpc_program cb_program = { static int max_cb_time(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - return max(nn->nfsd4_lease/10, (time_t)1) * HZ; + + /* + * nfsd4_lease is set to at most one hour in __nfsd4_write_time, + * so we can use 32-bit math on it. Warn if that assumption + * ever stops being true. + */ + if (WARN_ON_ONCE(nn->nfsd4_lease > 3600)) + return 360 * HZ; + + return max(((u32)nn->nfsd4_lease)/10, 1u) * HZ; } static struct workqueue_struct *callback_wq; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f680b1530f384..1ad68b49a1017 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -171,7 +171,7 @@ renew_client_locked(struct nfs4_client *clp) clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); list_move_tail(&clp->cl_lru, &nn->client_lru); - clp->cl_time = get_seconds(); + clp->cl_time = ktime_get_boottime_seconds(); } static void put_client_renew_locked(struct nfs4_client *clp) @@ -776,7 +776,7 @@ struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL); if (!cps) return NULL; - cps->cpntf_time = get_seconds(); + cps->cpntf_time = ktime_get_boottime_seconds(); refcount_set(&cps->cp_stateid.sc_count, 1); if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID)) goto out_free; @@ -2661,7 +2661,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, gen_clid(clp, nn); kref_init(&clp->cl_nfsdfs.cl_ref); nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL); - clp->cl_time = get_seconds(); + clp->cl_time = ktime_get_boottime_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage)); @@ -4331,7 +4331,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) last = oo->oo_last_closed_stid; oo->oo_last_closed_stid = s; list_move_tail(&oo->oo_close_lru, &nn->close_lru); - oo->oo_time = get_seconds(); + oo->oo_time = ktime_get_boottime_seconds(); spin_unlock(&nn->client_lock); if (last) nfs4_put_stid(&last->st_stid); @@ -4426,7 +4426,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) */ spin_lock(&state_lock); if (dp->dl_time == 0) { - dp->dl_time = get_seconds(); + dp->dl_time = ktime_get_boottime_seconds(); list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); } spin_unlock(&state_lock); @@ -5233,9 +5233,8 @@ nfsd4_end_grace(struct nfsd_net *nn) */ static bool clients_still_reclaiming(struct nfsd_net *nn) { - unsigned long now = (unsigned long) ktime_get_real_seconds(); - unsigned long double_grace_period_end = (unsigned long)nn->boot_time + - 2 * (unsigned long)nn->nfsd4_lease; + time64_t double_grace_period_end = nn->boot_time + + 2 * nn->nfsd4_lease; if (nn->track_reclaim_completes && atomic_read(&nn->nr_reclaim_complete) == @@ -5248,12 +5247,12 @@ static bool clients_still_reclaiming(struct nfsd_net *nn) * If we've given them *two* lease times to reclaim, and they're * still not done, give up: */ - if (time_after(now, double_grace_period_end)) + if (ktime_get_boottime_seconds() > double_grace_period_end) return false; return true; } -static time_t +static time64_t nfs4_laundromat(struct nfsd_net *nn) { struct nfs4_client *clp; @@ -5262,8 +5261,8 @@ nfs4_laundromat(struct nfsd_net *nn) struct nfs4_ol_stateid *stp; struct nfsd4_blocked_lock *nbl; struct list_head *pos, *next, reaplist; - time_t cutoff = get_seconds() - nn->nfsd4_lease; - time_t t, new_timeo = nn->nfsd4_lease; + time64_t cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease; + time64_t t, new_timeo = nn->nfsd4_lease; struct nfs4_cpntf_state *cps; copy_stateid_t *cps_t; int i; @@ -5282,8 +5281,7 @@ nfs4_laundromat(struct nfsd_net *nn) idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID && - !time_after((unsigned long)cps->cpntf_time, - (unsigned long)cutoff)) + cps->cpntf_time > cutoff) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); @@ -5291,7 +5289,7 @@ nfs4_laundromat(struct nfsd_net *nn) spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); - if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { + if (clp->cl_time > cutoff) { t = clp->cl_time - cutoff; new_timeo = min(new_timeo, t); break; @@ -5314,7 +5312,7 @@ nfs4_laundromat(struct nfsd_net *nn) spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { + if (dp->dl_time > cutoff) { t = dp->dl_time - cutoff; new_timeo = min(new_timeo, t); break; @@ -5334,8 +5332,7 @@ nfs4_laundromat(struct nfsd_net *nn) while (!list_empty(&nn->close_lru)) { oo = list_first_entry(&nn->close_lru, struct nfs4_openowner, oo_close_lru); - if (time_after((unsigned long)oo->oo_time, - (unsigned long)cutoff)) { + if (oo->oo_time > cutoff) { t = oo->oo_time - cutoff; new_timeo = min(new_timeo, t); break; @@ -5365,8 +5362,7 @@ nfs4_laundromat(struct nfsd_net *nn) while (!list_empty(&nn->blocked_locks_lru)) { nbl = list_first_entry(&nn->blocked_locks_lru, struct nfsd4_blocked_lock, nbl_lru); - if (time_after((unsigned long)nbl->nbl_time, - (unsigned long)cutoff)) { + if (nbl->nbl_time > cutoff) { t = nbl->nbl_time - cutoff; new_timeo = min(new_timeo, t); break; @@ -5383,7 +5379,7 @@ nfs4_laundromat(struct nfsd_net *nn) free_blocked_lock(nbl); } out: - new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); + new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); return new_timeo; } @@ -5393,13 +5389,13 @@ static void laundromat_main(struct work_struct *); static void laundromat_main(struct work_struct *laundry) { - time_t t; + time64_t t; struct delayed_work *dwork = to_delayed_work(laundry); struct nfsd_net *nn = container_of(dwork, struct nfsd_net, laundromat_work); t = nfs4_laundromat(nn); - dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t); + dprintk("NFSD: laundromat_main - sleeping for %lld seconds\n", t); queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } @@ -5723,7 +5719,7 @@ static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st, if (status) return status; - cps->cpntf_time = get_seconds(); + cps->cpntf_time = ktime_get_boottime_seconds(); memset(&cstate, 0, sizeof(cstate)); status = lookup_clientid(&cps->cp_p_clid, &cstate, nn, true); if (status) @@ -6700,7 +6696,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } if (fl_flags & FL_SLEEP) { - nbl->nbl_time = get_seconds(); + nbl->nbl_time = ktime_get_boottime_seconds(); spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); @@ -7861,7 +7857,7 @@ nfs4_state_start_net(struct net *net) nfsd4_client_tracking_init(net); if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0) goto skip_grace; - printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n", + printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n", nn->nfsd4_grace, net->ns.inum); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); return 0; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 11b42c523f045..aace740d5a924 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -956,7 +956,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size) #ifdef CONFIG_NFSD_V4 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, - time_t *time, struct nfsd_net *nn) + time64_t *time, struct nfsd_net *nn) { char *mesg = buf; int rv, i; @@ -984,11 +984,11 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, *time = i; } - return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time); + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%lld\n", *time); } static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, - time_t *time, struct nfsd_net *nn) + time64_t *time, struct nfsd_net *nn) { ssize_t rv; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index df6bd2cb257f8..68d3f30ee760c 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -121,7 +121,7 @@ struct nfs4_cpntf_state { struct list_head cp_list; /* per parent nfs4_stid */ stateid_t cp_p_stateid; /* copy of parent's stateid */ clientid_t cp_p_clid; /* copy of parent's clid */ - time_t cpntf_time; /* last time stateid used */ + time64_t cpntf_time; /* last time stateid used */ }; /* @@ -152,7 +152,7 @@ struct nfs4_delegation { struct list_head dl_recall_lru; /* delegation recalled */ struct nfs4_clnt_odstate *dl_clnt_odstate; u32 dl_type; - time_t dl_time; + time64_t dl_time; /* For recall: */ int dl_retries; struct nfsd4_callback dl_recall; @@ -330,7 +330,7 @@ struct nfs4_client { #endif struct xdr_netobj cl_name; /* id generated by client */ nfs4_verifier cl_verifier; /* generated by client */ - time_t cl_time; /* time of last lease renewal */ + time64_t cl_time; /* time of last lease renewal */ struct sockaddr_storage cl_addr; /* client ipaddress */ bool cl_mach_cred; /* SP4_MACH_CRED in force */ struct svc_cred cl_cred; /* setclientid principal */ @@ -469,7 +469,7 @@ struct nfs4_openowner { */ struct list_head oo_close_lru; struct nfs4_ol_stateid *oo_last_closed_stid; - time_t oo_time; /* time of placement on so_close_lru */ + time64_t oo_time; /* time of placement on so_close_lru */ #define NFS4_OO_CONFIRMED 1 unsigned char oo_flags; }; @@ -626,7 +626,7 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b) struct nfsd4_blocked_lock { struct list_head nbl_list; struct list_head nbl_lru; - time_t nbl_time; + time64_t nbl_time; struct file_lock nbl_lock; struct knfsd_fh nbl_fh; struct nfsd4_callback nbl_cb; From 9104ae494e67305c11f5793b82cc768e0f09cea9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Nov 2019 16:45:30 +0100 Subject: [PATCH 29/55] nfsd: use ktime_get_real_seconds() in nfs4_verifier gen_confirm() generates a unique identifier based on the current time. This overflows in year 2038, but that is harmless since it generally does not lead to duplicates, as long as the time has been initialized by a real-time clock or NTP. Using ktime_get_boottime_seconds() or ktime_get_seconds() would avoid the overflow, but it would be more likely to result in non-unique numbers. Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1ad68b49a1017..65cfe9ab47be0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2264,7 +2264,7 @@ static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn) * This is opaque to client, so no need to byte-swap. Use * __force to keep sparse happy */ - verf[0] = (__force __be32)get_seconds(); + verf[0] = (__force __be32)(u32)ktime_get_real_seconds(); verf[1] = (__force __be32)nn->clverifier_counter++; memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); } From 364d5814b9dd40f6219e7673466c2e18becd62ca Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Nov 2019 16:51:55 +0100 Subject: [PATCH 30/55] nfsd: remove nfs4_reset_lease() declarations The function was removed a long time ago, but the declaration and a dummy implementation are still there, referencing the deprecated time_t type. Remove both. Fixes: f958a1320ff7 ("nfsd4: remove unnecessary lease-setting function") Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsd.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index c679afd9fee9b..2ab5569126b8a 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -143,7 +143,6 @@ int nfs4_state_start(void); int nfs4_state_start_net(struct net *net); void nfs4_state_shutdown(void); void nfs4_state_shutdown_net(struct net *net); -void nfs4_reset_lease(time_t leasetime); int nfs4_reset_recoverydir(char *recdir); char * nfs4_recoverydir(void); bool nfsd4_spo_must_allow(struct svc_rqst *rqstp); @@ -154,7 +153,6 @@ static inline int nfs4_state_start(void) { return 0; } static inline int nfs4_state_start_net(struct net *net) { return 0; } static inline void nfs4_state_shutdown(void) { } static inline void nfs4_state_shutdown_net(struct net *net) { } -static inline void nfs4_reset_lease(time_t leasetime) { } static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } static inline char * nfs4_recoverydir(void) {return NULL; } static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) From 384a7ccaa37dff7c84123547dcfd98e2dd4144ba Mon Sep 17 00:00:00 2001 From: zhengbin Date: Wed, 25 Dec 2019 11:19:34 +0800 Subject: [PATCH 31/55] nfsd: use true,false for bool variable in vfs.c Fixes coccicheck warning: fs/nfsd/vfs.c:1389:5-13: WARNING: Assignment of 0/1 to bool variable fs/nfsd/vfs.c:1398:5-13: WARNING: Assignment of 0/1 to bool variable fs/nfsd/vfs.c:1415:2-10: WARNING: Assignment of 0/1 to bool variable Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fe8c5d0e8c5f8..3aeffb26fca5d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1396,7 +1396,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, && d_inode(dchild)->i_atime.tv_sec == v_atime && d_inode(dchild)->i_size == 0 ) { if (created) - *created = 1; + *created = true; break; } /* fall through */ @@ -1405,7 +1405,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, && d_inode(dchild)->i_atime.tv_sec == v_atime && d_inode(dchild)->i_size == 0 ) { if (created) - *created = 1; + *created = true; goto set_attr; } /* fall through */ @@ -1422,7 +1422,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; } if (created) - *created = 1; + *created = true; nfsd_check_ignore_resizing(iap); From 500c248171408b80a6c87d278085ad1c8be762c2 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Wed, 25 Dec 2019 11:19:35 +0800 Subject: [PATCH 32/55] nfsd: use true,false for bool variable in nfs4proc.c Fixes coccicheck warning: fs/nfsd/nfs4proc.c:235:1-18: WARNING: Assignment of 0/1 to bool variable fs/nfsd/nfs4proc.c:368:1-17: WARNING: Assignment of 0/1 to bool variable Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f39cac6e08db4..1e14b3ed5674d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -233,7 +233,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru if (!*resfh) return nfserr_jukebox; fh_init(*resfh, NFS4_FHSIZE); - open->op_truncate = 0; + open->op_truncate = false; if (open->op_create) { /* FIXME: check session persistence and pnfs flags. @@ -366,7 +366,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; - open->op_created = 0; + open->op_created = false; /* * RFC5661 18.51.3 * Before RECLAIM_COMPLETE done, server should deny new lock From e44b4bf2648ef05ff1cde9f354079bce58a5ebde Mon Sep 17 00:00:00 2001 From: zhengbin Date: Wed, 25 Dec 2019 11:19:36 +0800 Subject: [PATCH 33/55] nfsd: use true,false for bool variable in nfssvc.c Fixes coccicheck warning: fs/nfsd/nfssvc.c:394:2-14: WARNING: Assignment of 0/1 to bool variable fs/nfsd/nfssvc.c:407:2-14: WARNING: Assignment of 0/1 to bool variable fs/nfsd/nfssvc.c:422:2-14: WARNING: Assignment of 0/1 to bool variable Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index c720097a91490..7900f3494ecb7 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -397,7 +397,7 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre ret = lockd_up(net, cred); if (ret) goto out_socks; - nn->lockd_up = 1; + nn->lockd_up = true; } ret = nfs4_state_start_net(net); @@ -410,7 +410,7 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre out_lockd: if (nn->lockd_up) { lockd_down(net); - nn->lockd_up = 0; + nn->lockd_up = false; } out_socks: nfsd_shutdown_generic(); @@ -425,7 +425,7 @@ static void nfsd_shutdown_net(struct net *net) nfs4_state_shutdown_net(net); if (nn->lockd_up) { lockd_down(net); - nn->lockd_up = 0; + nn->lockd_up = false; } nn->nfsd_net_up = false; nfsd_shutdown_generic(); From 28c7d86bb6172ffbb1a1237c6388e77f9fe5f181 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:18:03 -0500 Subject: [PATCH 34/55] nfsd: fix filecache lookup If the lookup keeps finding a nfsd_file with an unhashed open file, then retry once only. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org Fixes: 65294c1f2c5e "nfsd: add a new struct file caching facility to nfsd" Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 32a9bf22ac08d..0a3e5c2aac4b4 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -789,6 +789,7 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, *new; struct inode *inode; unsigned int hashval; + bool retry = true; /* FIXME: skip this if fh_dentry is already set? */ status = fh_verify(rqstp, fhp, S_IFREG, @@ -824,6 +825,11 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, /* Did construction of this file fail? */ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + if (!retry) { + status = nfserr_jukebox; + goto out; + } + retry = false; nfsd_file_put_noref(nf); goto retry; } From 36ebbdb96b694dd9c6b25ad98f2bbd263d022b63 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:18:04 -0500 Subject: [PATCH 35/55] nfsd: cleanup nfsd_file_lru_dispose() Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 0a3e5c2aac4b4..c048e3071db76 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -256,8 +256,6 @@ nfsd_file_do_unhash(struct nfsd_file *nf) nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; hlist_del_rcu(&nf->nf_node); - if (!list_empty(&nf->nf_lru)) - list_lru_del(&nfsd_file_lru, &nf->nf_lru); atomic_long_dec(&nfsd_filecache_count); } @@ -266,6 +264,8 @@ nfsd_file_unhash(struct nfsd_file *nf) { if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { nfsd_file_do_unhash(nf); + if (!list_empty(&nf->nf_lru)) + list_lru_del(&nfsd_file_lru, &nf->nf_lru); return true; } return false; @@ -402,15 +402,14 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, static void nfsd_file_lru_dispose(struct list_head *head) { - while(!list_empty(head)) { - struct nfsd_file *nf = list_first_entry(head, - struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); + struct nfsd_file *nf; + + list_for_each_entry(nf, head, nf_lru) { spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); nfsd_file_do_unhash(nf); spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - nfsd_file_put_noref(nf); } + nfsd_file_dispose_list(head); } static unsigned long From 9542e6a643fc69d528dfb3303f145719c61d3050 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:18:05 -0500 Subject: [PATCH 36/55] nfsd: Containerise filecache laundrette Ensure that if the filecache laundrette gets stuck, it only affects the knfsd instances of one container. The notifier callbacks can be called from various contexts so avoid using synchonous filesystem operations that might deadlock. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 238 ++++++++++++++++++++++++++++++++++++-------- fs/nfsd/filecache.h | 2 + fs/nfsd/nfssvc.c | 9 +- 3 files changed, 207 insertions(+), 42 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index c048e3071db76..e71af553c2ed0 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -44,6 +44,17 @@ struct nfsd_fcache_bucket { static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); +struct nfsd_fcache_disposal { + struct list_head list; + struct work_struct work; + struct net *net; + spinlock_t lock; + struct list_head freeme; + struct rcu_head rcu; +}; + +struct workqueue_struct *nfsd_filecache_wq __read_mostly; + static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_mark_slab; static struct nfsd_fcache_bucket *nfsd_file_hashtbl; @@ -52,32 +63,21 @@ static long nfsd_file_lru_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; static atomic_long_t nfsd_filecache_count; static struct delayed_work nfsd_filecache_laundrette; +static DEFINE_SPINLOCK(laundrette_lock); +static LIST_HEAD(laundrettes); -enum nfsd_file_laundrette_ctl { - NFSD_FILE_LAUNDRETTE_NOFLUSH = 0, - NFSD_FILE_LAUNDRETTE_MAY_FLUSH -}; +static void nfsd_file_gc(void); static void -nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl) +nfsd_file_schedule_laundrette(void) { long count = atomic_long_read(&nfsd_filecache_count); if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags)) return; - /* Be more aggressive about scanning if over the threshold */ - if (count > NFSD_FILE_LRU_THRESHOLD) - mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0); - else - schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY); - - if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH) - return; - - /* ...and don't delay flushing if we're out of control */ - if (count >= NFSD_FILE_LRU_LIMIT) - flush_delayed_work(&nfsd_filecache_laundrette); + queue_delayed_work(system_wq, &nfsd_filecache_laundrette, + NFSD_LAUNDRETTE_DELAY); } static void @@ -312,7 +312,9 @@ nfsd_file_put(struct nfsd_file *nf) set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused) - nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH); + nfsd_file_schedule_laundrette(); + if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) + nfsd_file_gc(); } struct nfsd_file * @@ -353,6 +355,58 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) flush_delayed_fput(); } +static void +nfsd_file_list_remove_disposal(struct list_head *dst, + struct nfsd_fcache_disposal *l) +{ + spin_lock(&l->lock); + list_splice_init(&l->freeme, dst); + spin_unlock(&l->lock); +} + +static void +nfsd_file_list_add_disposal(struct list_head *files, struct net *net) +{ + struct nfsd_fcache_disposal *l; + + rcu_read_lock(); + list_for_each_entry_rcu(l, &laundrettes, list) { + if (l->net == net) { + spin_lock(&l->lock); + list_splice_tail_init(files, &l->freeme); + spin_unlock(&l->lock); + queue_work(nfsd_filecache_wq, &l->work); + break; + } + } + rcu_read_unlock(); +} + +static void +nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, + struct net *net) +{ + struct nfsd_file *nf, *tmp; + + list_for_each_entry_safe(nf, tmp, src, nf_lru) { + if (nf->nf_net == net) + list_move_tail(&nf->nf_lru, dst); + } +} + +static void +nfsd_file_dispose_list_delayed(struct list_head *dispose) +{ + LIST_HEAD(list); + struct nfsd_file *nf; + + while(!list_empty(dispose)) { + nf = list_first_entry(dispose, struct nfsd_file, nf_lru); + nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); + nfsd_file_list_add_disposal(&list, nf->nf_net); + } +} + /* * Note this can deadlock with nfsd_file_cache_purge. */ @@ -399,17 +453,40 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, return LRU_SKIP; } -static void -nfsd_file_lru_dispose(struct list_head *head) +static unsigned long +nfsd_file_lru_walk_list(struct shrink_control *sc) { + LIST_HEAD(head); struct nfsd_file *nf; + unsigned long ret; - list_for_each_entry(nf, head, nf_lru) { + if (sc) + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, + nfsd_file_lru_cb, &head); + else + ret = list_lru_walk(&nfsd_file_lru, + nfsd_file_lru_cb, + &head, LONG_MAX); + list_for_each_entry(nf, &head, nf_lru) { spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); nfsd_file_do_unhash(nf); spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); } - nfsd_file_dispose_list(head); + nfsd_file_dispose_list_delayed(&head); + return ret; +} + +static void +nfsd_file_gc(void) +{ + nfsd_file_lru_walk_list(NULL); +} + +static void +nfsd_file_gc_worker(struct work_struct *work) +{ + nfsd_file_gc(); + nfsd_file_schedule_laundrette(); } static unsigned long @@ -421,12 +498,7 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) static unsigned long nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) { - LIST_HEAD(head); - unsigned long ret; - - ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head); - nfsd_file_lru_dispose(&head); - return ret; + return nfsd_file_lru_walk_list(sc); } static struct shrinker nfsd_file_shrinker = { @@ -488,7 +560,7 @@ nfsd_file_close_inode(struct inode *inode) __nfsd_file_close_inode(inode, hashval, &dispose); trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); - nfsd_file_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); } /** @@ -504,16 +576,11 @@ static void nfsd_file_delayed_close(struct work_struct *work) { LIST_HEAD(head); + struct nfsd_fcache_disposal *l = container_of(work, + struct nfsd_fcache_disposal, work); - list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX); - - if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags)) - nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH); - - if (!list_empty(&head)) { - nfsd_file_lru_dispose(&head); - flush_delayed_fput(); - } + nfsd_file_list_remove_disposal(&head, l); + nfsd_file_dispose_list(&head); } static int @@ -574,6 +641,10 @@ nfsd_file_cache_init(void) if (nfsd_file_hashtbl) return 0; + nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); + if (!nfsd_filecache_wq) + goto out; + nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, sizeof(*nfsd_file_hashtbl), GFP_KERNEL); if (!nfsd_file_hashtbl) { @@ -627,7 +698,7 @@ nfsd_file_cache_init(void) spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); } - INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close); + INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); out: return ret; out_notifier: @@ -643,6 +714,8 @@ nfsd_file_cache_init(void) nfsd_file_mark_slab = NULL; kfree(nfsd_file_hashtbl); nfsd_file_hashtbl = NULL; + destroy_workqueue(nfsd_filecache_wq); + nfsd_filecache_wq = NULL; goto out; } @@ -681,6 +754,88 @@ nfsd_file_cache_purge(struct net *net) } } +static struct nfsd_fcache_disposal * +nfsd_alloc_fcache_disposal(struct net *net) +{ + struct nfsd_fcache_disposal *l; + + l = kmalloc(sizeof(*l), GFP_KERNEL); + if (!l) + return NULL; + INIT_WORK(&l->work, nfsd_file_delayed_close); + l->net = net; + spin_lock_init(&l->lock); + INIT_LIST_HEAD(&l->freeme); + return l; +} + +static void +nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) +{ + rcu_assign_pointer(l->net, NULL); + cancel_work_sync(&l->work); + nfsd_file_dispose_list(&l->freeme); + kfree_rcu(l, rcu); +} + +static void +nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l) +{ + spin_lock(&laundrette_lock); + list_add_tail_rcu(&l->list, &laundrettes); + spin_unlock(&laundrette_lock); +} + +static void +nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l) +{ + spin_lock(&laundrette_lock); + list_del_rcu(&l->list); + spin_unlock(&laundrette_lock); +} + +static int +nfsd_alloc_fcache_disposal_net(struct net *net) +{ + struct nfsd_fcache_disposal *l; + + l = nfsd_alloc_fcache_disposal(net); + if (!l) + return -ENOMEM; + nfsd_add_fcache_disposal(l); + return 0; +} + +static void +nfsd_free_fcache_disposal_net(struct net *net) +{ + struct nfsd_fcache_disposal *l; + + rcu_read_lock(); + list_for_each_entry_rcu(l, &laundrettes, list) { + if (l->net != net) + continue; + nfsd_del_fcache_disposal(l); + rcu_read_unlock(); + nfsd_free_fcache_disposal(l); + return; + } + rcu_read_unlock(); +} + +int +nfsd_file_cache_start_net(struct net *net) +{ + return nfsd_alloc_fcache_disposal_net(net); +} + +void +nfsd_file_cache_shutdown_net(struct net *net) +{ + nfsd_file_cache_purge(net); + nfsd_free_fcache_disposal_net(net); +} + void nfsd_file_cache_shutdown(void) { @@ -705,6 +860,8 @@ nfsd_file_cache_shutdown(void) nfsd_file_mark_slab = NULL; kfree(nfsd_file_hashtbl); nfsd_file_hashtbl = NULL; + destroy_workqueue(nfsd_filecache_wq); + nfsd_filecache_wq = NULL; } static bool @@ -872,7 +1029,8 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, nfsd_file_hashtbl[hashval].nfb_count); spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - atomic_long_inc(&nfsd_filecache_count); + if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD) + nfsd_file_gc(); nf->nf_mark = nfsd_file_mark_find_or_create(nf); if (nf->nf_mark) diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 851d9abf54c25..79a7d6808d979 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -51,6 +51,8 @@ struct nfsd_file { int nfsd_file_cache_init(void); void nfsd_file_cache_purge(struct net *); void nfsd_file_cache_shutdown(void); +int nfsd_file_cache_start_net(struct net *net); +void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 7900f3494ecb7..3b77b904212d8 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -400,13 +400,18 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre nn->lockd_up = true; } - ret = nfs4_state_start_net(net); + ret = nfsd_file_cache_start_net(net); if (ret) goto out_lockd; + ret = nfs4_state_start_net(net); + if (ret) + goto out_filecache; nn->nfsd_net_up = true; return 0; +out_filecache: + nfsd_file_cache_shutdown_net(net); out_lockd: if (nn->lockd_up) { lockd_down(net); @@ -421,7 +426,7 @@ static void nfsd_shutdown_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfsd_file_cache_purge(net); + nfsd_file_cache_shutdown_net(net); nfs4_state_shutdown_net(net); if (nn->lockd_up) { lockd_down(net); From bd6e1cece834f1b1322c85269b93379f8092077f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:18:06 -0500 Subject: [PATCH 37/55] nfsd: Remove unused constant NFSD_FILE_LRU_RESCAN Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index e71af553c2ed0..6b0ab43b06183 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -27,7 +27,6 @@ #define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS) #define NFSD_LAUNDRETTE_DELAY (2 * HZ) -#define NFSD_FILE_LRU_RESCAN (0) #define NFSD_FILE_SHUTDOWN (1) #define NFSD_FILE_LRU_THRESHOLD (4096UL) #define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2) @@ -440,15 +439,13 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, goto out_skip; if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) - goto out_rescan; + goto out_skip; if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) goto out_skip; list_lru_isolate_move(lru, &nf->nf_lru, head); return LRU_REMOVED; -out_rescan: - set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags); out_skip: return LRU_SKIP; } From 55f84cc47f73bdc84ef73b702c23051b426505a4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:18:07 -0500 Subject: [PATCH 38/55] nfsd: Schedule the laundrette regularly irrespective of file errors Emsure we schedule the laundrette even if the struct file is carrying file errors. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 6b0ab43b06183..4cef03a7726c9 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -237,13 +237,6 @@ nfsd_file_check_write_error(struct nfsd_file *nf) return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); } -static bool -nfsd_file_in_use(struct nfsd_file *nf) -{ - return nfsd_file_check_writeback(nf) || - nfsd_file_check_write_error(nf); -} - static void nfsd_file_do_unhash(struct nfsd_file *nf) { @@ -307,10 +300,9 @@ void nfsd_file_put(struct nfsd_file *nf) { bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; - bool unused = !nfsd_file_in_use(nf); set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused) + if (nfsd_file_put_noref(nf) == 1 && is_hashed) nfsd_file_schedule_laundrette(); if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) nfsd_file_gc(); From b6669305d35a3459877afef6e9f68e81bef92972 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:18:08 -0500 Subject: [PATCH 39/55] nfsd: Reduce the number of calls to nfsd_file_gc() Don't call nfsd_file_gc() on every put of the reference in nfsd_file_put(). Instead, do it only when we're expecting the refcount to go to 1. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 4cef03a7726c9..9c2b29e079750 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -282,27 +282,32 @@ nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *disp return true; } -static int +static void nfsd_file_put_noref(struct nfsd_file *nf) { - int count; trace_nfsd_file_put(nf); - count = atomic_dec_return(&nf->nf_ref); - if (!count) { + if (atomic_dec_and_test(&nf->nf_ref)) { WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); nfsd_file_free(nf); } - return count; } void nfsd_file_put(struct nfsd_file *nf) { - bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; + bool is_hashed; set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (nfsd_file_put_noref(nf) == 1 && is_hashed) + if (atomic_read(&nf->nf_ref) > 2 || !nf->nf_file) { + nfsd_file_put_noref(nf); + return; + } + + filemap_flush(nf->nf_file->f_mapping); + is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; + nfsd_file_put_noref(nf); + if (is_hashed) nfsd_file_schedule_laundrette(); if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) nfsd_file_gc(); From 90d2f1da832fd23290ef0c0d964d97501e5e8553 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:20:47 -0500 Subject: [PATCH 40/55] nfsd: Fix a soft lockup race in nfsd_file_mark_find_or_create() If nfsd_file_mark_find_or_create() keeps winning the race for the nfsd_file_fsnotify_group->mark_mutex against nfsd_file_mark_put() then it can soft lock up, since fsnotify_add_inode_mark() ends up always finding an existing entry. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 9c2b29e079750..f275c11c4e284 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -132,9 +132,13 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) struct nfsd_file_mark, nfm_mark)); mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); - fsnotify_put_mark(mark); - if (likely(nfm)) + if (nfm) { + fsnotify_put_mark(mark); break; + } + /* Avoid soft lockup race with nfsd_file_mark_put() */ + fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group); + fsnotify_put_mark(mark); } else mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); From 16f8f894108270fd99667f06bae6ebcdd0f4e4bf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:29 -0500 Subject: [PATCH 41/55] nfsd: Allow nfsd_vfs_write() to take the nfsd_file as an argument Needed in order to fix stable writes. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/vfs.c | 5 +++-- fs/nfsd/vfs.h | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 1e14b3ed5674d..a2baf538473cf 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1021,7 +1021,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &write->wr_head, write->wr_buflen); WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); - status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf->nf_file, + status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, write->wr_offset, rqstp->rq_vec, nvecs, &cnt, write->wr_how_written); nfsd_file_put(nf); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 3aeffb26fca5d..e1ffefab2552f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -947,10 +947,11 @@ static int wait_for_concurrent_writes(struct file *file) } __be32 -nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, +nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, int stable) { + struct file *file = nf->nf_file; struct svc_export *exp; struct iov_iter iter; __be32 nfserr; @@ -1057,7 +1058,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, if (err) goto out; - err = nfsd_vfs_write(rqstp, fhp, nf->nf_file, offset, vec, + err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec, vlen, cnt, stable); nfsd_file_put(nf); out: diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index bbb485177b25f..0174e957b27e4 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -34,6 +34,8 @@ #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) +struct nfsd_file; + /* * Callback function for readdir */ @@ -93,7 +95,7 @@ __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, __be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *, int); __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct file *file, loff_t offset, + struct nfsd_file *nf, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, int stable); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, From 5011af4c698a95b4dc4e374b1ce3acd122a9170f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:30 -0500 Subject: [PATCH 42/55] nfsd: Fix stable writes Strictly speaking, a stable write error needs to reflect the write + the commit of that write (and only that write). To ensure that we don't pick up the write errors from other writebacks, add a rw_semaphore to provide exclusion. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 1 + fs/nfsd/filecache.h | 1 + fs/nfsd/vfs.c | 18 ++++++++++++++++-- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index f275c11c4e284..2fadf080ac42f 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -195,6 +195,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); } nf->nf_mark = NULL; + init_rwsem(&nf->nf_rwsem); trace_nfsd_file_alloc(nf); } return nf; diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 79a7d6808d979..986c325a54bd3 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -46,6 +46,7 @@ struct nfsd_file { atomic_t nf_ref; unsigned char nf_may; struct nfsd_file_mark *nf_mark; + struct rw_semaphore nf_rwsem; }; int nfsd_file_cache_init(void); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e1ffefab2552f..4652854f3dd57 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -982,7 +982,18 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, flags |= RWF_SYNC; iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); - host_err = vfs_iter_write(file, &iter, &pos, flags); + if (flags & RWF_SYNC) { + down_write(&nf->nf_rwsem); + host_err = vfs_iter_write(file, &iter, &pos, flags); + if (host_err < 0) + nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), + nfsd_net_id)); + up_write(&nf->nf_rwsem); + } else { + down_read(&nf->nf_rwsem); + host_err = vfs_iter_write(file, &iter, &pos, flags); + up_read(&nf->nf_rwsem); + } if (host_err < 0) goto out_nfserr; *cnt = host_err; @@ -1097,8 +1108,10 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, if (err) goto out; if (EX_ISSYNC(fhp->fh_export)) { - int err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); + int err2; + down_write(&nf->nf_rwsem); + err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); switch (err2) { case 0: break; @@ -1110,6 +1123,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); } + up_write(&nf->nf_rwsem); } nfsd_file_put(nf); From 7bf94c6ba9fb291bd28fac3228553cb305bfc92a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:31 -0500 Subject: [PATCH 43/55] nfsd: Update the boot verifier on stable writes too. We don't know if the error returned by the fsync() call is exclusive to the data written by the stable write, so play it safe. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 4652854f3dd57..0a048dfb68ec1 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -994,8 +994,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, host_err = vfs_iter_write(file, &iter, &pos, flags); up_read(&nf->nf_rwsem); } - if (host_err < 0) + if (host_err < 0) { + nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), + nfsd_net_id)); goto out_nfserr; + } *cnt = host_err; nfsdstats.io_write += *cnt; fsnotify_modify(file); From b66ae6dd0c30c750cbc5c633dea08712203abc03 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:32 -0500 Subject: [PATCH 44/55] nfsd: Pass the nfsd_file as arguments to nfsd4_clone_file_range() Needed in order to fix exclusion w.r.t. writes. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 4 ++-- fs/nfsd/vfs.c | 6 ++++-- fs/nfsd/vfs.h | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a2baf538473cf..634f32d847e02 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1085,8 +1085,8 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - status = nfsd4_clone_file_range(src->nf_file, clone->cl_src_pos, - dst->nf_file, clone->cl_dst_pos, clone->cl_count, + status = nfsd4_clone_file_range(src, clone->cl_src_pos, + dst, clone->cl_dst_pos, clone->cl_count, EX_ISSYNC(cstate->current_fh.fh_export)); nfsd_file_put(dst); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 0a048dfb68ec1..7950f2ea1d95b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -530,9 +530,11 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, } #endif -__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, - u64 dst_pos, u64 count, bool sync) +__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, + struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync) { + struct file *src = nf_src->nf_file; + struct file *dst = nf_dst->nf_file; loff_t cloned; cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 0174e957b27e4..a91cd79828850 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -57,8 +57,9 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, struct xdr_netobj *); __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, struct file *, loff_t, loff_t, int); -__be32 nfsd4_clone_file_range(struct file *, u64, struct file *, - u64, u64, bool); +__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, + struct nfsd_file *nf_dst, u64 dst_pos, + u64 count, bool sync); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, From 1b28d756b2eaee03e838390465754e431389e483 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:33 -0500 Subject: [PATCH 45/55] nfsd: Ensure exclusion between CLONE and WRITE errors Ensure that we can distinguish between synchronous CLONE and WRITE errors, and that we can assign them correctly. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7950f2ea1d95b..126149b064637 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -536,22 +536,33 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, struct file *src = nf_src->nf_file; struct file *dst = nf_dst->nf_file; loff_t cloned; + __be32 ret = 0; + down_write(&nf_dst->nf_rwsem); cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); - if (cloned < 0) - return nfserrno(cloned); - if (count && cloned != count) - return nfserrno(-EINVAL); + if (cloned < 0) { + ret = nfserrno(cloned); + goto out_err; + } + if (count && cloned != count) { + ret = nfserrno(-EINVAL); + goto out_err; + } if (sync) { loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX; int status = vfs_fsync_range(dst, dst_pos, dst_end, 0); if (!status) status = commit_inode_metadata(file_inode(src)); - if (status < 0) - return nfserrno(status); + if (status < 0) { + nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net, + nfsd_net_id)); + ret = nfserrno(status); + } } - return 0; +out_err: + up_write(&nf_dst->nf_rwsem); + return ret; } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, From 1d82163714c16ebe09c7a8c9cd3cef7abcc16208 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:34 -0500 Subject: [PATCH 46/55] sunrpc: Fix potential leaks in sunrpc_cache_unhash() When we unhash the cache entry, we need to handle any pending upcalls by calling cache_fresh_unlocked(). Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index f740cb51802af..7ede1e52fd812 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1888,7 +1888,9 @@ void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h) if (!hlist_unhashed(&h->cache_list)){ hlist_del_init_rcu(&h->cache_list); cd->entries--; + set_bit(CACHE_CLEANED, &h->flags); spin_unlock(&cd->hash_lock); + cache_fresh_unlocked(h, cd); cache_put(h, cd); } else spin_unlock(&cd->hash_lock); From 809fe3c533789ebb51c1873b80c7dafbad9dbf19 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:35 -0500 Subject: [PATCH 47/55] sunrpc: clean up cache entry add/remove from hashtable Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 50 ++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 7ede1e52fd812..52d927210d326 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -77,6 +77,22 @@ static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail, return NULL; } +static void sunrpc_begin_cache_remove_entry(struct cache_head *ch, + struct cache_detail *cd) +{ + /* Must be called under cd->hash_lock */ + hlist_del_init_rcu(&ch->cache_list); + set_bit(CACHE_CLEANED, &ch->flags); + cd->entries --; +} + +static void sunrpc_end_cache_remove_entry(struct cache_head *ch, + struct cache_detail *cd) +{ + cache_fresh_unlocked(ch, cd); + cache_put(ch, cd); +} + static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, struct cache_head *key, int hash) @@ -100,8 +116,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, hlist_for_each_entry_rcu(tmp, head, cache_list) { if (detail->match(tmp, key)) { if (cache_is_expired(detail, tmp)) { - hlist_del_init_rcu(&tmp->cache_list); - detail->entries --; + sunrpc_begin_cache_remove_entry(tmp, detail); freeme = tmp; break; } @@ -117,10 +132,8 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, cache_get(new); spin_unlock(&detail->hash_lock); - if (freeme) { - cache_fresh_unlocked(freeme, detail); - cache_put(freeme, detail); - } + if (freeme) + sunrpc_end_cache_remove_entry(freeme, detail); return new; } @@ -454,8 +467,7 @@ static int cache_clean(void) if (!cache_is_expired(current_detail, ch)) continue; - hlist_del_init_rcu(&ch->cache_list); - current_detail->entries--; + sunrpc_begin_cache_remove_entry(ch, current_detail); rv = 1; break; } @@ -465,11 +477,8 @@ static int cache_clean(void) if (!ch) current_index ++; spin_unlock(&cache_list_lock); - if (ch) { - set_bit(CACHE_CLEANED, &ch->flags); - cache_fresh_unlocked(ch, d); - cache_put(ch, d); - } + if (ch) + sunrpc_end_cache_remove_entry(ch, d); } else spin_unlock(&cache_list_lock); @@ -525,13 +534,9 @@ void cache_purge(struct cache_detail *detail) for (i = 0; i < detail->hash_size; i++) { head = &detail->hash_table[i]; hlist_for_each_entry_safe(ch, tmp, head, cache_list) { - hlist_del_init_rcu(&ch->cache_list); - detail->entries--; - - set_bit(CACHE_CLEANED, &ch->flags); + sunrpc_begin_cache_remove_entry(ch, detail); spin_unlock(&detail->hash_lock); - cache_fresh_unlocked(ch, detail); - cache_put(ch, detail); + sunrpc_end_cache_remove_entry(ch, detail); spin_lock(&detail->hash_lock); } } @@ -1886,12 +1891,9 @@ void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h) { spin_lock(&cd->hash_lock); if (!hlist_unhashed(&h->cache_list)){ - hlist_del_init_rcu(&h->cache_list); - cd->entries--; - set_bit(CACHE_CLEANED, &h->flags); + sunrpc_begin_cache_remove_entry(h, cd); spin_unlock(&cd->hash_lock); - cache_fresh_unlocked(h, cd); - cache_put(h, cd); + sunrpc_end_cache_remove_entry(h, cd); } else spin_unlock(&cd->hash_lock); } From 524ff1af226ce0f6348d48b413849b2bd16c5b60 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:36 -0500 Subject: [PATCH 48/55] nfsd: Ensure sampling of the commit verifier is atomic with the commit When we have a successful commit, ensure we sample the commit verifier before releasing the lock. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3proc.c | 3 ++- fs/nfsd/nfs3xdr.c | 8 ++------ fs/nfsd/nfs4proc.c | 4 ++-- fs/nfsd/vfs.c | 8 ++++++-- fs/nfsd/vfs.h | 2 +- fs/nfsd/xdr3.h | 1 + 6 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index cea68d8411ac5..ffdc592868a6c 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -683,7 +683,8 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) RETURN_STATUS(nfserr_inval); fh_copy(&resp->fh, &argp->fh); - nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count); + nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count, + resp->verf); RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index c997b710af273..1ae4fc2eec009 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -1121,16 +1121,12 @@ int nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_commitres *resp = rqstp->rq_resp; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - __be32 verf[2]; p = encode_wcc_data(rqstp, p, &resp->fh); /* Write verifier */ if (resp->status == 0) { - /* unique identifier, y2038 overflow can be ignored */ - nfsd_copy_boot_verifier(verf, nn); - *p++ = verf[0]; - *p++ = verf[1]; + *p++ = resp->verf[0]; + *p++ = resp->verf[1]; } return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 634f32d847e02..c3455e1b0d507 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -590,9 +590,9 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfsd4_commit *commit = &u->commit; - gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp)); return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, - commit->co_count); + commit->co_count, + (__be32 *)commit->co_verf.data); } static __be32 diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 126149b064637..dc07d9797f115 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1105,7 +1105,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, */ __be32 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, unsigned long count) + loff_t offset, unsigned long count, __be32 *verf) { struct nfsd_file *nf; loff_t end = LLONG_MAX; @@ -1130,6 +1130,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); switch (err2) { case 0: + nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, + nfsd_net_id)); break; case -EINVAL: err = nfserr_notsupp; @@ -1140,7 +1142,9 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_net_id)); } up_write(&nf->nf_rwsem); - } + } else + nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, + nfsd_net_id)); nfsd_file_put(nf); out: diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index a91cd79828850..d23d9daacc3db 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -74,7 +74,7 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, struct svc_fh *res, int createmode, u32 *verifier, bool *truncp, bool *created); __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, - loff_t, unsigned long); + loff_t, unsigned long, __be32 *verf); #endif /* CONFIG_NFSD_V3 */ int nfsd_open_break_lease(struct inode *, int); __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 0fa12988fb6a4..fe6fd4556c2e5 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -223,6 +223,7 @@ struct nfsd3_pathconfres { struct nfsd3_commitres { __be32 status; struct svc_fh fh; + __be32 verf[2]; }; struct nfsd3_getaclres { From 19e0663ff9bce2efb87be8b30f9e46b7843600f3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 13:40:37 -0500 Subject: [PATCH 49/55] nfsd: Ensure sampling of the write verifier is atomic with the write When doing an unstable write, we need to ensure that we sample the write verifier before releasing the lock, and allowing a commit to the same file to proceed. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3proc.c | 2 +- fs/nfsd/nfs3xdr.c | 8 ++------ fs/nfsd/nfs4proc.c | 4 ++-- fs/nfsd/nfsproc.c | 2 +- fs/nfsd/vfs.c | 12 +++++++++--- fs/nfsd/vfs.h | 5 +++-- fs/nfsd/xdr3.h | 1 + 7 files changed, 19 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index ffdc592868a6c..288bc76b4574d 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -203,7 +203,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp) RETURN_STATUS(nfserr_io); nfserr = nfsd_write(rqstp, &resp->fh, argp->offset, rqstp->rq_vec, nvecs, &cnt, - resp->committed); + resp->committed, resp->verf); resp->count = cnt; RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 1ae4fc2eec009..aae514d40b641 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -747,17 +747,13 @@ int nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_writeres *resp = rqstp->rq_resp; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - __be32 verf[2]; p = encode_wcc_data(rqstp, p, &resp->fh); if (resp->status == 0) { *p++ = htonl(resp->count); *p++ = htonl(resp->committed); - /* unique identifier, y2038 overflow can be ignored */ - nfsd_copy_boot_verifier(verf, nn); - *p++ = verf[0]; - *p++ = verf[1]; + *p++ = resp->verf[0]; + *p++ = resp->verf[1]; } return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c3455e1b0d507..e4ddaf87493ae 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1015,7 +1015,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } write->wr_how_written = write->wr_stable_how; - gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp)); nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist, &write->wr_head, write->wr_buflen); @@ -1023,7 +1022,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, write->wr_offset, rqstp->rq_vec, nvecs, &cnt, - write->wr_how_written); + write->wr_how_written, + (__be32 *)write->wr_verifier.data); nfsd_file_put(nf); write->wr_bytes_written = cnt; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index b25c90be29fbd..543bbe0a556e7 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -226,7 +226,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) return nfserr_io; nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset, rqstp->rq_vec, nvecs, - &cnt, NFS_DATA_SYNC); + &cnt, NFS_DATA_SYNC, NULL); return nfsd_return_attrs(nfserr, resp); } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index dc07d9797f115..0aa02eb18bd3c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -962,7 +962,8 @@ static int wait_for_concurrent_writes(struct file *file) __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t offset, struct kvec *vec, int vlen, - unsigned long *cnt, int stable) + unsigned long *cnt, int stable, + __be32 *verf) { struct file *file = nf->nf_file; struct svc_export *exp; @@ -1004,6 +1005,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, up_write(&nf->nf_rwsem); } else { down_read(&nf->nf_rwsem); + if (verf) + nfsd_copy_boot_verifier(verf, + net_generic(SVC_NET(rqstp), + nfsd_net_id)); host_err = vfs_iter_write(file, &iter, &pos, flags); up_read(&nf->nf_rwsem); } @@ -1074,7 +1079,8 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, */ __be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, - struct kvec *vec, int vlen, unsigned long *cnt, int stable) + struct kvec *vec, int vlen, unsigned long *cnt, int stable, + __be32 *verf) { struct nfsd_file *nf; __be32 err; @@ -1086,7 +1092,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, goto out; err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec, - vlen, cnt, stable); + vlen, cnt, stable, verf); nfsd_file_put(nf); out: trace_nfsd_write_done(rqstp, fhp, offset, *cnt); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index d23d9daacc3db..3eb660ad80d1c 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -94,11 +94,12 @@ __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *, u32 *eof); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, - struct kvec *, int, unsigned long *, int); + struct kvec *, int, unsigned long *, + int stable, __be32 *verf); __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, - int stable); + int stable, __be32 *verf); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index fe6fd4556c2e5..4155fd71714c9 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -159,6 +159,7 @@ struct nfsd3_writeres { struct svc_fh fh; unsigned long count; int committed; + __be32 verf[2]; }; struct nfsd3_renameres { From a9ceb060b3cf37987b6162223575eaf4f4e0fc36 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Jan 2020 12:00:21 -0500 Subject: [PATCH 50/55] nfsd: Fix a perf warning perf does not know how to deal with a __builtin_bswap32() call, and complains. All other functions just store the xid etc in host endian form, so let's do that in the tracepoint for nfsd_file_acquire too. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/trace.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index ffc78a0e28b24..b073bdc2e6e89 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -228,7 +228,7 @@ TRACE_EVENT(nfsd_file_acquire, TP_ARGS(rqstp, hash, inode, may_flags, nf, status), TP_STRUCT__entry( - __field(__be32, xid) + __field(u32, xid) __field(unsigned int, hash) __field(void *, inode) __field(unsigned int, may_flags) @@ -236,11 +236,11 @@ TRACE_EVENT(nfsd_file_acquire, __field(unsigned long, nf_flags) __field(unsigned char, nf_may) __field(struct file *, nf_file) - __field(__be32, status) + __field(u32, status) ), TP_fast_assign( - __entry->xid = rqstp->rq_xid; + __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->hash = hash; __entry->inode = inode; __entry->may_flags = may_flags; @@ -248,15 +248,15 @@ TRACE_EVENT(nfsd_file_acquire, __entry->nf_flags = nf ? nf->nf_flags : 0; __entry->nf_may = nf ? nf->nf_may : 0; __entry->nf_file = nf ? nf->nf_file : NULL; - __entry->status = status; + __entry->status = be32_to_cpu(status); ), TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u", - be32_to_cpu(__entry->xid), __entry->hash, __entry->inode, + __entry->xid, __entry->hash, __entry->inode, show_nf_may(__entry->may_flags), __entry->nf_ref, show_nf_flags(__entry->nf_flags), show_nf_may(__entry->nf_may), __entry->nf_file, - be32_to_cpu(__entry->status)) + __entry->status) ); DECLARE_EVENT_CLASS(nfsd_file_search_class, From c19285596de699e4602f9c89785e6b8c29422286 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Jan 2020 12:00:22 -0500 Subject: [PATCH 51/55] nfsd: Define the file access mode enum for tracing Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/trace.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index b073bdc2e6e89..17ecef404e5bd 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -166,6 +166,12 @@ DEFINE_STATEID_EVENT(layout_recall_done); DEFINE_STATEID_EVENT(layout_recall_fail); DEFINE_STATEID_EVENT(layout_recall_release); +TRACE_DEFINE_ENUM(NFSD_FILE_HASHED); +TRACE_DEFINE_ENUM(NFSD_FILE_PENDING); +TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ); +TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE); +TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED); + #define show_nf_flags(val) \ __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ From 689827cd5bfe89e4900db7e1c0c713083a76d04c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Jan 2020 12:02:44 -0500 Subject: [PATCH 52/55] nfsd: convert file cache to use over/underflow safe refcount Use the 'refcount_t' type instead of 'atomic_t' for improved refcounting safety. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 23 +++++++++++------------ fs/nfsd/filecache.h | 4 ++-- fs/nfsd/trace.h | 4 ++-- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 2fadf080ac42f..23c1fa5da1e9e 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -100,7 +100,7 @@ nfsd_file_mark_free(struct fsnotify_mark *mark) static struct nfsd_file_mark * nfsd_file_mark_get(struct nfsd_file_mark *nfm) { - if (!atomic_inc_not_zero(&nfm->nfm_ref)) + if (!refcount_inc_not_zero(&nfm->nfm_ref)) return NULL; return nfm; } @@ -108,8 +108,7 @@ nfsd_file_mark_get(struct nfsd_file_mark *nfm) static void nfsd_file_mark_put(struct nfsd_file_mark *nfm) { - if (atomic_dec_and_test(&nfm->nfm_ref)) { - + if (refcount_dec_and_test(&nfm->nfm_ref)) { fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group); fsnotify_put_mark(&nfm->nfm_mark); } @@ -148,7 +147,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) return NULL; fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group); new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF; - atomic_set(&new->nfm_ref, 1); + refcount_set(&new->nfm_ref, 1); err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0); @@ -186,7 +185,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, nf->nf_flags = 0; nf->nf_inode = inode; nf->nf_hashval = hashval; - atomic_set(&nf->nf_ref, 1); + refcount_set(&nf->nf_ref, 1); nf->nf_may = may & NFSD_FILE_MAY_MASK; if (may & NFSD_MAY_NOT_BREAK_LEASE) { if (may & NFSD_MAY_WRITE) @@ -280,7 +279,7 @@ nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *disp if (!nfsd_file_unhash(nf)) return false; /* keep final reference for nfsd_file_lru_dispose */ - if (atomic_add_unless(&nf->nf_ref, -1, 1)) + if (refcount_dec_not_one(&nf->nf_ref)) return true; list_add(&nf->nf_lru, dispose); @@ -292,7 +291,7 @@ nfsd_file_put_noref(struct nfsd_file *nf) { trace_nfsd_file_put(nf); - if (atomic_dec_and_test(&nf->nf_ref)) { + if (refcount_dec_and_test(&nf->nf_ref)) { WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); nfsd_file_free(nf); } @@ -304,7 +303,7 @@ nfsd_file_put(struct nfsd_file *nf) bool is_hashed; set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (atomic_read(&nf->nf_ref) > 2 || !nf->nf_file) { + if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) { nfsd_file_put_noref(nf); return; } @@ -321,7 +320,7 @@ nfsd_file_put(struct nfsd_file *nf) struct nfsd_file * nfsd_file_get(struct nfsd_file *nf) { - if (likely(atomic_inc_not_zero(&nf->nf_ref))) + if (likely(refcount_inc_not_zero(&nf->nf_ref))) return nf; return NULL; } @@ -347,7 +346,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del(&nf->nf_lru); - if (!atomic_dec_and_test(&nf->nf_ref)) + if (!refcount_dec_and_test(&nf->nf_ref)) continue; if (nfsd_file_free(nf)) flush = true; @@ -430,7 +429,7 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, * counter. Here we check the counter and then test and clear the flag. * That order is deliberate to ensure that we can do this locklessly. */ - if (atomic_read(&nf->nf_ref) > 1) + if (refcount_read(&nf->nf_ref) > 1) goto out_skip; /* @@ -1019,7 +1018,7 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, open_file: nf = new; /* Take reference for the hashtable */ - atomic_inc(&nf->nf_ref); + refcount_inc(&nf->nf_ref); __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); list_lru_add(&nfsd_file_lru, &nf->nf_lru); diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 986c325a54bd3..7872df5a0fe3a 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -19,7 +19,7 @@ */ struct nfsd_file_mark { struct fsnotify_mark nfm_mark; - atomic_t nfm_ref; + refcount_t nfm_ref; }; /* @@ -43,7 +43,7 @@ struct nfsd_file { unsigned long nf_flags; struct inode *nf_inode; unsigned int nf_hashval; - atomic_t nf_ref; + refcount_t nf_ref; unsigned char nf_may; struct nfsd_file_mark *nf_mark; struct rw_semaphore nf_rwsem; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 17ecef404e5bd..06dd0d3370492 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -201,7 +201,7 @@ DECLARE_EVENT_CLASS(nfsd_file_class, TP_fast_assign( __entry->nf_hashval = nf->nf_hashval; __entry->nf_inode = nf->nf_inode; - __entry->nf_ref = atomic_read(&nf->nf_ref); + __entry->nf_ref = refcount_read(&nf->nf_ref); __entry->nf_flags = nf->nf_flags; __entry->nf_may = nf->nf_may; __entry->nf_file = nf->nf_file; @@ -250,7 +250,7 @@ TRACE_EVENT(nfsd_file_acquire, __entry->hash = hash; __entry->inode = inode; __entry->may_flags = may_flags; - __entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0; + __entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0; __entry->nf_flags = nf ? nf->nf_flags : 0; __entry->nf_may = nf ? nf->nf_may : 0; __entry->nf_file = nf ? nf->nf_file : NULL; From 91fd3c3edcd7f223c5340a8324f67b03a85a73aa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 13 Jan 2020 16:23:07 +0300 Subject: [PATCH 53/55] nfsd4: fix double free in nfsd4_do_async_copy() This frees "copy->nf_src" before and again after the goto. Fixes: ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy") Signed-off-by: Dan Carpenter Reviewed-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e4ddaf87493ae..0e75f7fb5fec0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1469,7 +1469,6 @@ static int nfsd4_do_async_copy(void *data) copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, ©->stateid); if (IS_ERR(copy->nf_src->nf_file)) { - kfree(copy->nf_src); copy->nfserr = nfserr_offload_denied; nfsd4_interssc_disconnect(copy->ss_mnt); goto do_callback; From 50d0def966a5f1237ba647e827a945e8ece4c10b Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Mon, 3 Feb 2020 09:43:57 +0800 Subject: [PATCH 54/55] nfsd: make nfsd_filecache_wq variable static Fix sparse warning: fs/nfsd/filecache.c:55:25: warning: symbol 'nfsd_filecache_wq' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Chen Zhou Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 23c1fa5da1e9e..22e77ede9f146 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -52,7 +52,7 @@ struct nfsd_fcache_disposal { struct rcu_head rcu; }; -struct workqueue_struct *nfsd_filecache_wq __read_mostly; +static struct workqueue_struct *nfsd_filecache_wq __read_mostly; static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_mark_slab; From 3d96208c30f84d6edf9ab4fac813306ac0d20c10 Mon Sep 17 00:00:00 2001 From: Roberto Bergantinos Corpas Date: Tue, 4 Feb 2020 11:32:56 +0100 Subject: [PATCH 55/55] sunrpc: expiry_time should be seconds not timeval When upcalling gssproxy, cache_head.expiry_time is set as a timeval, not seconds since boot. As such, RPC cache expiry logic will not clean expired objects created under auth.rpcsec.context cache. This has proven to cause kernel memory leaks on field. Using 64 bit variants of getboottime/timespec Expiration times have worked this way since 2010's c5b29f885afe "sunrpc: use seconds since boot in expiry cache". The gssproxy code introduced in 2012 added gss_proxy_save_rsc and introduced the bug. That's a while for this to lurk, but it required a bit of an extreme case to make it obvious. Signed-off-by: Roberto Bergantinos Corpas Cc: stable@vger.kernel.org Fixes: 030d794bf498 "SUNRPC: Use gssproxy upcall for server..." Tested-By: Frank Sorenson Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index c62d1f10978b8..cff77f0966471 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1248,6 +1248,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, dprintk("RPC: No creds found!\n"); goto out; } else { + struct timespec64 boot; /* steal creds */ rsci.cred = ud->creds; @@ -1268,6 +1269,9 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, &expiry, GFP_KERNEL); if (status) goto out; + + getboottime64(&boot); + expiry -= boot.tv_sec; } rsci.h.expiry_time = expiry;