Skip to content

Commit

Permalink
xprtrdma: Add ro_unmap_safe memreg method
Browse files Browse the repository at this point in the history
There needs to be a safe method of releasing registered memory
resources when an RPC terminates. Safe can mean a number of things:

+ Doesn't have to sleep

+ Doesn't rely on having a QP in RTS

ro_unmap_safe will be that safe method. It can be used in cases
where synchronous memory invalidation could deadlock or would
require an active QP.

The important case is fencing an RPC's memory regions after it is
signaled (^C) and before it exits. If this is not done, there is a
window where the server can write an RPC reply into memory that the
client has released and re-used for some other purpose.

Note that this is a full solution for FRWR, but FMR and physical
still have some gaps where a particularly bad server can wreak
some havoc on the client. These gaps are not made worse by this
patch and are expected to be exceptionally rare and timing-based.
They are noted in documenting comments.

Signed-off-by: Chuck Lever <[email protected]>
Tested-by: Steve Wise <[email protected]>
Signed-off-by: Anna Schumaker <[email protected]>
  • Loading branch information
chucklever authored and amschuma-ntap committed May 17, 2016
1 parent 763bc23 commit ead3f26
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 19 deletions.
105 changes: 96 additions & 9 deletions net/sunrpc/xprtrdma/fmr_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,64 @@
/* Maximum scatter/gather per FMR */
#define RPCRDMA_MAX_FMR_SGES (64)

static struct workqueue_struct *fmr_recovery_wq;

/* Flags passed to alloc_workqueue() when the FMR recovery
 * workqueue is created.
 */
#define FMR_RECOVERY_WQ_FLAGS (WQ_UNBOUND)

/* Create the workqueue on which deferred FMR recovery work is run.
 *
 * Returns zero on success, or -ENOMEM if the workqueue could not
 * be allocated.
 */
int
fmr_alloc_recovery_wq(void)
{
	/* Use the named flags rather than repeating the literal, so
	 * the workqueue's flags are defined in exactly one place.
	 */
	fmr_recovery_wq = alloc_workqueue("fmr_recovery",
					  FMR_RECOVERY_WQ_FLAGS, 0);
	if (!fmr_recovery_wq)
		return -ENOMEM;
	return 0;
}

/* Tear down the FMR recovery workqueue, if one was created.
 *
 * The global pointer is cleared before the workqueue is destroyed,
 * so a second call finds nothing to do.
 */
void
fmr_destroy_recovery_wq(void)
{
	struct workqueue_struct *doomed = fmr_recovery_wq;

	if (doomed == NULL)
		return;

	fmr_recovery_wq = NULL;
	destroy_workqueue(doomed);
}

static int
__fmr_unmap(struct rpcrdma_mw *mw)
{
LIST_HEAD(l);

list_add(&mw->fmr.fmr->list, &l);
return ib_unmap_fmr(&l);
}

/* Deferred reset of a single FMR, run from the recovery workqueue.
 *
 * The FMR is unmapped and the MW is then handed back via
 * rpcrdma_put_mw(). The status returned by __fmr_unmap() is not
 * examined: there's no recovery if the unmap fails.
 */
static void
__fmr_recovery_worker(struct work_struct *work)
{
	struct rpcrdma_mw *mw;
	struct rpcrdma_xprt *xprt;

	/* The work item is embedded in the MW it belongs to */
	mw = container_of(work, struct rpcrdma_mw, mw_work);
	xprt = mw->mw_xprt;

	__fmr_unmap(mw);
	rpcrdma_put_mw(xprt, mw);
}

/* Schedule recovery of "mw" on the FMR recovery workqueue.
 *
 * Used when a broken MR is found in a context that can't sleep:
 * the work item embedded in the MW is (re)initialized to run
 * __fmr_recovery_worker and then queued.
 */
static void
__fmr_queue_recovery(struct rpcrdma_mw *mw)
{
	struct work_struct *recovery = &mw->mw_work;

	INIT_WORK(recovery, __fmr_recovery_worker);
	queue_work(fmr_recovery_wq, recovery);
}

static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
Expand Down Expand Up @@ -92,6 +150,7 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
if (IS_ERR(r->fmr.fmr))
goto out_fmr_err;

r->mw_xprt = r_xprt;
list_add(&r->mw_list, &buf->rb_mws);
list_add(&r->mw_all, &buf->rb_all);
}
Expand All @@ -107,15 +166,6 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
return rc;
}

static int
__fmr_unmap(struct rpcrdma_mw *r)
{
LIST_HEAD(l);

list_add(&r->fmr.fmr->list, &l);
return ib_unmap_fmr(&l);
}

/* Use the ib_map_phys_fmr() verb to register a memory region
* for remote access via RDMA READ or RDMA WRITE.
*/
Expand Down Expand Up @@ -242,6 +292,42 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
req->rl_nchunks = 0;
}

/* Invalidate every memory region that was registered for "req",
 * using a slow but safe mechanism.
 *
 * When "sync" is true, each FMR is unmapped, DMA unmapped, and
 * passed to rpcrdma_put_mw() before this function returns. When
 * "sync" is false, only the DMA unmapping is done here; the FMR
 * itself is handed to the recovery workqueue.
 *
 * In the asynchronous case, DMA unmapping occurs first here
 * because the rpcrdma_mr_seg is released immediately after this
 * call. Its contents won't be available in __fmr_dma_unmap later.
 * FIXME.
 */
static void
fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
		  bool sync)
{
	unsigned int pos = 0;

	/* One pass per registered chunk; rl_nchunks counts down to zero */
	while (req->rl_nchunks) {
		struct rpcrdma_mr_seg *seg = &req->rl_segments[pos];
		struct rpcrdma_mw *mw = seg->rl_mw;

		if (!sync) {
			__fmr_dma_unmap(r_xprt, seg);
			__fmr_queue_recovery(mw);
		} else {
			/* ORDER */
			__fmr_unmap(mw);
			__fmr_dma_unmap(r_xprt, seg);
			rpcrdma_put_mw(r_xprt, mw);
		}

		/* Step past the segments this chunk covered, then
		 * mark the head segment as no longer registered.
		 */
		pos += seg->mr_nsegs;
		seg->mr_nsegs = 0;
		seg->rl_mw = NULL;

		req->rl_nchunks--;
	}
}

/* Use the ib_unmap_fmr() verb to prevent further remote
* access via RDMA READ or RDMA WRITE.
*/
Expand Down Expand Up @@ -295,6 +381,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_map = fmr_op_map,
.ro_unmap_sync = fmr_op_unmap_sync,
.ro_unmap_safe = fmr_op_unmap_safe,
.ro_unmap = fmr_op_unmap,
.ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages,
Expand Down
27 changes: 27 additions & 0 deletions net/sunrpc/xprtrdma/frwr_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,32 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
goto unmap;
}

/* Invalidate every memory region that was registered for "req",
 * using a slow but safe mechanism.
 *
 * When "sync" is true, each MW is passed to
 * __frwr_reset_and_unmap() before this function returns.
 * Otherwise each MW is passed to __frwr_queue_recovery().
 */
static void
frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
		   bool sync)
{
	unsigned int pos = 0;

	/* One pass per registered chunk; rl_nchunks counts down to zero */
	while (req->rl_nchunks) {
		struct rpcrdma_mr_seg *seg = &req->rl_segments[pos];
		struct rpcrdma_mw *mw = seg->rl_mw;

		if (!sync)
			__frwr_queue_recovery(mw);
		else
			__frwr_reset_and_unmap(r_xprt, mw);

		/* Step past the segments this chunk covered, then
		 * mark the head segment as no longer registered.
		 */
		pos += seg->mr_nsegs;
		seg->mr_nsegs = 0;
		seg->rl_mw = NULL;

		req->rl_nchunks--;
	}
}

/* Post a LOCAL_INV Work Request to prevent further remote access
* via RDMA READ or RDMA WRITE.
*/
Expand Down Expand Up @@ -675,6 +701,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
.ro_unmap_sync = frwr_op_unmap_sync,
.ro_unmap_safe = frwr_op_unmap_safe,
.ro_unmap = frwr_op_unmap,
.ro_open = frwr_op_open,
.ro_maxpages = frwr_op_maxpages,
Expand Down
20 changes: 20 additions & 0 deletions net/sunrpc/xprtrdma/physical_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,25 @@ physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
rpcrdma_unmap_one(device, &req->rl_segments[i++]);
}

/* Invalidate every memory region that was registered for "req",
 * using a slow but safe mechanism.
 *
 * Physical memory registration offers no good way to fence a
 * single MR once it has been advertised to the server: the R_key
 * the client handed out cannot be invalidated, and it is shared
 * by all MRs on this connection. Tearing down the PD might be the
 * only safe choice, but it's not clear that a freshly acquired
 * DMA R_key would differ from the one used by the PD that was
 * just destroyed.
 * FIXME.
 *
 * For now this does the same thing as physical_op_unmap_sync();
 * the "sync" argument is not consulted.
 */
static void
physical_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
		       bool sync)
{
	physical_op_unmap_sync(r_xprt, req);
}

static void
physical_op_destroy(struct rpcrdma_buffer *buf)
{
Expand All @@ -105,6 +124,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
.ro_map = physical_op_map,
.ro_unmap_sync = physical_op_unmap_sync,
.ro_unmap_safe = physical_op_unmap_safe,
.ro_unmap = physical_op_unmap,
.ro_open = physical_op_open,
.ro_maxpages = physical_op_maxpages,
Expand Down
5 changes: 1 addition & 4 deletions net/sunrpc/xprtrdma/rpc_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,6 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
enum rpcrdma_chunktype rtype, wtype;
struct rpcrdma_msg *headerp;
unsigned int pos;
ssize_t hdrlen;
size_t rpclen;
__be32 *iptr;
Expand Down Expand Up @@ -697,9 +696,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
return -EIO;

out_unmap:
for (pos = 0; req->rl_nchunks--;)
pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
&req->rl_segments[pos]);
r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
return PTR_ERR(iptr);
}

Expand Down
9 changes: 3 additions & 6 deletions net/sunrpc/xprtrdma/transport.c
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
out:
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
req->rl_connect_cookie = 0; /* our reserved value */
req->rl_task = task;
return req->rl_sendbuf->rg_base;

out_rdmabuf:
Expand Down Expand Up @@ -570,7 +571,6 @@ xprt_rdma_free(void *buffer)
struct rpcrdma_req *req;
struct rpcrdma_xprt *r_xprt;
struct rpcrdma_regbuf *rb;
int i;

if (buffer == NULL)
return;
Expand All @@ -584,11 +584,8 @@ xprt_rdma_free(void *buffer)

dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);

for (i = 0; req->rl_nchunks;) {
--req->rl_nchunks;
i += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
&req->rl_segments[i]);
}
r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req,
!RPC_IS_ASYNC(req->rl_task));

rpcrdma_buffer_put(req);
}
Expand Down
3 changes: 3 additions & 0 deletions net/sunrpc/xprtrdma/xprt_rdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ struct rpcrdma_req {
unsigned int rl_niovs;
unsigned int rl_nchunks;
unsigned int rl_connect_cookie;
struct rpc_task *rl_task;
struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
struct ib_sge rl_send_iov[RPCRDMA_MAX_IOVS];
Expand Down Expand Up @@ -400,6 +401,8 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_req *);
int (*ro_unmap)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *);
void (*ro_unmap_safe)(struct rpcrdma_xprt *,
struct rpcrdma_req *, bool);
int (*ro_open)(struct rpcrdma_ia *,
struct rpcrdma_ep *,
struct rpcrdma_create_data_internal *);
Expand Down

0 comments on commit ead3f26

Please sign in to comment.