svcrdma: Allocate recv_ctxt's on CPU handling Receives
There is a significant latency penalty when processing an ingress
Receive if the Receive buffer resides in memory that is not on the
same NUMA node as the CPU handling completions for a CQ.

The system administrator and the device driver determine which CPU
handles completions. This CPU does not change during the life of the
CQ. Further, the Upper Layer has no visibility into which CPU that is.

Allocating Receive buffers in the Receive completion handler
guarantees that they are allocated on the preferred NUMA node for
that CQ.

Signed-off-by: Chuck Lever <[email protected]>
Signed-off-by: J. Bruce Fields <[email protected]>
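
The guarantee above rests on the kernel's default local-node allocation
policy: memory allocated by a task is placed on the NUMA node of the CPU
it is running on, so an allocation made from the CQ's completion context
lands on that CQ's node. Below is a minimal user-space sketch of the same
principle, assuming libnuma is available (build with -lnuma); the program
is illustrative only and is not part of this patch:

#define _GNU_SOURCE
#include <numa.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	if (numa_available() < 0) {
		fprintf(stderr, "no NUMA support on this system\n");
		return 1;
	}

	/* Request placement on the node local to the current CPU, then
	 * fault the pages in so the placement actually happens. */
	size_t len = 1 << 20;
	void *buf = numa_alloc_local(len);
	if (!buf)
		return 1;
	memset(buf, 0, len);

	int cpu = sched_getcpu();
	printf("buffer placed on node %d (allocating CPU %d)\n",
	       numa_node_of_cpu(cpu), cpu);

	numa_free(buf, len);
	return 0;
}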
chucklever authored and J. Bruce Fields committed May 11, 2018
1 parent 3316f06 commit eb5d7a6
Showing 2 changed files with 37 additions and 16 deletions.
1 change: 1 addition & 0 deletions include/linux/sunrpc/svc_rdma.h
@@ -151,6 +151,7 @@ struct svc_rdma_recv_ctxt {
 	struct ib_sge		rc_recv_sge;
 	void			*rc_recv_buf;
 	struct xdr_buf		rc_arg;
+	bool			rc_temp;
 	u32			rc_byte_len;
 	unsigned int		rc_page_count;
 	unsigned int		rc_hdr_count;
52 changes: 36 additions & 16 deletions net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -144,6 +144,7 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
 	ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
 	ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
 	ctxt->rc_recv_buf = buffer;
+	ctxt->rc_temp = false;
 	return ctxt;
 
 fail2:
@@ -154,6 +155,15 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
 	return NULL;
 }
 
+static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
+				       struct svc_rdma_recv_ctxt *ctxt)
+{
+	ib_dma_unmap_single(rdma->sc_pd->device, ctxt->rc_recv_sge.addr,
+			    ctxt->rc_recv_sge.length, DMA_FROM_DEVICE);
+	kfree(ctxt->rc_recv_buf);
+	kfree(ctxt);
+}
+
 /**
  * svc_rdma_recv_ctxts_destroy - Release all recv_ctxt's for an xprt
  * @rdma: svcxprt_rdma being torn down
@@ -165,12 +175,7 @@ void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
 
 	while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) {
 		list_del(&ctxt->rc_list);
-		ib_dma_unmap_single(rdma->sc_pd->device,
-				    ctxt->rc_recv_sge.addr,
-				    ctxt->rc_recv_sge.length,
-				    DMA_FROM_DEVICE);
-		kfree(ctxt->rc_recv_buf);
-		kfree(ctxt);
+		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 	}
 }
 
@@ -212,21 +217,21 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
 
 	for (i = 0; i < ctxt->rc_page_count; i++)
 		put_page(ctxt->rc_pages[i]);
-	spin_lock(&rdma->sc_recv_lock);
-	list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
-	spin_unlock(&rdma->sc_recv_lock);
+
+	if (!ctxt->rc_temp) {
+		spin_lock(&rdma->sc_recv_lock);
+		list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
+		spin_unlock(&rdma->sc_recv_lock);
+	} else
+		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 }
 
-static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
+static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
+				struct svc_rdma_recv_ctxt *ctxt)
 {
-	struct svc_rdma_recv_ctxt *ctxt;
 	struct ib_recv_wr *bad_recv_wr;
 	int ret;
 
-	ctxt = svc_rdma_recv_ctxt_get(rdma);
-	if (!ctxt)
-		return -ENOMEM;
-
 	svc_xprt_get(&rdma->sc_xprt);
 	ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, &bad_recv_wr);
 	trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret);
@@ -240,6 +245,16 @@ static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
 	return ret;
 }
 
+static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
+{
+	struct svc_rdma_recv_ctxt *ctxt;
+
+	ctxt = svc_rdma_recv_ctxt_get(rdma);
+	if (!ctxt)
+		return -ENOMEM;
+	return __svc_rdma_post_recv(rdma, ctxt);
+}
+
 /**
  * svc_rdma_post_recvs - Post initial set of Recv WRs
  * @rdma: fresh svcxprt_rdma
@@ -248,11 +263,16 @@ static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
  */
 bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
 {
+	struct svc_rdma_recv_ctxt *ctxt;
 	unsigned int i;
 	int ret;
 
 	for (i = 0; i < rdma->sc_max_requests; i++) {
-		ret = svc_rdma_post_recv(rdma);
+		ctxt = svc_rdma_recv_ctxt_get(rdma);
+		if (!ctxt)
+			return -ENOMEM;
+		ctxt->rc_temp = true;
+		ret = __svc_rdma_post_recv(rdma, ctxt);
 		if (ret) {
 			pr_err("svcrdma: failure posting recv buffers: %d\n",
 			       ret);
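Taken together, the hunks give each recv_ctxt a two-phase lifecycle. The
function below is a hypothetical sketch, not code from the patch: it
compresses into one place calls that actually run at different times,
using only helpers the patch itself defines. In the real code, step 1 is
svc_rdma_post_recvs() at transport setup, and step 2 runs in the Receive
completion path on the CQ's CPU:

static int recv_ctxt_lifecycle_sketch(struct svcxprt_rdma *rdma)
{
	struct svc_rdma_recv_ctxt *ctxt;
	int ret;

	/* Step 1: transport setup, arbitrary CPU. The initial Recv is
	 * flagged temporary before it is posted. */
	ctxt = svc_rdma_recv_ctxt_get(rdma);
	if (!ctxt)
		return -ENOMEM;
	ctxt->rc_temp = true;
	ret = __svc_rdma_post_recv(rdma, ctxt);
	if (ret)
		return ret;

	/* Step 2: Receive completion, the CQ's CPU (in reality much
	 * later, after the Receive has completed and been processed).
	 * rc_temp makes svc_rdma_recv_ctxt_put() destroy the ctxt
	 * instead of recycling it, and the replacement posted here is
	 * allocated on this CPU, hence on the CQ's preferred NUMA node. */
	svc_rdma_recv_ctxt_put(rdma, ctxt);
	return svc_rdma_post_recv(rdma);
}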
