Skip to content

Commit

Permalink
IB/iser: Use IB_WR_REG_MR_INTEGRITY for PI handover
Browse files Browse the repository at this point in the history
Using this new API reduces iSER code complexity.
It also reduces the maximum number of work requests per task and the need
to deal with multiple MRs (and their registrations and invalidations)
per task. This is done by using a single WR and a special MR type
(IB_MR_TYPE_INTEGRITY) for PI operations.

The setup of the tested benchmark:
 - 2 servers with 24 cores (1 initiator and 1 target)
 - 24 target sessions with 1 LUN each
 - ramdisk backstore
 - PI active

Performance results running fio (24 jobs, 128 iodepth) using
write_generate=0 and read_verify=0 (with/without patch):

bs      IOPS(read)        IOPS(write)
----    ----------        ----------
512     1236.6K/1164.3K   1357.2K/1332.8K
1k      1196.5K/1163.8K   1348.4K/1262.7K
2k      1016.7K/921950    1003.7K/931230
4k      662728/600545     595423/501513
8k      385954/384345     333775/277090
16k     222864/222820     170317/170671
32k     116869/114896     82331/82244
64k     55205/54931       40264/40021

Using write_generate=1 and read_verify=1 (with/without patch):

bs      IOPS(read)        IOPS(write)
----    ----------        ----------
512     1090.1K/1030.9K   1303.9K/1101.4K
1k      1057.7K/904583    1318.4K/988085
2k      965226/638799     1008.6K/692514
4k      555479/410151     542414/414517
8k      298675/224964     264729/237508
16k     133485/122481     164625/138647
32k     74329/67615       80143/78743
64k     35716/35519       39294/37334

We get a performance improvement at all block sizes.
The most significant improvement is when writing with a 4k block size
(almost 30% more IOPS).

Signed-off-by: Israel Rukshin <[email protected]>
Reviewed-by: Max Gurtovoy <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Reviewed-by: Sagi Grimberg <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
  • Loading branch information
Israel Rukshin authored and jgunthorpe committed Jun 24, 2019
1 parent 38ca87c commit b76a439
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 193 deletions.
38 changes: 8 additions & 30 deletions drivers/infiniband/ulp/iser/iscsi_iser.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,13 +225,11 @@ enum iser_desc_type {
ISCSI_TX_DATAOUT
};

/* Maximum number of work requests per task:
* Data memory region local invalidate + fast registration
* Protection memory region local invalidate + fast registration
* Signature memory region local invalidate + fast registration
* PDU send
/*
* Maximum number of work requests per task
* (invalidate, registration, send)
*/
#define ISER_MAX_WRS 7
#define ISER_MAX_WRS 3

/**
* struct iser_tx_desc - iSER TX descriptor
Expand All @@ -247,9 +245,6 @@ enum iser_desc_type {
* @mapped: Is the task header mapped
* @wr_idx: Current WR index
* @wrs: Array of WRs per task
* @data_reg: Data buffer registration details
* @prot_reg: Protection buffer registration details
* @sig_attrs: Signature attributes
*/
struct iser_tx_desc {
struct iser_ctrl iser_header;
Expand All @@ -264,11 +259,7 @@ struct iser_tx_desc {
union iser_wr {
struct ib_send_wr send;
struct ib_reg_wr fast_reg;
struct ib_sig_handover_wr sig;
} wrs[ISER_MAX_WRS];
struct iser_mem_reg data_reg;
struct iser_mem_reg prot_reg;
struct ib_sig_attrs sig_attrs;
};

#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
Expand Down Expand Up @@ -388,6 +379,7 @@ struct iser_device {
*
* @mr: memory region
* @fmr_pool: pool of fmrs
* @sig_mr: signature memory region
* @page_vec: fast reg page list used by fmr pool
* @mr_valid: is mr valid indicator
*/
Expand All @@ -396,36 +388,22 @@ struct iser_reg_resources {
struct ib_mr *mr;
struct ib_fmr_pool *fmr_pool;
};
struct ib_mr *sig_mr;
struct iser_page_vec *page_vec;
u8 mr_valid:1;
};

/**
* struct iser_pi_context - Protection information context
*
* @rsc: protection buffer registration resources
* @sig_mr: signature enable memory region
* @sig_mr_valid: is sig_mr valid indicator
* @sig_protected: is region protected indicator
*/
struct iser_pi_context {
struct iser_reg_resources rsc;
struct ib_mr *sig_mr;
u8 sig_mr_valid:1;
u8 sig_protected:1;
};

/**
* struct iser_fr_desc - Fast registration descriptor
*
* @list: entry in connection fastreg pool
* @rsc: data buffer registration resources
* @pi_ctx: protection information context
* @sig_protected: is region protected indicator
*/
struct iser_fr_desc {
struct list_head list;
struct iser_reg_resources rsc;
struct iser_pi_context *pi_ctx;
bool sig_protected;
struct list_head all_list;
};

Expand Down
12 changes: 7 additions & 5 deletions drivers/infiniband/ulp/iser/iser_initiator.c
Original file line number Diff line number Diff line change
Expand Up @@ -592,15 +592,14 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
static inline int
iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
{
if (likely(rkey == desc->rsc.mr->rkey)) {
desc->rsc.mr_valid = 0;
} else if (likely(desc->pi_ctx && rkey == desc->pi_ctx->sig_mr->rkey)) {
desc->pi_ctx->sig_mr_valid = 0;
} else {
if (unlikely((!desc->sig_protected && rkey != desc->rsc.mr->rkey) ||
(desc->sig_protected && rkey != desc->rsc.sig_mr->rkey))) {
iser_err("Bogus remote invalidation for rkey %#x\n", rkey);
return -EINVAL;
}

desc->rsc.mr_valid = 0;

return 0;
}

Expand Down Expand Up @@ -750,6 +749,9 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
iser_task->prot[ISER_DIR_IN].data_len = 0;
iser_task->prot[ISER_DIR_OUT].data_len = 0;

iser_task->prot[ISER_DIR_IN].dma_nents = 0;
iser_task->prot[ISER_DIR_OUT].dma_nents = 0;

memset(&iser_task->rdma_reg[ISER_DIR_IN], 0,
sizeof(struct iser_mem_reg));
memset(&iser_task->rdma_reg[ISER_DIR_OUT], 0,
Expand Down
98 changes: 35 additions & 63 deletions drivers/infiniband/ulp/iser/iser_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -376,16 +376,16 @@ iser_inv_rkey(struct ib_send_wr *inv_wr,

static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
struct iser_pi_context *pi_ctx,
struct iser_mem_reg *data_reg,
struct iser_mem_reg *prot_reg,
struct iser_data_buf *mem,
struct iser_data_buf *sig_mem,
struct iser_reg_resources *rsc,
struct iser_mem_reg *sig_reg)
{
struct iser_tx_desc *tx_desc = &iser_task->desc;
struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
struct ib_sig_handover_wr *wr;
struct ib_mr *mr = pi_ctx->sig_mr;
struct ib_mr *mr = rsc->sig_mr;
struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
struct ib_reg_wr *wr;
int ret;

memset(sig_attrs, 0, sizeof(*sig_attrs));
Expand All @@ -395,33 +395,36 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,

iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);

if (pi_ctx->sig_mr_valid)
if (rsc->mr_valid)
iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);

ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));

wr = container_of(iser_tx_next_wr(tx_desc), struct ib_sig_handover_wr,
wr);
wr->wr.opcode = IB_WR_REG_SIG_MR;
ret = ib_map_mr_sg_pi(mr, mem->sg, mem->dma_nents, NULL,
sig_mem->sg, sig_mem->dma_nents, NULL, SZ_4K);
if (unlikely(ret)) {
iser_err("failed to map PI sg (%d)\n",
mem->dma_nents + sig_mem->dma_nents);
goto err;
}

wr = container_of(iser_tx_next_wr(tx_desc), struct ib_reg_wr, wr);
memset(wr, 0, sizeof(*wr));
wr->wr.opcode = IB_WR_REG_MR_INTEGRITY;
wr->wr.wr_cqe = cqe;
wr->wr.sg_list = &data_reg->sge;
wr->wr.num_sge = 1;
wr->wr.num_sge = 0;
wr->wr.send_flags = 0;
wr->sig_attrs = sig_attrs;
wr->sig_mr = mr;
if (scsi_prot_sg_count(iser_task->sc))
wr->prot = &prot_reg->sge;
else
wr->prot = NULL;
wr->access_flags = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE;
pi_ctx->sig_mr_valid = 1;
wr->mr = mr;
wr->key = mr->rkey;
wr->access = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE;
rsc->mr_valid = 1;

sig_reg->sge.lkey = mr->lkey;
sig_reg->rkey = mr->rkey;
sig_reg->sge.addr = 0;
sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
sig_reg->sge.addr = mr->iova;
sig_reg->sge.length = mr->length;

iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=%u\n",
sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
Expand Down Expand Up @@ -477,21 +480,6 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
return 0;
}

static int
iser_reg_prot_sg(struct iscsi_iser_task *task,
struct iser_data_buf *mem,
struct iser_fr_desc *desc,
bool use_dma_key,
struct iser_mem_reg *reg)
{
struct iser_device *device = task->iser_conn->ib_conn.device;

if (use_dma_key)
return iser_reg_dma(device, mem, reg);

return device->reg_ops->reg_mem(task, mem, &desc->pi_ctx->rsc, reg);
}

static int
iser_reg_data_sg(struct iscsi_iser_task *task,
struct iser_data_buf *mem,
Expand All @@ -515,7 +503,6 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
struct iser_device *device = ib_conn->device;
struct iser_data_buf *mem = &task->data[dir];
struct iser_mem_reg *reg = &task->rdma_reg[dir];
struct iser_mem_reg *data_reg;
struct iser_fr_desc *desc = NULL;
bool use_dma_key;
int err;
Expand All @@ -528,32 +515,17 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
reg->mem_h = desc;
}

if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL)
data_reg = reg;
else
data_reg = &task->desc.data_reg;

err = iser_reg_data_sg(task, mem, desc, use_dma_key, data_reg);
if (unlikely(err))
goto err_reg;

if (scsi_get_prot_op(task->sc) != SCSI_PROT_NORMAL) {
struct iser_mem_reg *prot_reg = &task->desc.prot_reg;

if (scsi_prot_sg_count(task->sc)) {
mem = &task->prot[dir];
err = iser_reg_prot_sg(task, mem, desc,
use_dma_key, prot_reg);
if (unlikely(err))
goto err_reg;
}

err = iser_reg_sig_mr(task, desc->pi_ctx, data_reg,
prot_reg, reg);
if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL) {
err = iser_reg_data_sg(task, mem, desc, use_dma_key, reg);
if (unlikely(err))
goto err_reg;
} else {
err = iser_reg_sig_mr(task, mem, &task->prot[dir],
&desc->rsc, reg);
if (unlikely(err))
goto err_reg;

desc->pi_ctx->sig_protected = 1;
desc->sig_protected = 1;
}

return 0;
Expand Down
Loading

0 comments on commit b76a439

Please sign in to comment.