Skip to content

Commit

Permalink
IB/hfi1: Optimize lkey validation structures
Browse files Browse the repository at this point in the history
Profiling shows that the key validation is susceptible
to cache line trading when accessing the lkey table.

Fix by separating out the read mostly fields from the write
fields.   In addition the shift amount, which is function
of the lkey table size, is precomputed and stored with the
table pointer.   Since both the shift and table pointer
are in the same read mostly cacheline, this saves a cache
line in this hot path.

Reviewed-by: Sebastian Sanchez <[email protected]>
Signed-off-by: Mike Marciniszyn <[email protected]>
Signed-off-by: Dennis Dalessandro <[email protected]>
Signed-off-by: Doug Ledford <[email protected]>
  • Loading branch information
mmarcini authored and dledford committed Nov 15, 2016
1 parent 63df8e0 commit 99f80d2
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 8 deletions.
10 changes: 5 additions & 5 deletions drivers/infiniband/sw/rdmavt/mr.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi)
lkey_table_size = rdi->dparms.lkey_table_size;
}
rdi->lkey_table.max = 1 << lkey_table_size;
rdi->lkey_table.shift = 32 - lkey_table_size;
lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
rdi->lkey_table.table = (struct rvt_mregion __rcu **)
vmalloc_node(lk_tab_size, rdi->dparms.node);
Expand Down Expand Up @@ -774,14 +775,15 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
struct rvt_mregion *mr;
unsigned n, m;
size_t off;
struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);

/*
* We use LKEY == zero for kernel virtual addresses
* (see rvt_get_dma_mr and dma.c).
*/
rcu_read_lock();
if (sge->lkey == 0) {
struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);

if (pd->user)
goto bail;
mr = rcu_dereference(dev->dma_mr);
Expand All @@ -798,8 +800,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
isge->n = 0;
goto ok;
}
mr = rcu_dereference(
rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
goto bail;
Expand Down Expand Up @@ -899,8 +900,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
goto ok;
}

mr = rcu_dereference(
rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
mr->lkey != rkey || qp->ibqp.pd != mr->pd))
goto bail;
Expand Down
10 changes: 7 additions & 3 deletions include/rdma/rdmavt_mr.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,15 @@ struct rvt_mregion {
#define RVT_MAX_LKEY_TABLE_BITS 23

struct rvt_lkey_table {
spinlock_t lock; /* protect changes in this struct */
u32 next; /* next unused index (speeds search) */
u32 gen; /* generation count */
/* read mostly fields */
u32 max; /* size of the table */
u32 shift; /* lkey/rkey shift */
struct rvt_mregion __rcu **table;
/* writeable fields */
/* protect changes in this struct */
spinlock_t lock ____cacheline_aligned_in_smp;
u32 next; /* next unused index (speeds search) */
u32 gen; /* generation count */
};

/*
Expand Down

0 comments on commit 99f80d2

Please sign in to comment.