IB: Refactor umem to use linear SG table
This patch refactors the IB core umem code and vendor drivers to use a
linear (chained) SG table instead of a chunk list. With this change the
relevant code becomes clearer: there is no longer any need for nested
loops to build and use the umem.

Signed-off-by: Shachar Raindel <[email protected]>
Signed-off-by: Yishai Hadas <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>
yishaih authored and rolandd committed Mar 4, 2014
1 parent cfbf8d4 commit eeb8461
Showing 16 changed files with 448 additions and 552 deletions.
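For orientation before the per-file hunks: the change swaps the data structure behind struct ib_umem. Previously each umem carried umem->chunk_list, a list of ib_umem_chunk entries that each held a small scatterlist, so every consumer needed an outer loop over chunks and an inner loop over entries. After this commit the umem holds one linear (chained) sg_table in umem->sg_head, and consumers make a single pass with for_each_sg(). A minimal sketch of the two iteration patterns, condensed from the hunks below (use_dma_block() is a placeholder for driver-specific work, not a real kernel function):

/* Old layout: nested walk over the chunk list, then over each chunk's entries. */
static void walk_umem_old(struct ib_umem *umem)
{
	struct ib_umem_chunk *chunk;
	int i;

	list_for_each_entry(chunk, &umem->chunk_list, list)
		for (i = 0; i < chunk->nmap; ++i)
			use_dma_block(sg_dma_address(&chunk->page_list[i]),
				      sg_dma_len(&chunk->page_list[i]));
}

/* New layout: one linear SG table, walked in a single for_each_sg() pass. */
static void walk_umem_new(struct ib_umem *umem)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
		use_dma_block(sg_dma_address(sg), sg_dma_len(sg));
}

The single-table layout also lets the whole region be DMA-mapped with one ib_dma_map_sg_attrs() call instead of one call per chunk, which is exactly what the ib_umem_get() hunk below does.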
120 changes: 56 additions & 64 deletions drivers/infiniband/core/umem.c
@@ -42,29 +42,29 @@

#include "uverbs.h"

- #define IB_UMEM_MAX_PAGE_CHUNK \
- ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \
- ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
- (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
-
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
- struct ib_umem_chunk *chunk, *tmp;
+ struct scatterlist *sg;
+ struct page *page;
int i;

- list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
- ib_dma_unmap_sg(dev, chunk->page_list,
- chunk->nents, DMA_BIDIRECTIONAL);
- for (i = 0; i < chunk->nents; ++i) {
- struct page *page = sg_page(&chunk->page_list[i]);
+ if (umem->nmap > 0)
+ ib_dma_unmap_sg(dev, umem->sg_head.sgl,
+ umem->nmap,
+ DMA_BIDIRECTIONAL);

- if (umem->writable && dirty)
- set_page_dirty_lock(page);
- put_page(page);
- }
+ for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {

- kfree(chunk);
+ page = sg_page(sg);
+ if (umem->writable && dirty)
+ set_page_dirty_lock(page);
+ put_page(page);
}
+
+ sg_free_table(&umem->sg_head);
+ return;
+
}

/**
@@ -81,23 +81,23 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
struct ib_umem *umem;
struct page **page_list;
struct vm_area_struct **vma_list;
- struct ib_umem_chunk *chunk;
unsigned long locked;
unsigned long lock_limit;
unsigned long cur_base;
unsigned long npages;
int ret;
- int off;
int i;
DEFINE_DMA_ATTRS(attrs);
+ struct scatterlist *sg, *sg_list_start;
+ int need_release = 0;

if (dmasync)
dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);

if (!can_do_mlock())
return ERR_PTR(-EPERM);

- umem = kmalloc(sizeof *umem, GFP_KERNEL);
+ umem = kzalloc(sizeof *umem, GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);

@@ -117,8 +117,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
/* We assume the memory is from hugetlb until proved otherwise */
umem->hugetlb = 1;

- INIT_LIST_HEAD(&umem->chunk_list);
-
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
kfree(umem);
@@ -147,7 +145,18 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,

cur_base = addr & PAGE_MASK;

- ret = 0;
+ if (npages == 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
+ if (ret)
+ goto out;
+
+ need_release = 1;
+ sg_list_start = umem->sg_head.sgl;
+
while (npages) {
ret = get_user_pages(current, current->mm, cur_base,
min_t(unsigned long, npages,
@@ -157,54 +166,38 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (ret < 0)
goto out;

+ umem->npages += ret;
cur_base += ret * PAGE_SIZE;
npages -= ret;

- off = 0;
-
- while (ret) {
- chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
- min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
- GFP_KERNEL);
- if (!chunk) {
- ret = -ENOMEM;
- goto out;
- }
-
- chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
- sg_init_table(chunk->page_list, chunk->nents);
- for (i = 0; i < chunk->nents; ++i) {
- if (vma_list &&
- !is_vm_hugetlb_page(vma_list[i + off]))
- umem->hugetlb = 0;
- sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0);
- }
-
- chunk->nmap = ib_dma_map_sg_attrs(context->device,
- &chunk->page_list[0],
- chunk->nents,
- DMA_BIDIRECTIONAL,
- &attrs);
- if (chunk->nmap <= 0) {
- for (i = 0; i < chunk->nents; ++i)
- put_page(sg_page(&chunk->page_list[i]));
- kfree(chunk);
-
- ret = -ENOMEM;
- goto out;
- }
-
- ret -= chunk->nents;
- off += chunk->nents;
- list_add_tail(&chunk->list, &umem->chunk_list);
+ for_each_sg(sg_list_start, sg, ret, i) {
+ if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
+ umem->hugetlb = 0;
+
+ sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
}

- ret = 0;
+ /* preparing for next loop */
+ sg_list_start = sg;
}

+ umem->nmap = ib_dma_map_sg_attrs(context->device,
+ umem->sg_head.sgl,
+ umem->npages,
+ DMA_BIDIRECTIONAL,
+ &attrs);
+
+ if (umem->nmap <= 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = 0;
+
out:
if (ret < 0) {
- __ib_umem_release(context->device, umem, 0);
+ if (need_release)
+ __ib_umem_release(context->device, umem, 0);
kfree(umem);
} else
current->mm->pinned_vm = locked;
@@ -278,17 +271,16 @@ EXPORT_SYMBOL(ib_umem_release);

int ib_umem_page_count(struct ib_umem *umem)
{
- struct ib_umem_chunk *chunk;
int shift;
int i;
int n;
+ struct scatterlist *sg;

shift = ilog2(umem->page_size);

n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- for (i = 0; i < chunk->nmap; ++i)
- n += sg_dma_len(&chunk->page_list[i]) >> shift;
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
+ n += sg_dma_len(sg) >> shift;

return n;
}
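The three RDMA drivers below (amso1100, cxgb3, cxgb4) apply the same mechanical conversion when they flatten the pinned region into a page/PBL array: the chunk count n becomes umem->nmap, and the nested chunk loop becomes one for_each_sg() pass that expands each mapped DMA block into page_size-sized addresses. A condensed sketch of that shared pattern (illustrative only; the helper name is made up here, and the cxgb3/cxgb4 hunks additionally flush the array to hardware in PAGE_SIZE batches):

/* Sketch: expand a umem's mapped SG entries into a flat array of page-sized
 * DMA addresses, as the reg_user_mr paths below do.  The caller allocates
 * 'pages' with room for ib_umem_page_count(umem) entries; returns the number
 * of entries written. */
static int umem_to_page_array(struct ib_umem *umem, u64 *pages)
{
	struct scatterlist *sg;
	int shift = ilog2(umem->page_size);
	int i = 0, k, len, entry;

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		len = sg_dma_len(sg) >> shift;
		for (k = 0; k < len; ++k)
			pages[i++] = sg_dma_address(sg) + (umem->page_size * k);
	}
	return i;
}

The real drivers keep their own bookkeeping (n taken from umem->nmap, PBL write-back, byte-order conversion); only the iteration structure changes.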
23 changes: 9 additions & 14 deletions drivers/infiniband/hw/amso1100/c2_provider.c
@@ -431,9 +431,9 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 *pages;
u64 kva = 0;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct c2_pd *c2pd = to_c2pd(pd);
struct c2_mr *c2mr;

@@ -452,10 +452,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}

shift = ffs(c2mr->umem->page_size) - 1;
-
- n = 0;
- list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
- n += chunk->nents;
+ n = c2mr->umem->nmap;

pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
if (!pages) {
@@ -464,14 +461,12 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}

i = 0;
- list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] =
- sg_dma_address(&chunk->page_list[j]) +
- (c2mr->umem->page_size * k);
- }
+ for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] =
+ sg_dma_address(sg) +
+ (c2mr->umem->page_size * k);
}
}

19 changes: 7 additions & 12 deletions drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -618,14 +618,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
__be64 *pages;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
- struct ib_umem_chunk *chunk;
struct iwch_dev *rhp;
struct iwch_pd *php;
struct iwch_mr *mhp;
struct iwch_reg_user_mr_resp uresp;
-
+ struct scatterlist *sg;
PDBG("%s ib_pd %p\n", __func__, pd);

php = to_iwch_pd(pd);
@@ -645,9 +644,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,

shift = ffs(mhp->umem->page_size) - 1;

- n = 0;
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- n += chunk->nents;
+ n = mhp->umem->nmap;

err = iwch_alloc_pbl(mhp, n);
if (err)
@@ -661,12 +658,10 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,

i = n = 0;

- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
+ for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
- pages[i++] = cpu_to_be64(sg_dma_address(
- &chunk->page_list[j]) +
+ pages[i++] = cpu_to_be64(sg_dma_address(sg) +
mhp->umem->page_size * k);
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
@@ -676,7 +671,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = 0;
}
}
- }
+ }

if (i)
err = iwch_write_pbl(mhp, pages, i, n);
39 changes: 17 additions & 22 deletions drivers/infiniband/hw/cxgb4/mem.c
@@ -678,9 +678,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
__be64 *pages;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct c4iw_dev *rhp;
struct c4iw_pd *php;
struct c4iw_mr *mhp;
@@ -710,10 +710,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,

shift = ffs(mhp->umem->page_size) - 1;

- n = 0;
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- n += chunk->nents;
-
+ n = mhp->umem->nmap;
err = alloc_pbl(mhp, n);
if (err)
goto err;
@@ -726,24 +723,22 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,

i = n = 0;

- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = cpu_to_be64(sg_dma_address(
- &chunk->page_list[j]) +
- mhp->umem->page_size * k);
- if (i == PAGE_SIZE / sizeof *pages) {
- err = write_pbl(&mhp->rhp->rdev,
- pages,
- mhp->attr.pbl_addr + (n << 3), i);
- if (err)
- goto pbl_done;
- n += i;
- i = 0;
- }
+ for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = cpu_to_be64(sg_dma_address(sg) +
+ mhp->umem->page_size * k);
+ if (i == PAGE_SIZE / sizeof *pages) {
+ err = write_pbl(&mhp->rhp->rdev,
+ pages,
+ mhp->attr.pbl_addr + (n << 3), i);
+ if (err)
+ goto pbl_done;
+ n += i;
+ i = 0;
+ }
+ }
}
}

if (i)
err = write_pbl(&mhp->rhp->rdev, pages,
2 changes: 1 addition & 1 deletion drivers/infiniband/hw/ehca/ehca_classes.h
@@ -322,7 +322,7 @@ struct ehca_mr_pginfo {
} phy;
struct { /* type EHCA_MR_PGI_USER section */
struct ib_umem *region;
- struct ib_umem_chunk *next_chunk;
+ struct scatterlist *next_sg;
u64 next_nmap;
} usr;
struct { /* type EHCA_MR_PGI_FMR section */
(Diffs for the remaining 11 changed files are not included here.)
