Skip to content

Commit

Permalink
sparc: Make sparc64 use scalable lib/iommu-common.c functions
Browse files Browse the repository at this point in the history
In iperf experiments running linux as the Tx side (TCP client) with
10 threads results in a severe performance drop when TSO is disabled,
indicating a weakness in the software that can be avoided by using
the scalable IOMMU arena DMA allocation.

Baseline numbers before this patch:
   with default settings (TSO enabled) :    9-9.5 Gbps
   Disable TSO using ethtool- drops badly:  2-3 Gbps.

After this patch, iperf client with 10 threads, can give a
throughput of at least 8.5 Gbps, even when TSO is disabled.

Signed-off-by: Sowmini Varadhan <[email protected]>
Acked-by: Benjamin Herrenschmidt <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
sowminiv authored and davem330 committed Apr 18, 2015
1 parent ff7d37a commit bb620c3
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 242 deletions.
7 changes: 3 additions & 4 deletions arch/sparc/include/asm/iommu_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define IOPTE_WRITE 0x0000000000000002UL

#define IOMMU_NUM_CTXS 4096
#include <linux/iommu-common.h>

struct iommu_arena {
unsigned long *map;
Expand All @@ -24,11 +25,10 @@ struct iommu_arena {
};

struct iommu {
struct iommu_map_table tbl;
spinlock_t lock;
struct iommu_arena arena;
void (*flush_all)(struct iommu *);
u32 dma_addr_mask;
iopte_t *page_table;
u32 page_table_map_base;
unsigned long iommu_control;
unsigned long iommu_tsbbase;
unsigned long iommu_flush;
Expand All @@ -40,7 +40,6 @@ struct iommu {
unsigned long dummy_page_pa;
unsigned long ctx_lowest_free;
DECLARE_BITMAP(ctx_bitmap, IOMMU_NUM_CTXS);
u32 dma_addr_mask;
};

struct strbuf {
Expand Down
172 changes: 43 additions & 129 deletions arch/sparc/kernel/iommu.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <linux/errno.h>
#include <linux/iommu-helper.h>
#include <linux/bitmap.h>
#include <linux/iommu-common.h>

#ifdef CONFIG_PCI
#include <linux/pci.h>
Expand Down Expand Up @@ -45,8 +46,9 @@
"i" (ASI_PHYS_BYPASS_EC_E))

/* Must be invoked under the IOMMU lock. */
static void iommu_flushall(struct iommu *iommu)
static void iommu_flushall(struct iommu_map_table *iommu_map_table)
{
struct iommu *iommu = container_of(iommu_map_table, struct iommu, tbl);
if (iommu->iommu_flushinv) {
iommu_write(iommu->iommu_flushinv, ~(u64)0);
} else {
Expand Down Expand Up @@ -87,94 +89,6 @@ static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte)
iopte_val(*iopte) = val;
}

/* Based almost entirely upon the ppc64 iommu allocator. If you use the 'handle'
* facility it must all be done in one pass while under the iommu lock.
*
* On sun4u platforms, we only flush the IOMMU once every time we've passed
* over the entire page table doing allocations. Therefore we only ever advance
* the hint and cannot backtrack it.
*/
unsigned long iommu_range_alloc(struct device *dev,
struct iommu *iommu,
unsigned long npages,
unsigned long *handle)
{
unsigned long n, end, start, limit, boundary_size;
struct iommu_arena *arena = &iommu->arena;
int pass = 0;

/* This allocator was derived from x86_64's bit string search */

/* Sanity check */
if (unlikely(npages == 0)) {
if (printk_ratelimit())
WARN_ON(1);
return DMA_ERROR_CODE;
}

if (handle && *handle)
start = *handle;
else
start = arena->hint;

limit = arena->limit;

/* The case below can happen if we have a small segment appended
* to a large, or when the previous alloc was at the very end of
* the available space. If so, go back to the beginning and flush.
*/
if (start >= limit) {
start = 0;
if (iommu->flush_all)
iommu->flush_all(iommu);
}

again:

if (dev)
boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
1 << IO_PAGE_SHIFT);
else
boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT);

n = iommu_area_alloc(arena->map, limit, start, npages,
iommu->page_table_map_base >> IO_PAGE_SHIFT,
boundary_size >> IO_PAGE_SHIFT, 0);
if (n == -1) {
if (likely(pass < 1)) {
/* First failure, rescan from the beginning. */
start = 0;
if (iommu->flush_all)
iommu->flush_all(iommu);
pass++;
goto again;
} else {
/* Second failure, give up */
return DMA_ERROR_CODE;
}
}

end = n + npages;

arena->hint = end;

/* Update handle for SG allocations */
if (handle)
*handle = end;

return n;
}

void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages)
{
struct iommu_arena *arena = &iommu->arena;
unsigned long entry;

entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;

bitmap_clear(arena->map, entry, npages);
}

int iommu_table_init(struct iommu *iommu, int tsbsize,
u32 dma_offset, u32 dma_addr_mask,
int numa_node)
Expand All @@ -187,22 +101,20 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
/* Setup initial software IOMMU state. */
spin_lock_init(&iommu->lock);
iommu->ctx_lowest_free = 1;
iommu->page_table_map_base = dma_offset;
iommu->tbl.table_map_base = dma_offset;
iommu->dma_addr_mask = dma_addr_mask;

/* Allocate and initialize the free area map. */
sz = num_tsb_entries / 8;
sz = (sz + 7UL) & ~7UL;
iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
if (!iommu->arena.map) {
printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n");
iommu->tbl.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
if (!iommu->tbl.map)
return -ENOMEM;
}
memset(iommu->arena.map, 0, sz);
iommu->arena.limit = num_tsb_entries;
memset(iommu->tbl.map, 0, sz);

if (tlb_type != hypervisor)
iommu->flush_all = iommu_flushall;
iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
(tlb_type != hypervisor ? iommu_flushall : NULL),
false, 1, false);

/* Allocate and initialize the dummy page which we
* set inactive IO PTEs to point to.
Expand Down Expand Up @@ -235,18 +147,20 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
iommu->dummy_page = 0UL;

out_free_map:
kfree(iommu->arena.map);
iommu->arena.map = NULL;
kfree(iommu->tbl.map);
iommu->tbl.map = NULL;

return -ENOMEM;
}

static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu,
static inline iopte_t *alloc_npages(struct device *dev,
struct iommu *iommu,
unsigned long npages)
{
unsigned long entry;

entry = iommu_range_alloc(dev, iommu, npages, NULL);
entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
(unsigned long)(-1), 0);
if (unlikely(entry == DMA_ERROR_CODE))
return NULL;

Expand Down Expand Up @@ -284,7 +198,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addrp, gfp_t gfp,
struct dma_attrs *attrs)
{
unsigned long flags, order, first_page;
unsigned long order, first_page;
struct iommu *iommu;
struct page *page;
int npages, nid;
Expand All @@ -306,16 +220,14 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,

iommu = dev->archdata.iommu;

spin_lock_irqsave(&iommu->lock, flags);
iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
spin_unlock_irqrestore(&iommu->lock, flags);

if (unlikely(iopte == NULL)) {
free_pages(first_page, order);
return NULL;
}

*dma_addrp = (iommu->page_table_map_base +
*dma_addrp = (iommu->tbl.table_map_base +
((iopte - iommu->page_table) << IO_PAGE_SHIFT));
ret = (void *) first_page;
npages = size >> IO_PAGE_SHIFT;
Expand All @@ -336,16 +248,12 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
struct dma_attrs *attrs)
{
struct iommu *iommu;
unsigned long flags, order, npages;
unsigned long order, npages;

npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
iommu = dev->archdata.iommu;

spin_lock_irqsave(&iommu->lock, flags);

iommu_range_free(iommu, dvma, npages);

spin_unlock_irqrestore(&iommu->lock, flags);
iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE);

order = get_order(size);
if (order < 10)
Expand Down Expand Up @@ -375,8 +283,8 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
npages >>= IO_PAGE_SHIFT;

spin_lock_irqsave(&iommu->lock, flags);
base = alloc_npages(dev, iommu, npages);
spin_lock_irqsave(&iommu->lock, flags);
ctx = 0;
if (iommu->iommu_ctxflush)
ctx = iommu_alloc_ctx(iommu);
Expand All @@ -385,7 +293,7 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
if (unlikely(!base))
goto bad;

bus_addr = (iommu->page_table_map_base +
bus_addr = (iommu->tbl.table_map_base +
((base - iommu->page_table) << IO_PAGE_SHIFT));
ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
base_paddr = __pa(oaddr & IO_PAGE_MASK);
Expand Down Expand Up @@ -496,7 +404,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
npages >>= IO_PAGE_SHIFT;
base = iommu->page_table +
((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
((bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
bus_addr &= IO_PAGE_MASK;

spin_lock_irqsave(&iommu->lock, flags);
Expand All @@ -515,11 +423,10 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
for (i = 0; i < npages; i++)
iopte_make_dummy(iommu, base + i);

iommu_range_free(iommu, bus_addr, npages);

iommu_free_ctx(iommu, ctx);

spin_unlock_irqrestore(&iommu->lock, flags);

iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
}

static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
Expand Down Expand Up @@ -567,7 +474,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
max_seg_size = dma_get_max_seg_size(dev);
seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT;
base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
for_each_sg(sglist, s, nelems, i) {
unsigned long paddr, npages, entry, out_entry = 0, slen;
iopte_t *base;
Expand All @@ -581,7 +488,8 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
/* Allocate iommu entries for that segment */
paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
entry = iommu_range_alloc(dev, iommu, npages, &handle);
entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
&handle, (unsigned long)(-1), 0);

/* Handle failure */
if (unlikely(entry == DMA_ERROR_CODE)) {
Expand All @@ -594,7 +502,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
base = iommu->page_table + entry;

/* Convert entry to a dma_addr_t */
dma_addr = iommu->page_table_map_base +
dma_addr = iommu->tbl.table_map_base +
(entry << IO_PAGE_SHIFT);
dma_addr |= (s->offset & ~IO_PAGE_MASK);

Expand Down Expand Up @@ -654,15 +562,17 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
vaddr = s->dma_address & IO_PAGE_MASK;
npages = iommu_num_pages(s->dma_address, s->dma_length,
IO_PAGE_SIZE);
iommu_range_free(iommu, vaddr, npages);

entry = (vaddr - iommu->page_table_map_base)
entry = (vaddr - iommu->tbl.table_map_base)
>> IO_PAGE_SHIFT;
base = iommu->page_table + entry;

for (j = 0; j < npages; j++)
iopte_make_dummy(iommu, base + j);

iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
DMA_ERROR_CODE);

s->dma_address = DMA_ERROR_CODE;
s->dma_length = 0;
}
Expand All @@ -684,10 +594,11 @@ static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
if (iommu->iommu_ctxflush) {
iopte_t *base;
u32 bus_addr;
struct iommu_map_table *tbl = &iommu->tbl;

bus_addr = sg->dma_address & IO_PAGE_MASK;
base = iommu->page_table +
((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
((bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT);

ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
}
Expand Down Expand Up @@ -723,9 +634,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
if (!len)
break;
npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
iommu_range_free(iommu, dma_handle, npages);

entry = ((dma_handle - iommu->page_table_map_base)
entry = ((dma_handle - iommu->tbl.table_map_base)
>> IO_PAGE_SHIFT);
base = iommu->page_table + entry;

Expand All @@ -737,6 +647,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
for (i = 0; i < npages; i++)
iopte_make_dummy(iommu, base + i);

iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
DMA_ERROR_CODE);
sg = sg_next(sg);
}

Expand Down Expand Up @@ -770,9 +682,10 @@ static void dma_4u_sync_single_for_cpu(struct device *dev,
if (iommu->iommu_ctxflush &&
strbuf->strbuf_ctxflush) {
iopte_t *iopte;
struct iommu_map_table *tbl = &iommu->tbl;

iopte = iommu->page_table +
((bus_addr - iommu->page_table_map_base)>>IO_PAGE_SHIFT);
((bus_addr - tbl->table_map_base)>>IO_PAGE_SHIFT);
ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
}

Expand Down Expand Up @@ -805,9 +718,10 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
if (iommu->iommu_ctxflush &&
strbuf->strbuf_ctxflush) {
iopte_t *iopte;
struct iommu_map_table *tbl = &iommu->tbl;

iopte = iommu->page_table +
((sglist[0].dma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
iopte = iommu->page_table + ((sglist[0].dma_address -
tbl->table_map_base) >> IO_PAGE_SHIFT);
ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
}

Expand Down
Loading

0 comments on commit bb620c3

Please sign in to comment.