Commit 652a187

Merge branch 'drm-next-3.8' of git://people.freedesktop.org/~agd5f/linux into drm-next

Fix a regression and some locking races, and add CS support
for the DMA engines.

* 'drm-next-3.8' of git://people.freedesktop.org/~agd5f/linux:
  radeon: fix regression with eviction since evict caching changes
  drm/radeon: add more pedantic checks in the CP DMA checker
  drm/radeon: bump version for CS ioctl support for async DMA
  drm/radeon: enable the async DMA rings in the CS ioctl
  drm/radeon: add VM CS parser support for async DMA on cayman/TN/SI
  drm/radeon/kms: add evergreen/cayman CS parser for async DMA (v2)
  drm/radeon/kms: add 6xx/7xx CS parser for async DMA (v2)
  drm/radeon: fix htile buffer size computation for command stream checker
  drm/radeon: fix fence locking in the pageflip callback
  drm/radeon: make indirect register access concurrency-safe
  drm/radeon: add W|RREG32_IDX for MM_INDEX|DATA based mmio access
airlied committed Dec 15, 2012
2 parents 9add1ac + dd54fee commit 652a187
Showing 16 changed files with 897 additions and 127 deletions.
603 changes: 600 additions & 3 deletions drivers/gpu/drm/radeon/evergreen_cs.c

(Large diff not shown.)

23 changes: 18 additions & 5 deletions drivers/gpu/drm/radeon/r100.c
@@ -4135,23 +4135,36 @@ int r100_init(struct radeon_device *rdev)
     return 0;
 }
 
-uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
+                      bool always_indirect)
 {
-    if (reg < rdev->rmmio_size)
+    if (reg < rdev->rmmio_size && !always_indirect)
         return readl(((void __iomem *)rdev->rmmio) + reg);
     else {
+        unsigned long flags;
+        uint32_t ret;
+
+        spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
         writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
-        return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+        ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+        spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
+
+        return ret;
     }
 }
 
-void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
+                  bool always_indirect)
 {
-    if (reg < rdev->rmmio_size)
+    if (reg < rdev->rmmio_size && !always_indirect)
         writel(v, ((void __iomem *)rdev->rmmio) + reg);
     else {
+        unsigned long flags;
+
+        spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
         writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
         writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+        spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
     }
 }
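
Editor's note: MM_INDEX/MM_DATA is a classic index/data pair. The offset written to RADEON_MM_INDEX selects which register the following RADEON_MM_DATA access reads or writes, so the two MMIO operations must be atomic with respect to other users of the pair; that is what the new mmio_idx_lock spinlock guarantees. The always_indirect flag additionally lets callers force the indirect path even for registers inside the directly mapped window. Below is a sketch of how the W|RREG32_IDX helpers named in the shortlog might wrap these functions; the macro bodies are an assumption, since radeon.h is not part of this excerpt:

/* Assumed wrappers (radeon.h is not shown on this commit page).
 * Passing always_indirect = true routes the access through the
 * MM_INDEX/MM_DATA window under mmio_idx_lock even when the
 * register could be accessed directly. */
#define RREG32_IDX(reg)    r100_mm_rreg(rdev, (reg), true)
#define WREG32_IDX(reg, v) r100_mm_wreg(rdev, (reg), (v), true)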

285 changes: 212 additions & 73 deletions drivers/gpu/drm/radeon/r600_cs.c
@@ -657,87 +657,30 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
         /* nby is npipes htiles aligned == npipes * 8 pixel aligned */
         nby = round_up(nby, track->npipes * 8);
     } else {
-        /* htile widht & nby (8 or 4) make 2 bits number */
-        tmp = track->htile_surface & 3;
+        /* always assume 8x8 htile */
         /* align is htile align * 8, htile align vary according to
          * number of pipe and tile width and nby
          */
         switch (track->npipes) {
         case 8:
-            switch (tmp) {
-            case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
-                nbx = round_up(nbx, 64 * 8);
-                nby = round_up(nby, 64 * 8);
-                break;
-            case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
-            case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
-                nbx = round_up(nbx, 64 * 8);
-                nby = round_up(nby, 32 * 8);
-                break;
-            case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
-                nbx = round_up(nbx, 32 * 8);
-                nby = round_up(nby, 32 * 8);
-                break;
-            default:
-                return -EINVAL;
-            }
+            /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+            nbx = round_up(nbx, 64 * 8);
+            nby = round_up(nby, 64 * 8);
             break;
         case 4:
-            switch (tmp) {
-            case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
-                nbx = round_up(nbx, 64 * 8);
-                nby = round_up(nby, 32 * 8);
-                break;
-            case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
-            case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
-                nbx = round_up(nbx, 32 * 8);
-                nby = round_up(nby, 32 * 8);
-                break;
-            case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
-                nbx = round_up(nbx, 32 * 8);
-                nby = round_up(nby, 16 * 8);
-                break;
-            default:
-                return -EINVAL;
-            }
+            /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+            nbx = round_up(nbx, 64 * 8);
+            nby = round_up(nby, 32 * 8);
             break;
         case 2:
-            switch (tmp) {
-            case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
-                nbx = round_up(nbx, 32 * 8);
-                nby = round_up(nby, 32 * 8);
-                break;
-            case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
-            case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
-                nbx = round_up(nbx, 32 * 8);
-                nby = round_up(nby, 16 * 8);
-                break;
-            case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
-                nbx = round_up(nbx, 16 * 8);
-                nby = round_up(nby, 16 * 8);
-                break;
-            default:
-                return -EINVAL;
-            }
+            /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+            nbx = round_up(nbx, 32 * 8);
+            nby = round_up(nby, 32 * 8);
             break;
         case 1:
-            switch (tmp) {
-            case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
-                nbx = round_up(nbx, 32 * 8);
-                nby = round_up(nby, 16 * 8);
-                break;
-            case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
-            case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
-                nbx = round_up(nbx, 16 * 8);
-                nby = round_up(nby, 16 * 8);
-                break;
-            case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
-                nbx = round_up(nbx, 16 * 8);
-                nby = round_up(nby, 8 * 8);
-                break;
-            default:
-                return -EINVAL;
-            }
+            /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+            nbx = round_up(nbx, 32 * 8);
+            nby = round_up(nby, 16 * 8);
             break;
         default:
             dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
@@ -746,9 +689,10 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
         }
     }
     /* compute number of htile */
-    nbx = G_028D24_HTILE_WIDTH(track->htile_surface) ? nbx / 8 : nbx / 4;
-    nby = G_028D24_HTILE_HEIGHT(track->htile_surface) ? nby / 8 : nby / 4;
-    size = nbx * nby * 4;
+    nbx = nbx >> 3;
+    nby = nby >> 3;
+    /* size must be aligned on npipes * 2K boundary */
+    size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
     size += track->htile_offset;
 
     if (size > radeon_bo_size(track->htile_bo)) {
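
Editor's note: with 8x8 htiles assumed everywhere, the size math reduces to rounding the surface to the per-pipe alignment, dividing each dimension by 8 to get htile counts, using 4 bytes per htile, and padding to an npipes * 2K boundary. A standalone sketch with illustrative numbers follows (not driver code; ROUND_UP stands in for the kernel's round_up/roundup helpers):

#include <stdio.h>

#define ROUND_UP(x, a) (((x) + (a) - 1) / (a) * (a))

int main(void)
{
    unsigned npipes = 4;                       /* example pipe count */
    unsigned nbx = ROUND_UP(1920, 64 * 8);     /* -> 2048 pixels */
    unsigned nby = ROUND_UP(1080, 32 * 8);     /* -> 1280 pixels */
    unsigned size;

    nbx >>= 3;                                 /* -> 256 htiles across */
    nby >>= 3;                                 /* -> 160 htiles down */
    /* 4 bytes per htile, padded to npipes * 2K */
    size = ROUND_UP(nbx * nby * 4, npipes * (2 << 10));
    printf("htile buffer: %u bytes\n", size);  /* prints 163840 */
    return 0;
}

The checker then adds htile_offset and rejects the command stream if the result exceeds the size of the bound htile BO.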
@@ -1492,6 +1436,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
         break;
     case DB_HTILE_SURFACE:
         track->htile_surface = radeon_get_ib_value(p, idx);
+        /* force 8x8 htile width and height */
+        ib[idx] |= 3;
         track->db_dirty = true;
         break;
     case SQ_PGM_START_FS:
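
Editor's note: this is the parser side of the same fix. The two low bits of DB_HTILE_SURFACE select the htile width and height (4 or 8 pixels each, per the case comments deleted in the first hunk), so OR-ing 3 into the register value forces the 8x8 mode the new size computation assumes. A bit-layout sketch with illustrative names; the kernel itself uses the G_028D24_HTILE_WIDTH()/G_028D24_HTILE_HEIGHT() accessors seen above:

/* Illustrative names only, not the driver's definitions. */
#define HTILE_WIDTH_8  (1 << 0)  /* 0 = 4 pixels, 1 = 8 pixels */
#define HTILE_HEIGHT_8 (1 << 1)  /* 0 = 4 pixels, 1 = 8 pixels */
/* ib[idx] |= 3 is equivalent to |= (HTILE_WIDTH_8 | HTILE_HEIGHT_8) */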
@@ -2568,3 +2514,196 @@ void r600_cs_legacy_init(void)
 {
     r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
 }
+
+/*
+ * DMA
+ */
+/**
+ * r600_dma_cs_next_reloc() - parse next reloc
+ * @p: parser structure holding parsing context.
+ * @cs_reloc: reloc information
+ *
+ * Return the next reloc, do bo validation and compute
+ * GPU offset using the provided start.
+ **/
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+                           struct radeon_cs_reloc **cs_reloc)
+{
+    struct radeon_cs_chunk *relocs_chunk;
+    unsigned idx;
+
+    if (p->chunk_relocs_idx == -1) {
+        DRM_ERROR("No relocation chunk !\n");
+        return -EINVAL;
+    }
+    *cs_reloc = NULL;
+    relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+    idx = p->dma_reloc_idx;
+    if (idx >= relocs_chunk->length_dw) {
+        DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+                  idx, relocs_chunk->length_dw);
+        return -EINVAL;
+    }
+    *cs_reloc = p->relocs_ptr[idx];
+    p->dma_reloc_idx++;
+    return 0;
+}
+
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h) ((h) & 0x0000ffff)
+#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
+
+/**
+ * r600_dma_cs_parse() - parse the DMA IB
+ * @p: parser structure holding parsing context.
+ *
+ * Parses the DMA IB from the CS ioctl and updates
+ * the GPU addresses based on the reloc information and
+ * checks for errors. (R6xx-R7xx)
+ * Returns 0 for success and an error on failure.
+ **/
+int r600_dma_cs_parse(struct radeon_cs_parser *p)
+{
+    struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+    struct radeon_cs_reloc *src_reloc, *dst_reloc;
+    u32 header, cmd, count, tiled;
+    volatile u32 *ib = p->ib.ptr;
+    u32 idx, idx_value;
+    u64 src_offset, dst_offset;
+    int r;
+
+    do {
+        if (p->idx >= ib_chunk->length_dw) {
+            DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+                      p->idx, ib_chunk->length_dw);
+            return -EINVAL;
+        }
+        idx = p->idx;
+        header = radeon_get_ib_value(p, idx);
+        cmd = GET_DMA_CMD(header);
+        count = GET_DMA_COUNT(header);
+        tiled = GET_DMA_T(header);
+
+        switch (cmd) {
+        case DMA_PACKET_WRITE:
+            r = r600_dma_cs_next_reloc(p, &dst_reloc);
+            if (r) {
+                DRM_ERROR("bad DMA_PACKET_WRITE\n");
+                return -EINVAL;
+            }
+            if (tiled) {
+                dst_offset = ib[idx+1];
+                dst_offset <<= 8;
+
+                ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+                p->idx += count + 5;
+            } else {
+                dst_offset = ib[idx+1];
+                dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
+
+                ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+                ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+                p->idx += count + 3;
+            }
+            if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+                dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+                return -EINVAL;
+            }
+            break;
+        case DMA_PACKET_COPY:
+            r = r600_dma_cs_next_reloc(p, &src_reloc);
+            if (r) {
+                DRM_ERROR("bad DMA_PACKET_COPY\n");
+                return -EINVAL;
+            }
+            r = r600_dma_cs_next_reloc(p, &dst_reloc);
+            if (r) {
+                DRM_ERROR("bad DMA_PACKET_COPY\n");
+                return -EINVAL;
+            }
+            if (tiled) {
+                idx_value = radeon_get_ib_value(p, idx + 2);
+                /* detile bit */
+                if (idx_value & (1 << 31)) {
+                    /* tiled src, linear dst */
+                    src_offset = ib[idx+1];
+                    src_offset <<= 8;
+                    ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+                    dst_offset = ib[idx+5];
+                    dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+                    ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+                    ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+                } else {
+                    /* linear src, tiled dst */
+                    src_offset = ib[idx+5];
+                    src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+                    ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+                    ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+                    dst_offset = ib[idx+1];
+                    dst_offset <<= 8;
+                    ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+                }
+                p->idx += 7;
+            } else {
+                src_offset = ib[idx+2];
+                src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+                dst_offset = ib[idx+1];
+                dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+
+                ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+                ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+                ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+                ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                p->idx += 5;
+            }
+            if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+                dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n",
+                         src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+                return -EINVAL;
+            }
+            if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+                dev_warn(p->dev, "DMA write dst buffer too small (%llu %lu)\n",
+                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+                return -EINVAL;
+            }
+            break;
+        case DMA_PACKET_CONSTANT_FILL:
+            if (p->family < CHIP_RV770) {
+                DRM_ERROR("Constant Fill is 7xx only !\n");
+                return -EINVAL;
+            }
+            r = r600_dma_cs_next_reloc(p, &dst_reloc);
+            if (r) {
+                DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
+                return -EINVAL;
+            }
+            dst_offset = ib[idx+1];
+            dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
+            if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+                dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+                return -EINVAL;
+            }
+            ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+            ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+            p->idx += 4;
+            break;
+        case DMA_PACKET_NOP:
+            p->idx += 1;
+            break;
+        default:
+            DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+            return -EINVAL;
+        }
+    } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+    for (r = 0; r < p->ib.length_dw; r++) {
+        printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
+        mdelay(1);
+    }
+#endif
+    return 0;
+}
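
Editor's note: each async-DMA packet starts with a header dword that the GET_DMA_* macros decode: bits 31:28 carry the opcode, bit 23 the tiling flag, and bits 15:0 the dword count. A minimal decoding example with a made-up header value; whether a given opcode maps to DMA_PACKET_WRITE, DMA_PACKET_COPY, and so on depends on the DMA_PACKET_* definitions in r600d.h, which are not shown here:

#include <stdio.h>

#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
#define GET_DMA_COUNT(h) ((h) & 0x0000ffff)
#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)

int main(void)
{
    unsigned int header = 0x30800040; /* made-up example, not from a trace */

    /* prints cmd=0x3 tiled=1 count=64 */
    printf("cmd=0x%x tiled=%u count=%u\n",
           GET_DMA_CMD(header), GET_DMA_T(header), GET_DMA_COUNT(header));
    return 0;
}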