Skip to content

Commit

Permalink
media: cedrus: h264: Optimize mv col buffer allocation
Browse files Browse the repository at this point in the history
Currently allocation for mv col buffer pool is very wasteful. It
allocates memory for worst case which is a lot more than it's needed for
typical use. Fix that by replacing pool with individual allocations when
such buffer is really needed. At that time all needed information for
determining optimal mv col buffer size is also known.

Signed-off-by: Jernej Skrabec <[email protected]>
Signed-off-by: Hans Verkuil <[email protected]>
  • Loading branch information
jernejsk authored and Hans Verkuil committed Oct 24, 2022
1 parent 0ee952c commit fec94f8
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 64 deletions.
7 changes: 3 additions & 4 deletions drivers/staging/media/sunxi/cedrus/cedrus.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ struct cedrus_buffer {
struct {
unsigned int position;
enum cedrus_h264_pic_type pic_type;
void *mv_col_buf;
dma_addr_t mv_col_buf_dma;
ssize_t mv_col_buf_size;
} h264;
struct {
void *mv_col_buf;
Expand All @@ -130,10 +133,6 @@ struct cedrus_ctx {

union {
struct {
void *mv_col_buf;
dma_addr_t mv_col_buf_dma;
ssize_t mv_col_buf_field_size;
ssize_t mv_col_buf_size;
void *pic_info_buf;
dma_addr_t pic_info_buf_dma;
ssize_t pic_info_buf_size;
Expand Down
118 changes: 58 additions & 60 deletions drivers/staging/media/sunxi/cedrus/cedrus_h264.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,13 @@ static void cedrus_h264_write_sram(struct cedrus_dev *dev,
cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++);
}

static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_ctx *ctx,
unsigned int position,
static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_buffer *buf,
unsigned int field)
{
dma_addr_t addr = ctx->codec.h264.mv_col_buf_dma;

/* Adjust for the position */
addr += position * ctx->codec.h264.mv_col_buf_field_size * 2;
dma_addr_t addr = buf->codec.h264.mv_col_buf_dma;

/* Adjust for the field */
addr += field * ctx->codec.h264.mv_col_buf_field_size;
addr += field * buf->codec.h264.mv_col_buf_size / 2;

return addr;
}
Expand All @@ -76,22 +72,19 @@ static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx,
struct cedrus_h264_sram_ref_pic *pic)
{
struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf;
unsigned int position = buf->codec.h264.position;

pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt);
pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt);
pic->frame_info = cpu_to_le32(buf->codec.h264.pic_type << 8);

pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0));
pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1));
pic->mv_col_top_ptr =
cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 0));
pic->mv_col_bot_ptr =
cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 1));
pic->mv_col_top_ptr = cpu_to_le32(cedrus_h264_mv_col_buf_addr(buf, 0));
pic->mv_col_bot_ptr = cpu_to_le32(cedrus_h264_mv_col_buf_addr(buf, 1));
}

static void cedrus_write_frame_list(struct cedrus_ctx *ctx,
struct cedrus_run *run)
static int cedrus_write_frame_list(struct cedrus_ctx *ctx,
struct cedrus_run *run)
{
struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM];
const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
Expand Down Expand Up @@ -146,6 +139,31 @@ static void cedrus_write_frame_list(struct cedrus_ctx *ctx,
output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf);
output_buf->codec.h264.position = position;

if (!output_buf->codec.h264.mv_col_buf_size) {
const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
unsigned int field_size;

field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) *
DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16;
if (!(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE))
field_size = field_size * 2;
if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY))
field_size = field_size * 2;

output_buf->codec.h264.mv_col_buf_size = field_size * 2;
/* Buffer is never accessed by CPU, so we can skip kernel mapping. */
output_buf->codec.h264.mv_col_buf =
dma_alloc_attrs(dev->dev,
output_buf->codec.h264.mv_col_buf_size,
&output_buf->codec.h264.mv_col_buf_dma,
GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);

if (!output_buf->codec.h264.mv_col_buf) {
output_buf->codec.h264.mv_col_buf_size = 0;
return -ENOMEM;
}
}

if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD;
else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
Expand All @@ -162,6 +180,8 @@ static void cedrus_write_frame_list(struct cedrus_ctx *ctx,
pic_list, sizeof(pic_list));

cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position);

return 0;
}

#define CEDRUS_MAX_REF_IDX 32
Expand Down Expand Up @@ -496,6 +516,7 @@ static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx)
static int cedrus_h264_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
{
struct cedrus_dev *dev = ctx->dev;
int ret;

cedrus_engine_enable(ctx, CEDRUS_CODEC_H264);

Expand All @@ -506,7 +527,9 @@ static int cedrus_h264_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
ctx->codec.h264.neighbor_info_buf_dma);

cedrus_write_scaling_lists(ctx, run);
cedrus_write_frame_list(ctx, run);
ret = cedrus_write_frame_list(ctx, run);
if (ret)
return ret;

cedrus_set_params(ctx, run);

Expand All @@ -517,8 +540,6 @@ static int cedrus_h264_start(struct cedrus_ctx *ctx)
{
struct cedrus_dev *dev = ctx->dev;
unsigned int pic_info_size;
unsigned int field_size;
unsigned int mv_col_size;
int ret;

/*
Expand Down Expand Up @@ -566,38 +587,6 @@ static int cedrus_h264_start(struct cedrus_ctx *ctx)
goto err_pic_buf;
}

field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) *
DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16;

/*
* FIXME: This is actually conditional to
* V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE not being set, we
* might have to rework this if memory efficiency ever is
* something we need to work on.
*/
field_size = field_size * 2;

/*
* FIXME: This is actually conditional to
* V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY not being set, we might
* have to rework this if memory efficiency ever is something
* we need to work on.
*/
field_size = field_size * 2;
ctx->codec.h264.mv_col_buf_field_size = field_size;

mv_col_size = field_size * 2 * CEDRUS_H264_FRAME_NUM;
ctx->codec.h264.mv_col_buf_size = mv_col_size;
ctx->codec.h264.mv_col_buf =
dma_alloc_attrs(dev->dev,
ctx->codec.h264.mv_col_buf_size,
&ctx->codec.h264.mv_col_buf_dma,
GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
if (!ctx->codec.h264.mv_col_buf) {
ret = -ENOMEM;
goto err_neighbor_buf;
}

if (ctx->src_fmt.width > 2048) {
/*
* Formulas for deblock and intra prediction buffer sizes
Expand All @@ -613,7 +602,7 @@ static int cedrus_h264_start(struct cedrus_ctx *ctx)
GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
if (!ctx->codec.h264.deblk_buf) {
ret = -ENOMEM;
goto err_mv_col_buf;
goto err_neighbor_buf;
}

/*
Expand Down Expand Up @@ -641,12 +630,6 @@ static int cedrus_h264_start(struct cedrus_ctx *ctx)
ctx->codec.h264.deblk_buf_dma,
DMA_ATTR_NO_KERNEL_MAPPING);

err_mv_col_buf:
dma_free_attrs(dev->dev, ctx->codec.h264.mv_col_buf_size,
ctx->codec.h264.mv_col_buf,
ctx->codec.h264.mv_col_buf_dma,
DMA_ATTR_NO_KERNEL_MAPPING);

err_neighbor_buf:
dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
ctx->codec.h264.neighbor_info_buf,
Expand All @@ -664,11 +647,26 @@ static int cedrus_h264_start(struct cedrus_ctx *ctx)
static void cedrus_h264_stop(struct cedrus_ctx *ctx)
{
struct cedrus_dev *dev = ctx->dev;
struct cedrus_buffer *buf;
struct vb2_queue *vq;
unsigned int i;

vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);

for (i = 0; i < vq->num_buffers; i++) {
buf = vb2_to_cedrus_buffer(vb2_get_buffer(vq, i));

if (buf->codec.h264.mv_col_buf_size > 0) {
dma_free_attrs(dev->dev,
buf->codec.h264.mv_col_buf_size,
buf->codec.h264.mv_col_buf,
buf->codec.h264.mv_col_buf_dma,
DMA_ATTR_NO_KERNEL_MAPPING);

buf->codec.h264.mv_col_buf_size = 0;
}
}

dma_free_attrs(dev->dev, ctx->codec.h264.mv_col_buf_size,
ctx->codec.h264.mv_col_buf,
ctx->codec.h264.mv_col_buf_dma,
DMA_ATTR_NO_KERNEL_MAPPING);
dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
ctx->codec.h264.neighbor_info_buf,
ctx->codec.h264.neighbor_info_buf_dma,
Expand Down

0 comments on commit fec94f8

Please sign in to comment.