Skip to content

Commit

Permalink
avcodec/hevc: reduce memory used by the SAO
Browse files Browse the repository at this point in the history
  • Loading branch information
mraulet committed Jan 12, 2015
1 parent ef01ca9 commit 2ef766d
Show file tree
Hide file tree
Showing 3 changed files with 223 additions and 14 deletions.
37 changes: 37 additions & 0 deletions libavcodec/hevc.c
Original file line number Diff line number Diff line change
Expand Up @@ -377,9 +377,34 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps)
ff_videodsp_init (&s->vdsp, sps->bit_depth);

if (sps->sao_enabled) {
#ifdef USE_SAO_SMALL_BUFFER
{
int ctb_size = 1 << sps->log2_ctb_size;
int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
int c_idx, i;

for (i = 0; i < s->threads_number ; i++) {
HEVCLocalContext *lc = s->HEVClcList[i];
lc->sao_pixel_buffer =
av_malloc(((ctb_size + 2) * (ctb_size + 2)) <<
sps->pixel_shift);
}
for(c_idx = 0; c_idx < c_count; c_idx++) {
int w = sps->width >> sps->hshift[c_idx];
int h = sps->height >> sps->vshift[c_idx];
s->sao_pixel_buffer_h[c_idx] =
av_malloc((w * 2 * sps->ctb_height) <<
sps->pixel_shift);
s->sao_pixel_buffer_v[c_idx] =
av_malloc((h * 2 * sps->ctb_width) <<
sps->pixel_shift);
}
}
#else
av_frame_unref(s->tmp_frame);
ret = get_buffer_sao(s, s->tmp_frame, sps);
s->sao_frame = s->tmp_frame;
#endif
}

s->sps = sps;
Expand Down Expand Up @@ -3543,7 +3568,17 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)

av_freep(&s->cabac_state);

#ifdef USE_SAO_SMALL_BUFFER
for (i = 0; i < s->threads_number; i++) {
av_freep(&s->HEVClcList[i]->sao_pixel_buffer);
}
for (i = 0; i < 3; i++) {
av_freep(&s->sao_pixel_buffer_h[i]);
av_freep(&s->sao_pixel_buffer_v[i]);
}
#else
av_frame_free(&s->tmp_frame);
#endif
av_frame_free(&s->output_frame);

for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
Expand Down Expand Up @@ -3614,10 +3649,12 @@ static av_cold int hevc_init_context(AVCodecContext *avctx)
if (!s->cabac_state)
goto fail;

#ifndef USE_SAO_SMALL_BUFFER
s->tmp_frame = av_frame_alloc();
s->dynamic_alloc += sizeof(AVFrame);
if (!s->tmp_frame)
goto fail;
#endif

s->output_frame = av_frame_alloc();
s->dynamic_alloc += sizeof(AVFrame);
Expand Down
14 changes: 11 additions & 3 deletions libavcodec/hevc.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "hevc_defs.h"

#define TEST_MIN_TB_ADDR_ZS
#define USE_SAO_SMALL_BUFFER /* reduce the memory used by SAO */

#define MAX_DPB_SIZE 16 // A.4.1
#define MAX_REFS 16
Expand Down Expand Up @@ -1037,7 +1038,9 @@ typedef struct HEVCLocalContext {
CodingUnit cu;
PredictionUnit pu;
NeighbourAvailable na;

#ifdef USE_SAO_SMALL_BUFFER
uint8_t *sao_pixel_buffer;
#endif
uint8_t cabac_state[HEVC_CONTEXTS];

uint8_t stat_coeff[4];
Expand Down Expand Up @@ -1084,9 +1087,14 @@ typedef struct HEVCContext {
uint8_t *cabac_state;

AVFrame *frame;
AVFrame *sao_frame;
AVFrame *tmp_frame;
AVFrame *output_frame;
#ifdef USE_SAO_SMALL_BUFFER
uint8_t *sao_pixel_buffer_h[3];
uint8_t *sao_pixel_buffer_v[3];
#else
AVFrame *tmp_frame;
AVFrame *sao_frame;
#endif

const HEVCVPS *vps;
const HEVCSPS *sps;
Expand Down
186 changes: 175 additions & 11 deletions libavcodec/hevc_filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ static int get_qPy(HEVCContext *s, int xC, int yC)
return s->qp_y_tab[x + y * s->sps->min_cb_width];
}

static void copy_CTB(uint8_t *dst, uint8_t *src,
static void copy_CTB(uint8_t *dst, const uint8_t *src,
int width, int height, int stride_dst, int stride_src)
{
int i;
Expand All @@ -163,27 +163,79 @@ static void copy_CTB(uint8_t *dst, uint8_t *src,
}
}

static void restore_tqb_pixels(HEVCContext *s, int x0, int y0, int width, int height, int c_idx)
#if defined(USE_SAO_SMALL_BUFFER)
/**
 * Copy a single pixel from src to dst.
 *
 * @param dst         destination pixel
 * @param src         source pixel (read only)
 * @param pixel_shift 0 copies one byte; any non-zero value copies one
 *                    16-bit sample (two bytes)
 */
static void copy_pixel(uint8_t *dst, const uint8_t *src, int pixel_shift)
{
    if (pixel_shift) {
        /* Fixed-size memcpy instead of a (uint16_t *) cast: keeps src
         * const-qualified and does not rely on pointer alignment or on
         * type-punning the byte buffers. */
        memcpy(dst, src, sizeof(uint16_t));
    } else {
        *dst = *src;
    }
}

/**
 * Copy a run of `height` pixels, stepping dst and src by independent
 * byte strides after each pixel.
 *
 * @param dst         first destination pixel
 * @param src         first source pixel (read only)
 * @param pixel_shift 0 copies one byte per pixel; any non-zero value
 *                    copies one 16-bit sample (two bytes) per pixel
 * @param height      number of pixels to copy; nothing is copied when <= 0
 * @param stride_dst  byte step between consecutive destination pixels
 * @param stride_src  byte step between consecutive source pixels
 */
static void copy_vert(uint8_t *dst, const uint8_t *src,
                      int pixel_shift, int height,
                      int stride_dst, int stride_src)
{
    int i;

    if (pixel_shift == 0) {
        for (i = 0; i < height; i++) {
            *dst = *src;
            dst += stride_dst;
            src += stride_src;
        }
    } else {
        for (i = 0; i < height; i++) {
            /* Fixed-size memcpy instead of a (uint16_t *) cast: keeps src
             * const-qualified and does not rely on pointer alignment or
             * on type-punning the byte buffers. */
            memcpy(dst, src, sizeof(uint16_t));
            dst += stride_dst;
            src += stride_src;
        }
    }
}

/**
 * Save the four border lines of a CTB plane into the per-plane SAO edge
 * buffers of the context.
 *
 * The horizontal buffer holds two lines of `w` pixels per CTB row
 * (line 2*y_ctb is the CTB's first row, line 2*y_ctb+1 its last row).
 * The vertical buffer holds two columns of `h` pixels per CTB column
 * (column 2*x_ctb is the CTB's first column, column 2*x_ctb+1 its last),
 * where w and h are the plane dimensions for c_idx.
 *
 * @param s           decoder context owning the edge buffers
 * @param src         top-left pixel of the CTB in the source plane
 * @param stride_src  byte stride of the source plane
 * @param x, y        position of the CTB's top-left pixel, in pixels of
 *                    plane c_idx
 * @param width       CTB width in pixels
 * @param height      CTB height in pixels
 * @param c_idx       plane index
 * @param x_ctb       CTB column index
 * @param y_ctb       CTB row index
 */
static void copy_CTB_to_hv(HEVCContext *s, const uint8_t *src,
                           int stride_src, int x, int y, int width, int height,
                           int c_idx, int x_ctb, int y_ctb)
{
    int sh = s->sps->pixel_shift;
    int w  = s->sps->width  >> s->sps->hshift[c_idx];
    int h  = s->sps->height >> s->sps->vshift[c_idx];

    uint8_t *row_buf = s->sao_pixel_buffer_h[c_idx];
    uint8_t *col_buf = s->sao_pixel_buffer_v[c_idx];

    int row_bytes  = width << sh;
    int pixel_size = 1 << sh;

    int top_off    = ((2 * y_ctb)     * w + x) << sh;
    int bottom_off = ((2 * y_ctb + 1) * w + x) << sh;
    int left_off   = ((2 * x_ctb)     * h + y) << sh;
    int right_off  = ((2 * x_ctb + 1) * h + y) << sh;

    /* horizontal edges: first and last row of the CTB */
    memcpy(row_buf + top_off, src, row_bytes);
    memcpy(row_buf + bottom_off, src + stride_src * (height - 1), row_bytes);

    /* vertical edges: first and last column, stored contiguously */
    copy_vert(col_buf + left_off, src,
              sh, height, pixel_size, stride_src);
    copy_vert(col_buf + right_off, src + ((width - 1) << sh),
              sh, height, pixel_size, stride_src);
}
#endif

static void restore_tqb_pixels(HEVCContext *s,
uint8_t *src1, const uint8_t *dst1,
ptrdiff_t stride_src, ptrdiff_t stride_dst,
int x0, int y0, int width, int height, int c_idx)
{
if ( s->pps->transquant_bypass_enable_flag ||
(s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) {
int x, y;
ptrdiff_t stride_dst = s->sao_frame->linesize[c_idx];
ptrdiff_t stride_src = s->frame->linesize[c_idx];
int min_pu_size = 1 << s->sps->log2_min_pu_size;
int hshift = s->sps->hshift[c_idx];
int vshift = s->sps->vshift[c_idx];
int x_min = ((x0 ) >> s->sps->log2_min_pu_size);
int y_min = ((y0 ) >> s->sps->log2_min_pu_size);
int x_max = ((x0 + width ) >> s->sps->log2_min_pu_size);
int y_max = ((y0 + height) >> s->sps->log2_min_pu_size);
int len = min_pu_size >> hshift;
int len = (min_pu_size >> hshift) << s->sps->pixel_shift;
for (y = y_min; y < y_max; y++) {
for (x = x_min; x < x_max; x++) {
if (s->is_pcm[y * s->sps->min_pu_width + x]) {
int n;
uint8_t *src = &s->frame->data[c_idx][ ((y << s->sps->log2_min_pu_size) >> vshift) * stride_src + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride_dst + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
uint8_t *src = src1 + (((y << s->sps->log2_min_pu_size) - y0) >> vshift) * stride_src + ((((x << s->sps->log2_min_pu_size) - x0) >> hshift) << s->sps->pixel_shift);
const uint8_t *dst = dst1 + (((y << s->sps->log2_min_pu_size) - y0) >> vshift) * stride_dst + ((((x << s->sps->log2_min_pu_size) - x0) >> hshift) << s->sps->pixel_shift);
for (n = 0; n < (min_pu_size >> vshift); n++) {
memcpy(src, dst, len);
src += stride_src;
Expand All @@ -199,6 +251,9 @@ static void restore_tqb_pixels(HEVCContext *s, int x0, int y0, int width, int he

static void sao_filter_CTB(HEVCContext *s, int x, int y)
{
#if defined(USE_SAO_SMALL_BUFFER)
HEVCLocalContext *lc = s->HEVClc;
#endif
int c_idx;
int edges[4]; // 0 left 1 top 2 right 3 bottom
int x_ctb = x >> s->sps->log2_ctb_size;
Expand Down Expand Up @@ -259,28 +314,133 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
int x0 = x >> s->sps->hshift[c_idx];
int y0 = y >> s->sps->vshift[c_idx];
int stride_src = s->frame->linesize[c_idx];
int stride_dst = s->sao_frame->linesize[c_idx];
int ctb_size_h = (1 << (s->sps->log2_ctb_size)) >> s->sps->hshift[c_idx];
int ctb_size_v = (1 << (s->sps->log2_ctb_size)) >> s->sps->vshift[c_idx];
int width = FFMIN(ctb_size_h, (s->sps->width >> s->sps->hshift[c_idx]) - x0);
int height = FFMIN(ctb_size_v, (s->sps->height >> s->sps->vshift[c_idx]) - y0);
uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->sps->pixel_shift)];
#if defined(USE_SAO_SMALL_BUFFER)
int stride_dst = ((1 << (s->sps->log2_ctb_size)) + 2) << s->sps->pixel_shift;
uint8_t *dst = lc->sao_pixel_buffer + (1 * stride_dst) + (1 << s->sps->pixel_shift);
#else
int stride_dst = s->sao_frame->linesize[c_idx];
uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride_dst + (x0 << s->sps->pixel_shift)];
#endif

switch (sao->type_idx[c_idx]) {
case SAO_BAND:
copy_CTB(dst, src, width << s->sps->pixel_shift, height, stride_dst, stride_src);
#if defined(USE_SAO_SMALL_BUFFER)
copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
x_ctb, y_ctb);
#endif
s->hevcdsp.sao_band_filter(src, dst,
stride_src, stride_dst,
sao,
edges, width,
height, c_idx);
restore_tqb_pixels(s, x, y, width, height, c_idx);
restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
x, y, width, height, c_idx);
sao->type_idx[c_idx] = SAO_APPLIED;
break;
case SAO_EDGE:
{
uint8_t left_pixels = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] != SAO_APPLIED);
#if defined(USE_SAO_SMALL_BUFFER)
int w = s->sps->width >> s->sps->hshift[c_idx];
int h = s->sps->height >> s->sps->vshift[c_idx];
int left_edge = edges[0];
int top_edge = edges[1];
int right_edge = edges[2];
int bottom_edge = edges[3];
int sh = s->sps->pixel_shift;
int left_pixels, right_pixels;

if (!top_edge) {
int left = 1 - left_edge;
int right = 1 - right_edge;
const uint8_t *src1[2];
uint8_t *dst1;
int src_idx, pos;

dst1 = dst - stride_dst - (left << sh);
src1[0] = src - stride_src - (left << sh);
src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb - 1) * w + x0 - left) << sh);
pos = 0;
if (left) {
src_idx = (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] ==
SAO_APPLIED);
copy_pixel(dst1, src1[src_idx], sh);
pos += (1 << sh);
}
src_idx = (CTB(s->sao, x_ctb, y_ctb-1).type_idx[c_idx] ==
SAO_APPLIED);
memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
if (right) {
pos += width << sh;
src_idx = (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] ==
SAO_APPLIED);
copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
}
}
if (!bottom_edge) {
int left = 1 - left_edge;
int right = 1 - right_edge;
const uint8_t *src1[2];
uint8_t *dst1;
int src_idx, pos;

dst1 = dst + height * stride_dst - (left << sh);
src1[0] = src + height * stride_src - (left << sh);
src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 2) * w + x0 - left) << sh);
pos = 0;
if (left) {
src_idx = (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] ==
SAO_APPLIED);
copy_pixel(dst1, src1[src_idx], sh);
pos += (1 << sh);
}
src_idx = (CTB(s->sao, x_ctb, y_ctb+1).type_idx[c_idx] ==
SAO_APPLIED);
memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
if (right) {
pos += width << sh;
src_idx = (CTB(s->sao, x_ctb+1, y_ctb+1).type_idx[c_idx] ==
SAO_APPLIED);
copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
}
}
left_pixels = 0;
if (!left_edge) {
if (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
copy_vert(dst - (1 << sh),
s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb - 1) * h + y0) << sh),
sh, height, stride_dst, 1 << sh);
} else {
left_pixels = 1;
}
}
right_pixels = 0;
if (!right_edge) {
if (CTB(s->sao, x_ctb+1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
copy_vert(dst + (width << sh),
s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 2) * h + y0) << sh),
sh, height, stride_dst, 1 << sh);
} else {
right_pixels = 1;
}
}

copy_CTB(dst - (left_pixels << sh),
src - (left_pixels << sh),
(width + left_pixels + right_pixels) << sh,
height, stride_dst, stride_src);

copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
x_ctb, y_ctb);
#else
uint8_t left_pixels;
/* get the CTB edge pixels from the SAO pixel buffer */
left_pixels = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] != SAO_APPLIED);
if (!edges[1]) {
uint8_t top_left = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED);
uint8_t top_right = !edges[2] && (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED);
Expand Down Expand Up @@ -308,6 +468,9 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
copy_CTB(dst - (left_pixels << s->sps->pixel_shift),
src - (left_pixels << s->sps->pixel_shift),
(width + 1 + left_pixels) << s->sps->pixel_shift, height, stride_dst, stride_src);
#endif
/* XXX: could handle the restoration here to simplify the
DSP functions */
s->hevcdsp.sao_edge_filter[restore](src, dst,
stride_src, stride_dst,
sao,
Expand All @@ -316,7 +479,8 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
vert_edge,
horiz_edge,
diag_edge);
restore_tqb_pixels(s, x, y, width, height, c_idx);
restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
x, y, width, height, c_idx);
sao->type_idx[c_idx] = SAO_APPLIED;
break;
}
Expand Down

0 comments on commit 2ef766d

Please sign in to comment.