summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libavcodec/hevc.c40
-rw-r--r--libavcodec/hevc.h14
-rw-r--r--libavcodec/hevc_filter.c184
3 files changed, 224 insertions, 14 deletions
diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c
index 9459026814..a940f0bdbc 100644
--- a/libavcodec/hevc.c
+++ b/libavcodec/hevc.c
@@ -104,7 +104,8 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
s->tab_ipm = av_mallocz(min_pu_size);
- s->is_pcm = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
+ s->is_pcm = av_mallocz_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
+
if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
goto fail;
@@ -353,9 +354,34 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps)
ff_videodsp_init (&s->vdsp, sps->bit_depth);
if (sps->sao_enabled && !s->avctx->hwaccel) {
+#ifdef USE_SAO_SMALL_BUFFER
+ {
+ int ctb_size = 1 << sps->log2_ctb_size;
+ int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
+ int c_idx, i;
+
+ for (i = 0; i < s->threads_number ; i++) {
+ HEVCLocalContext *lc = s->HEVClcList[i];
+ lc->sao_pixel_buffer =
+ av_malloc(((ctb_size + 2) * (ctb_size + 2)) <<
+ sps->pixel_shift);
+ }
+ for(c_idx = 0; c_idx < c_count; c_idx++) {
+ int w = sps->width >> sps->hshift[c_idx];
+ int h = sps->height >> sps->vshift[c_idx];
+ s->sao_pixel_buffer_h[c_idx] =
+ av_malloc((w * 2 * sps->ctb_height) <<
+ sps->pixel_shift);
+ s->sao_pixel_buffer_v[c_idx] =
+ av_malloc((h * 2 * sps->ctb_width) <<
+ sps->pixel_shift);
+ }
+ }
+#else
av_frame_unref(s->tmp_frame);
ret = get_buffer_sao(s, s->tmp_frame, sps);
s->sao_frame = s->tmp_frame;
+#endif
}
s->sps = sps;
@@ -3186,7 +3212,17 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
av_freep(&s->cabac_state);
+#ifdef USE_SAO_SMALL_BUFFER
+ for (i = 0; i < s->threads_number; i++) {
+ av_freep(&s->HEVClcList[i]->sao_pixel_buffer);
+ }
+ for (i = 0; i < 3; i++) {
+ av_freep(&s->sao_pixel_buffer_h[i]);
+ av_freep(&s->sao_pixel_buffer_v[i]);
+ }
+#else
av_frame_free(&s->tmp_frame);
+#endif
av_frame_free(&s->output_frame);
for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
@@ -3246,9 +3282,11 @@ static av_cold int hevc_init_context(AVCodecContext *avctx)
if (!s->cabac_state)
goto fail;
+#ifndef USE_SAO_SMALL_BUFFER
s->tmp_frame = av_frame_alloc();
if (!s->tmp_frame)
goto fail;
+#endif
s->output_frame = av_frame_alloc();
if (!s->output_frame)
diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h
index 1727b6097d..482341af76 100644
--- a/libavcodec/hevc.h
+++ b/libavcodec/hevc.h
@@ -36,6 +36,8 @@
#include "thread.h"
#include "videodsp.h"
+//#define USE_SAO_SMALL_BUFFER /* reduce the memory used by SAO */
+
#define MAX_DPB_SIZE 16 // A.4.1
#define MAX_REFS 16
@@ -745,6 +747,9 @@ typedef struct HEVCNAL {
} HEVCNAL;
typedef struct HEVCLocalContext {
+#ifdef USE_SAO_SMALL_BUFFER
+ uint8_t *sao_pixel_buffer;
+#endif
uint8_t cabac_state[HEVC_CONTEXTS];
uint8_t stat_coeff[4];
@@ -807,9 +812,14 @@ typedef struct HEVCContext {
uint8_t slice_initialized;
AVFrame *frame;
- AVFrame *sao_frame;
- AVFrame *tmp_frame;
AVFrame *output_frame;
+#ifdef USE_SAO_SMALL_BUFFER
+ uint8_t *sao_pixel_buffer_h[3];
+ uint8_t *sao_pixel_buffer_v[3];
+#else
+ AVFrame *tmp_frame;
+ AVFrame *sao_frame;
+#endif
const HEVCVPS *vps;
const HEVCSPS *sps;
diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c
index 90bf1feb15..b8aa71e25a 100644
--- a/libavcodec/hevc_filter.c
+++ b/libavcodec/hevc_filter.c
@@ -139,7 +139,7 @@ static int get_qPy(HEVCContext *s, int xC, int yC)
return s->qp_y_tab[x + y * s->sps->min_cb_width];
}
-static void copy_CTB(uint8_t *dst, uint8_t *src, int width, int height,
+static void copy_CTB(uint8_t *dst, const uint8_t *src, int width, int height,
intptr_t stride_dst, intptr_t stride_src)
{
int i, j;
@@ -161,13 +161,65 @@ int i, j;
}
}
-static void restore_tqb_pixels(HEVCContext *s, int x0, int y0, int width, int height, int c_idx)
+#if defined(USE_SAO_SMALL_BUFFER)
+static void copy_pixel(uint8_t *dst, const uint8_t *src, int pixel_shift)
+{
+ if (pixel_shift)
+ *(uint16_t *)dst = *(uint16_t *)src;
+ else
+ *dst = *src;
+
+}
+
+static void copy_vert(uint8_t *dst, const uint8_t *src,
+ int pixel_shift, int height,
+ int stride_dst, int stride_src)
+{
+ int i;
+ if (pixel_shift == 0) {
+ for (i = 0; i < height; i++) {
+ *dst = *src;
+ dst += stride_dst;
+ src += stride_src;
+ }
+ } else {
+ for (i = 0; i < height; i++) {
+ *(uint16_t *)dst = *(uint16_t *)src;
+ dst += stride_dst;
+ src += stride_src;
+ }
+ }
+}
+
+static void copy_CTB_to_hv(HEVCContext *s, const uint8_t *src,
+ int stride_src, int x, int y, int width, int height,
+ int c_idx, int x_ctb, int y_ctb)
+{
+ int sh = s->sps->pixel_shift;
+ int w = s->sps->width >> s->sps->hshift[c_idx];
+ int h = s->sps->height >> s->sps->vshift[c_idx];
+
+ /* copy horizontal edges */
+ memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb) * w + x) << sh),
+ src, width << sh);
+ memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 1) * w + x) << sh),
+ src + stride_src * (height - 1), width << sh);
+
+ /* copy vertical edges */
+ copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb) * h + y) << sh), src, sh, height, 1 << sh, stride_src);
+
+ copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 1) * h + y) << sh), src + ((width - 1) << sh), sh, height, 1 << sh, stride_src);
+}
+#endif
+
+static void restore_tqb_pixels(HEVCContext *s,
+ uint8_t *src1, const uint8_t *dst1,
+ ptrdiff_t stride_src, ptrdiff_t stride_dst,
+ int x0, int y0, int width, int height, int c_idx)
{
if ( s->pps->transquant_bypass_enable_flag ||
(s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) {
int x, y;
- ptrdiff_t stride_dst = s->sao_frame->linesize[c_idx];
- ptrdiff_t stride_src = s->frame->linesize[c_idx];
int min_pu_size = 1 << s->sps->log2_min_pu_size;
int hshift = s->sps->hshift[c_idx];
int vshift = s->sps->vshift[c_idx];
@@ -175,13 +227,13 @@ static void restore_tqb_pixels(HEVCContext *s, int x0, int y0, int width, int he
int y_min = ((y0 ) >> s->sps->log2_min_pu_size);
int x_max = ((x0 + width ) >> s->sps->log2_min_pu_size);
int y_max = ((y0 + height) >> s->sps->log2_min_pu_size);
- int len = min_pu_size >> hshift;
+ int len = (min_pu_size >> hshift) << s->sps->pixel_shift;
for (y = y_min; y < y_max; y++) {
for (x = x_min; x < x_max; x++) {
if (s->is_pcm[y * s->sps->min_pu_width + x]) {
int n;
- uint8_t *src = &s->frame->data[c_idx][ ((y << s->sps->log2_min_pu_size) >> vshift) * stride_src + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
- uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride_dst + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
+ uint8_t *src = src1 + (((y << s->sps->log2_min_pu_size) - y0) >> vshift) * stride_src + ((((x << s->sps->log2_min_pu_size) - x0) >> hshift) << s->sps->pixel_shift);
+ const uint8_t *dst = dst1 + (((y << s->sps->log2_min_pu_size) - y0) >> vshift) * stride_dst + ((((x << s->sps->log2_min_pu_size) - x0) >> hshift) << s->sps->pixel_shift);
for (n = 0; n < (min_pu_size >> vshift); n++) {
memcpy(src, dst, len);
src += stride_src;
@@ -198,6 +250,7 @@ static void restore_tqb_pixels(HEVCContext *s, int x0, int y0, int width, int he
static void sao_filter_CTB(HEVCContext *s, int x, int y)
{
static const uint8_t band_tab[8] = { 0, 1, 2, 2, 3, 3, 4, 4 };
+ HEVCLocalContext *lc = s->HEVClc;
int c_idx;
int edges[4]; // 0 left 1 top 2 right 3 bottom
int x_ctb = x >> s->sps->log2_ctb_size;
@@ -258,27 +311,132 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
int x0 = x >> s->sps->hshift[c_idx];
int y0 = y >> s->sps->vshift[c_idx];
int stride_src = s->frame->linesize[c_idx];
- int stride_dst = s->sao_frame->linesize[c_idx];
int ctb_size_h = (1 << (s->sps->log2_ctb_size)) >> s->sps->hshift[c_idx];
int ctb_size_v = (1 << (s->sps->log2_ctb_size)) >> s->sps->vshift[c_idx];
int width = FFMIN(ctb_size_h, (s->sps->width >> s->sps->hshift[c_idx]) - x0);
int height = FFMIN(ctb_size_v, (s->sps->height >> s->sps->vshift[c_idx]) - y0);
int tab = band_tab[(FFALIGN(width, 8) >> 3) - 1];
uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->sps->pixel_shift)];
+#if defined(USE_SAO_SMALL_BUFFER)
+ int stride_dst = ((1 << (s->sps->log2_ctb_size)) + 2) << s->sps->pixel_shift;
+ uint8_t *dst = lc->sao_pixel_buffer + (1 * stride_dst) + (1 << s->sps->pixel_shift);
+#else
+ int stride_dst = s->sao_frame->linesize[c_idx];
uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride_dst + (x0 << s->sps->pixel_shift)];
+#endif
switch (sao->type_idx[c_idx]) {
case SAO_BAND:
copy_CTB(dst, src, width << s->sps->pixel_shift, height, stride_dst, stride_src);
+#if defined(USE_SAO_SMALL_BUFFER)
+ copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
+ x_ctb, y_ctb);
+#endif
s->hevcdsp.sao_band_filter[tab](src, dst, stride_src, stride_dst,
sao->offset_val[c_idx], sao->band_position[c_idx],
width, height);
- restore_tqb_pixels(s, x, y, width, height, c_idx);
+ restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
+ x, y, width, height, c_idx);
sao->type_idx[c_idx] = SAO_APPLIED;
break;
case SAO_EDGE:
{
- uint8_t left_pixels = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] != SAO_APPLIED);
+#if defined(USE_SAO_SMALL_BUFFER)
+ int w = s->sps->width >> s->sps->hshift[c_idx];
+ int h = s->sps->height >> s->sps->vshift[c_idx];
+ int left_edge = edges[0];
+ int top_edge = edges[1];
+ int right_edge = edges[2];
+ int bottom_edge = edges[3];
+ int sh = s->sps->pixel_shift;
+ int left_pixels, right_pixels;
+
+ if (!top_edge) {
+ int left = 1 - left_edge;
+ int right = 1 - right_edge;
+ const uint8_t *src1[2];
+ uint8_t *dst1;
+ int src_idx, pos;
+
+ dst1 = dst - stride_dst - (left << sh);
+ src1[0] = src - stride_src - (left << sh);
+ src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb - 1) * w + x0 - left) << sh);
+ pos = 0;
+ if (left) {
+ src_idx = (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] ==
+ SAO_APPLIED);
+ copy_pixel(dst1, src1[src_idx], sh);
+ pos += (1 << sh);
+ }
+ src_idx = (CTB(s->sao, x_ctb, y_ctb-1).type_idx[c_idx] ==
+ SAO_APPLIED);
+ memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
+ if (right) {
+ pos += width << sh;
+ src_idx = (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] ==
+ SAO_APPLIED);
+ copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
+ }
+ }
+ if (!bottom_edge) {
+ int left = 1 - left_edge;
+ int right = 1 - right_edge;
+ const uint8_t *src1[2];
+ uint8_t *dst1;
+ int src_idx, pos;
+
+ dst1 = dst + height * stride_dst - (left << sh);
+ src1[0] = src + height * stride_src - (left << sh);
+ src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 2) * w + x0 - left) << sh);
+ pos = 0;
+ if (left) {
+ src_idx = (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] ==
+ SAO_APPLIED);
+ copy_pixel(dst1, src1[src_idx], sh);
+ pos += (1 << sh);
+ }
+ src_idx = (CTB(s->sao, x_ctb, y_ctb+1).type_idx[c_idx] ==
+ SAO_APPLIED);
+ memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
+ if (right) {
+ pos += width << sh;
+ src_idx = (CTB(s->sao, x_ctb+1, y_ctb+1).type_idx[c_idx] ==
+ SAO_APPLIED);
+ copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
+ }
+ }
+ left_pixels = 0;
+ if (!left_edge) {
+ if (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
+ copy_vert(dst - (1 << sh),
+ s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb - 1) * h + y0) << sh),
+ sh, height, stride_dst, 1 << sh);
+ } else {
+ left_pixels = 1;
+ }
+ }
+ right_pixels = 0;
+ if (!right_edge) {
+ if (CTB(s->sao, x_ctb+1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
+ copy_vert(dst + (width << sh),
+ s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 2) * h + y0) << sh),
+ sh, height, stride_dst, 1 << sh);
+ } else {
+ right_pixels = 1;
+ }
+ }
+
+ copy_CTB(dst - (left_pixels << sh),
+ src - (left_pixels << sh),
+ (width + left_pixels + right_pixels) << sh,
+ height, stride_dst, stride_src);
+
+ copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
+ x_ctb, y_ctb);
+#else
+ uint8_t left_pixels;
+ /* get the CTB edge pixels from the SAO pixel buffer */
+ left_pixels = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] != SAO_APPLIED);
if (!edges[1]) {
uint8_t top_left = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED);
uint8_t top_right = !edges[2] && (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED);
@@ -306,6 +464,9 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
copy_CTB(dst - (left_pixels << s->sps->pixel_shift),
src - (left_pixels << s->sps->pixel_shift),
(width + 1 + left_pixels) << s->sps->pixel_shift, height, stride_dst, stride_src);
+#endif
+ /* XXX: could handle the restoration here to simplify the
+ DSP functions */
s->hevcdsp.sao_edge_filter[restore](src, dst,
stride_src, stride_dst,
sao,
@@ -314,7 +475,8 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
vert_edge,
horiz_edge,
diag_edge);
- restore_tqb_pixels(s, x, y, width, height, c_idx);
+ restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
+ x, y, width, height, c_idx);
sao->type_idx[c_idx] = SAO_APPLIED;
break;
}