summaryrefslogtreecommitdiff
path: root/libavcodec/hevc.c
diff options
context:
space:
mode:
authorMickaƫl Raulet <mraulet@insa-rennes.fr>2013-10-11 22:07:00 +0200
committerMichael Niedermayer <michaelni@gmx.at>2013-10-17 02:41:51 +0200
commitb4948943904a5e496e31c5e7778364fa0b03253c (patch)
tree0448ef136bfae4d01cad54adf2201e213d8e4cb8 /libavcodec/hevc.c
parent3ed65d98c616d52e2544c8b81aa3997f28bb88f5 (diff)
hevc: optimize residual coding(cherry picked from commit 70692a44708157b4dfa50e402e446bfa2b27f55e)
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/hevc.c')
-rw-r--r--libavcodec/hevc.c245
1 files changed, 136 insertions, 109 deletions
diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c
index d59b4e48fe..ad82012e1b 100644
--- a/libavcodec/hevc.c
+++ b/libavcodec/hevc.c
@@ -318,7 +318,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
uint8_t luma_weight_l1_flag[16];
uint8_t chroma_weight_l1_flag[16];
- s->sh.luma_log2_weight_denom = get_ue_golomb(gb);
+ s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
if (s->sps->chroma_format_idc != 0) {
int delta = get_se_golomb(gb);
s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
@@ -415,8 +415,8 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
return 0;
if (sps->num_long_term_ref_pics_sps > 0)
- nb_sps = get_ue_golomb(gb);
- nb_sh = get_ue_golomb(gb);
+ nb_sps = get_ue_golomb_long(gb);
+ nb_sh = get_ue_golomb_long(gb);
if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
return AVERROR_INVALIDDATA;
@@ -441,7 +441,7 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
delta_poc_msb_present = get_bits1(gb);
if (delta_poc_msb_present) {
- int delta = get_ue_golomb(gb);
+ int delta = get_ue_golomb_long(gb);
if (i && i != nb_sps)
delta += prev_delta_msb;
@@ -471,7 +471,7 @@ static int hls_slice_header(HEVCContext *s)
if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
sh->no_output_of_prior_pics_flag = get_bits1(gb);
- sh->pps_id = get_ue_golomb(gb);
+ sh->pps_id = get_ue_golomb_long(gb);
if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
return AVERROR_INVALIDDATA;
@@ -552,7 +552,7 @@ static int hls_slice_header(HEVCContext *s)
for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
- sh->slice_type = get_ue_golomb(gb);
+ sh->slice_type = get_ue_golomb_long(gb);
if (!(sh->slice_type == I_SLICE || sh->slice_type == P_SLICE ||
sh->slice_type == B_SLICE)) {
av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
@@ -646,9 +646,9 @@ static int hls_slice_header(HEVCContext *s)
sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
if (get_bits1(gb)) { // num_ref_idx_active_override_flag
- sh->nb_refs[L0] = get_ue_golomb(gb) + 1;
+ sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
if (sh->slice_type == B_SLICE)
- sh->nb_refs[L1] = get_ue_golomb(gb) + 1;
+ sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
}
if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
@@ -694,7 +694,7 @@ static int hls_slice_header(HEVCContext *s)
sh->collocated_list = !get_bits1(gb);
if (sh->nb_refs[sh->collocated_list] > 1) {
- sh->collocated_ref_idx = get_ue_golomb(gb);
+ sh->collocated_ref_idx = get_ue_golomb_long(gb);
if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
av_log(s->avctx, AV_LOG_ERROR,
"Invalid collocated_ref_idx: %d.\n", sh->collocated_ref_idx);
@@ -708,7 +708,7 @@ static int hls_slice_header(HEVCContext *s)
pred_weight_table(s, gb);
}
- sh->max_num_merge_cand = 5 - get_ue_golomb(gb);
+ sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
}
sh->slice_qp_delta = get_se_golomb(gb);
@@ -759,9 +759,9 @@ static int hls_slice_header(HEVCContext *s)
sh->num_entry_point_offsets = 0;
if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
- sh->num_entry_point_offsets = get_ue_golomb(gb);
+ sh->num_entry_point_offsets = get_ue_golomb_long(gb);
if (sh->num_entry_point_offsets > 0) {
- int offset_len = get_ue_golomb(gb) + 1;
+ int offset_len = get_ue_golomb_long(gb) + 1;
for (i = 0; i < sh->num_entry_point_offsets; i++)
skip_bits(gb, offset_len);
@@ -769,7 +769,7 @@ static int hls_slice_header(HEVCContext *s)
}
if (s->pps->slice_header_extension_present_flag) {
- int length = get_ue_golomb(gb);
+ int length = get_ue_golomb_long(gb);
for (i = 0; i < length; i++)
skip_bits(gb, 8); // slice_header_extension_data_byte
}
@@ -889,6 +889,8 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
int last_scan_pos;
int n_end;
int num_coeff = 0;
+ int greater1_ctx = 1;
+
int num_last_subset;
int x_cg_last_sig, y_cg_last_sig;
@@ -899,7 +901,8 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
int vshift = s->sps->vshift[c_idx];
uint8_t *dst = &s->frame->data[c_idx][(y0 >> vshift) * stride +
((x0 >> hshift) << s->sps->pixel_shift)];
- DECLARE_ALIGNED( 16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE] ) = {0};
+ DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = {0};
+ DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = {{0}};
int trafo_size = 1 << log2_trafo_size;
int i;
@@ -911,6 +914,17 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
// Derive QP for dequant
if (!lc->cu.cu_transquant_bypass_flag) {
static const int qp_c[] = { 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37 };
+ static const uint8_t rem6[51 + 2 * 6 + 1] = {
+ 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
+ 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
+ 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
+ };
+
+ static const uint8_t div6[51 + 2 * 6 + 1] = {
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3,
+ 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
+ };
int qp_y = lc->qp_y;
if (c_idx == 0) {
@@ -936,7 +950,7 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
shift = s->sps->bit_depth + log2_trafo_size - 5;
add = 1 << (shift-1);
- scale = level_scale[qp%6] << (qp/6);
+ scale = level_scale[rem6[qp]] << (div6[qp]);
scale_m = 16; // default when no custom scaling lists.
dc_scale = 16;
@@ -954,8 +968,6 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
}
}
- memset(lc->rc.significant_coeff_group_flag, 0, 8 * 8);
-
if (s->pps->transform_skip_enabled_flag && !lc->cu.cu_transquant_bypass_flag &&
log2_trafo_size == 2) {
transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
@@ -1028,38 +1040,34 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
break;
}
num_coeff++;
-
num_last_subset = (num_coeff - 1) >> 4;
for (i = num_last_subset; i >= 0; i--) {
int n, m;
- int first_nz_pos_in_cg, last_nz_pos_in_cg, num_sig_coeff, first_greater1_coeff_idx;
- int sign_hidden;
- int sum_abs;
int x_cg, y_cg, x_c, y_c, pos;
int implicit_non_zero_coeff = 0;
int64_t trans_coeff_level;
-
+ int prev_sig = 0;
int offset = i << 4;
- uint8_t significant_coeff_flag_idx[16] = {0};
- uint8_t coeff_abs_level_greater1_flag[16] = {0};
- uint8_t coeff_abs_level_greater2_flag[16] = {0};
- uint16_t coeff_sign_flag;
+ uint8_t significant_coeff_flag_idx[16];
uint8_t nb_significant_coeff_flag = 0;
- int first_elem;
-
x_cg = scan_x_cg[i];
y_cg = scan_y_cg[i];
if ((i < num_last_subset) && (i > 0)) {
- lc->rc.significant_coeff_group_flag[x_cg][y_cg] =
- ff_hevc_significant_coeff_group_flag_decode(s, c_idx, x_cg, y_cg,
- log2_trafo_size);
+ int ctx_cg = 0;
+ if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
+ ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
+ if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
+ ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
+
+ significant_coeff_group_flag[x_cg][y_cg] =
+ ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
implicit_non_zero_coeff = 1;
} else {
- lc->rc.significant_coeff_group_flag[x_cg][y_cg] =
+ significant_coeff_group_flag[x_cg][y_cg] =
((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
(x_cg == 0 && y_cg == 0));
}
@@ -1073,107 +1081,126 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
} else {
n_end = 15;
}
+
+ if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
+ prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
+ if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
+ prev_sig += (significant_coeff_group_flag[x_cg][y_cg + 1] << 1);
+
for (n = n_end; n >= 0; n--) {
GET_COORD(offset, n);
- if (lc->rc.significant_coeff_group_flag[x_cg][y_cg] &&
+ if (significant_coeff_group_flag[x_cg][y_cg] &&
(n > 0 || implicit_non_zero_coeff == 0)) {
- if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c, log2_trafo_size, scan_idx) == 1) {
+ if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c, log2_trafo_size, scan_idx, prev_sig) == 1) {
significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
- nb_significant_coeff_flag = nb_significant_coeff_flag + 1;
+ nb_significant_coeff_flag++;
implicit_non_zero_coeff = 0;
}
} else {
int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
- if (last_cg && implicit_non_zero_coeff && lc->rc.significant_coeff_group_flag[x_cg][y_cg]) {
+ if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
- nb_significant_coeff_flag = nb_significant_coeff_flag + 1;
+ nb_significant_coeff_flag++;
}
}
}
n_end = nb_significant_coeff_flag;
- first_nz_pos_in_cg = 16;
- last_nz_pos_in_cg = -1;
- num_sig_coeff = 0;
- first_greater1_coeff_idx = -1;
- for (m = 0; m < n_end; m++) {
- n = significant_coeff_flag_idx[m];
- if (num_sig_coeff < 8) {
- coeff_abs_level_greater1_flag[n] =
- ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, i, n,
- (num_sig_coeff == 0),
- (i == num_last_subset));
- num_sig_coeff++;
- if (coeff_abs_level_greater1_flag[n] &&
+
+ if (n_end) {
+ int first_nz_pos_in_cg = 16;
+ int last_nz_pos_in_cg = -1;
+ int c_rice_param = 0;
+ int first_greater1_coeff_idx = -1;
+ uint8_t coeff_abs_level_greater1_flag[16] = {0};
+ uint16_t coeff_sign_flag;
+ int sum_abs = 0;
+ int sign_hidden = 0;
+
+ // initialize first elem of coeff_bas_level_greater1_flag
+ int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
+
+ if (!(i == num_last_subset) && greater1_ctx == 0)
+ ctx_set++;
+ greater1_ctx = 1;
+ last_nz_pos_in_cg = significant_coeff_flag_idx[0];
+
+ for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
+ int n_idx = significant_coeff_flag_idx[m];
+ int inc = (ctx_set << 2) + greater1_ctx;
+ coeff_abs_level_greater1_flag[n_idx] =
+ ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
+ if (coeff_abs_level_greater1_flag[n_idx]) {
+ greater1_ctx = 0;
+ } else if (greater1_ctx > 0 && greater1_ctx < 3) {
+ greater1_ctx++;
+ }
+
+ if (coeff_abs_level_greater1_flag[n_idx] &&
first_greater1_coeff_idx == -1)
- first_greater1_coeff_idx = n;
+ first_greater1_coeff_idx = n_idx;
}
- if (last_nz_pos_in_cg == -1)
- last_nz_pos_in_cg = n;
- first_nz_pos_in_cg = n;
- }
-
- sign_hidden = (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
- !lc->cu.cu_transquant_bypass_flag);
- if (first_greater1_coeff_idx != -1) {
- coeff_abs_level_greater2_flag[first_greater1_coeff_idx] =
- ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, i, first_greater1_coeff_idx);
- }
- if (!s->pps->sign_data_hiding_flag || !sign_hidden ) {
- coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
- } else {
- coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag-1) << (16 - (nb_significant_coeff_flag - 1));
- }
+ first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
+ sign_hidden = (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
+ !lc->cu.cu_transquant_bypass_flag);
- num_sig_coeff = 0;
- sum_abs = 0;
- first_elem = 1;
- for (m = 0; m < n_end; m++) {
- n = significant_coeff_flag_idx[m];
- GET_COORD(offset, n);
- trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n] +
- coeff_abs_level_greater2_flag[n];
- if (trans_coeff_level == ((num_sig_coeff < 8) ?
- ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
- trans_coeff_level += ff_hevc_coeff_abs_level_remaining(s, first_elem, trans_coeff_level);
- first_elem = 0;
+ if (first_greater1_coeff_idx != -1) {
+ coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
}
- if (s->pps->sign_data_hiding_flag && sign_hidden) {
- sum_abs += trans_coeff_level;
- if (n == first_nz_pos_in_cg && ((sum_abs&1) == 1))
- trans_coeff_level = -trans_coeff_level;
+ if (!s->pps->sign_data_hiding_flag || !sign_hidden ) {
+ coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
+ } else {
+ coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
}
- if (coeff_sign_flag >> 15)
- trans_coeff_level = -trans_coeff_level;
- coeff_sign_flag <<= 1;
- num_sig_coeff++;
-
- if (!lc->cu.cu_transquant_bypass_flag) {
- if(s->sps->scaling_list_enable_flag) {
- if(y_c || x_c || log2_trafo_size < 4) {
- switch(log2_trafo_size) {
- case 3: pos = (y_c << 3) + x_c; break;
- case 4: pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
- case 5: pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
- default: pos = (y_c << 2) + x_c;
- }
- scale_m = scale_matrix[pos];
- } else
- scale_m = dc_scale;
+
+ for (m = 0; m < n_end; m++) {
+ n = significant_coeff_flag_idx[m];
+ GET_COORD(offset, n);
+ trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
+ if (trans_coeff_level == ((m < 8) ?
+ ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
+ int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
+
+ trans_coeff_level += last_coeff_abs_level_remaining;
+ if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
+ c_rice_param = FFMIN(c_rice_param + 1, 4);
+
}
- trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
- if (trans_coeff_level < 0) {
- if((~trans_coeff_level) & 0xFffffffffff8000)
- trans_coeff_level = -32768;
- } else {
- if(trans_coeff_level & 0xffffffffffff8000)
- trans_coeff_level = 32767;
+ if (s->pps->sign_data_hiding_flag && sign_hidden) {
+ sum_abs += trans_coeff_level;
+ if (n == first_nz_pos_in_cg && ((sum_abs&1) == 1))
+ trans_coeff_level = -trans_coeff_level;
}
+ if (coeff_sign_flag >> 15)
+ trans_coeff_level = -trans_coeff_level;
+ coeff_sign_flag <<= 1;
+ if(!lc->cu.cu_transquant_bypass_flag) {
+ if(s->sps->scaling_list_enable_flag) {
+ if(y_c || x_c || log2_trafo_size < 4) {
+ switch(log2_trafo_size) {
+ case 3: pos = (y_c << 3) + x_c; break;
+ case 4: pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
+ case 5: pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
+ default: pos = (y_c << 2) + x_c;
+ }
+ scale_m = scale_matrix[pos];
+ } else {
+ scale_m = dc_scale;
+ }
+ }
+ trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
+ if(trans_coeff_level < 0) {
+ if((~trans_coeff_level) & 0xFffffffffff8000)
+ trans_coeff_level = -32768;
+ } else {
+ if(trans_coeff_level & 0xffffffffffff8000)
+ trans_coeff_level = 32767;
+ }
+ }
+ coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
}
-
- coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
}
}