diff options
Diffstat (limited to 'libavcodec/x86/h264_cabac.asm')
-rw-r--r-- | libavcodec/x86/h264_cabac.asm | 179 |
1 files changed, 179 insertions, 0 deletions
; Recovered from: diff --git a/libavcodec/x86/h264_cabac.asm b/libavcodec/x86/h264_cabac.asm
;                 (new file mode 100644, index 0000000000..6cea03e664)
;*******************************************************************************
;* x86-optimized H.264 CABAC decoding (non-SIMD)
;* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
;* Copyright (c) 2020 Anton Khirnov <anton@khirnov.net>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

; Byte offsets of the sub-tables inside h264_cabac_tables (used below as
; displacements relative to tablesq).
%define H264_NORM_SHIFT_OFFSET                    0
%define H264_LPS_RANGE_OFFSET                   512
%define H264_MLPS_STATE_OFFSET                 1024
%define H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET 1280

; Byte offsets of the CABACContext fields accessed below.
; NOTE(review): hard-coded; low/range are accessed as dwords and the two
; bytestream pointers as qwords, so these presumably mirror the 64-bit layout
; of the C struct -- confirm against the C definition whenever it changes.
%define CC_OFF_LOW             0
%define CC_OFF_RANGE           4
%define CC_OFF_BYTESTREAM     24
%define CC_OFF_BYTESTREAM_END 32

cextern h264_cabac_tables

SECTION .text

;-------------------------------------------------------------------------------
; GET_CABAC
;
; Decodes one bin and updates the decoder state in place.
;
; Names that must be defined at the expansion site:
;   statep          address expression of the context state byte (read, then
;                   overwritten with the next state)
;   tablesq         register holding the address of h264_cabac_tables
;   low, range      decoder state registers (updated)
;   bytestream      read pointer; advanced by 2 on refill unless it has already
;                   reached bytestream_end
;   bytestream_end  end pointer (read only)
;   bit             result register; callers test bit 0
;   tmp             scratch register
;
; rcx is used as scratch and as the shift count (cl); it is saved and restored
; with push/pop, so rsp is 8 bytes lower inside the macro. Flags are clobbered.
;
; NOTE(review): any operand above that lives in rcx is destroyed while the
; macro runs. DECODE_SIGNIFICANCE makes tablesq an alias of ccq (r0); if r0 maps
; to rcx (believed to be the case for x86inc's WIN64 mapping -- confirm), the
; table lookups below would use a clobbered base.
;-------------------------------------------------------------------------------
%macro GET_CABAC 0
    push    rcx

    movzx   bitd, byte [statep]                 ; bit = current state byte
    mov     tmpd, ranged                        ; tmp = range
    and     ranged, 0xc0
    lea     ecx, [bitd + ranged * 2]            ; index = state + 2 * (range & 0xc0)
    movzx   ranged, byte [tablesq + rcx + H264_LPS_RANGE_OFFSET]
    sub     tmpd, ranged                        ; tmp = old range - table value
    mov     ecx, tmpd                           ; keep the unshifted difference
    shl     tmpd, 17
    cmp     tmpd, lowd
    cmova   ranged, ecx                         ; tmp > low (unsigned): range = difference
    sbb     rcx, rcx                            ; rcx = -1 if tmp < low (unsigned), else 0
                                                ; (cmov leaves the flags of cmp intact)
    and     tmpd, ecx                           ; tmp survives only in the tmp < low case
    xor     bitq, rcx                           ; ... in which bit is complemented
    sub     lowd, tmpd

    ; renormalize: shift range and low by the table-provided amount
    movzx   ecx, byte [tablesq + H264_NORM_SHIFT_OFFSET + rangeq]
    shl     ranged, cl
    shl     lowd, cl

    ; store the next state; bitq may be negative here (complemented above),
    ; hence the +128 bias on the table base
    movzx   tmpd, byte [tablesq + H264_MLPS_STATE_OFFSET + 128 + bitq]
    mov     [statep], tmpb

    ; refill only when the low 16 bits of low are all zero
    test    loww, loww
    jnz     %%finish

    ; NOTE(review): the 16-bit load is done before the end check, i.e. also
    ; when bytestream >= bytestream_end -- presumably relies on readable
    ; padding after the buffer; confirm with the caller.
    movzx   tmpd, word [bytestreamq]
    cmp     bytestreamq, bytestream_endq
    jae     %%skip_bs_advance                   ; pointers are ordered unsigned
    add     bytestreamq, 2
%%skip_bs_advance:

    lea     ecx, [lowd - 1]
    xor     ecx, lowd
    shr     ecx, 15
    bswap   tmpd
    shr     tmpd, 15

    movzx   ecx, byte [tablesq + rcx + H264_NORM_SHIFT_OFFSET]
    sub     tmpd, 0xffff
    neg     ecx
    add     ecx, 7                              ; cl = 7 - table value
    shl     tmpd, cl
    add     lowd, tmpd
%%finish:
    pop     rcx
%endmacro

;-------------------------------------------------------------------------------
; DECODE_SIGNIFICANCE is_8x8
;
; Function body shared by the two entry points declared at the end of the file;
; it relies on the register names set up by the preceding cglobal.
;
;   %1 == 0: for position i the first decode uses the state byte
;            significant_coeff_ctx_base[i], the second one
;            significant_coeff_ctx_base[i + last_off].
;            Positions 0 .. max_coeff - 2 are visited.
;   %1 == 1: the first decode uses significant_coeff_ctx_base[sig_off[i]], the
;            second one last_coeff_ctx_base[t], where t is the byte at
;            h264_cabac_tables + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET + i.
;            Positions 0 .. 62 are visited.
;
; At every position one bin is decoded; if its bit 0 is set, a second bin is
; decoded and the position is stored to *index. When the second bin has bit 0
; set the loop ends, otherwise index advances by one int. If the loop runs to
; completion, the final counter value is stored as one more entry.
;
; Stack layout (2 * gprsize bytes, reserved by cglobal):
;   [rsp]           4 - (initial index pointer)
;   [rsp + gprsize] CABACContext pointer
;
; Returns (rax): number of ints written through index.
;-------------------------------------------------------------------------------
%macro DECODE_SIGNIFICANCE 1
    ; park the values that are only needed again at the very end
    mov     qword [rsp], 4
    sub     [rsp], indexq
    mov     qword [rsp + gprsize], ccq

    ; load the CABACContext fields into registers
    mov     lowd,            [ccq + CC_OFF_LOW]
    mov     ranged,          [ccq + CC_OFF_RANGE]
    mov     bytestreamq,     [ccq + CC_OFF_BYTESTREAM]
    mov     bytestream_endq, [ccq + CC_OFF_BYTESTREAM_END]

    ; the context pointer is saved on the stack, so reuse its register for
    ; the table base
    %define tablesq ccq
    lea     tablesq, [h264_cabac_tables]

%if %1
    %define loop_count 63
    %define statep     stateq
%else
    sub     max_coeffd, 1
    %define loop_count max_coeffd
    %define statep     significant_coeff_ctx_baseq + counterq
%endif

    xor     counterq, counterq

.loop:
%if %1
    movzx   stated, byte [sig_offq + counterq]
    add     stateq, significant_coeff_ctx_baseq
%endif

    GET_CABAC

    test    bitq, 1
    jz      .continue

%if %1
    movzx   stated, byte [tablesq + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET + counterq]
    add     stateq, last_coeff_ctx_baseq
%else
    ; temporarily rebase so that statep addresses base[counter + last_off]
    add     significant_coeff_ctx_baseq, last_offq
%endif

    GET_CABAC

%if !%1
    sub     significant_coeff_ctx_baseq, last_offq
%endif

    mov     [indexq], counterd

    test    bitq, 1
    jnz     .finish

    add     indexq, 4

.continue:
    add     counterd, 1
    cmp     counterd, loop_count
    jb      .loop

    ; loop exhausted: record the final counter value as one more entry
    mov     [indexq], counterd

.finish:
    ; write the decoder state back to the CABACContext
    mov     ccq, [rsp + gprsize]
    mov     [ccq + CC_OFF_LOW],        lowd
    mov     [ccq + CC_OFF_RANGE],      ranged
    mov     [ccq + CC_OFF_BYTESTREAM], bytestreamq

    ; (index + 4 - initial index) / 4 = number of entries written
    add     indexq, [rsp]
    shr     indexq, 2

    movifnidn rax, indexq

    RET
%endmacro

; int ff_h264_decode_significance_x86(CABACContext *cc, int max_coeff,
;                                     uint8_t *significant_coeff_ctx_base,
;                                     int *index, ptrdiff_t last_off)
cglobal h264_decode_significance_x86, 5, 12, 0, gprsize * 2, \
    cc, max_coeff, significant_coeff_ctx_base, index, last_off, \
    bytestream, bytestream_end, low, range, bit, counter, tmp
DECODE_SIGNIFICANCE 0

; 8x8 variant: takes a last_coeff_ctx_base pointer and a sig_off byte table in
; place of max_coeff / last_off; the argument order is the name list below.
cglobal h264_decode_significance_8x8_x86, 5, 13, 0, gprsize * 2, \
    cc, significant_coeff_ctx_base, index, last_coeff_ctx_base, sig_off, \
    bytestream, bytestream_end, low, range, bit, counter, tmp, state
DECODE_SIGNIFICANCE 1