summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Anton Khirnov <anton@khirnov.net> 2020-06-18 08:38:49 +0200
committer Anton Khirnov <anton@khirnov.net> 2020-07-01 18:15:13 +0200
commit a2b0224457c3cabae05cfc2638d9d5d30d93e469 (patch)
tree 4a7ba54ff0db06a98baeb69af31aa7d6a368c522
parent 3223f6b32d5d60c73b1e14475d4751770ae9344d (diff)
x86/h264_cabac: convert decode_significance from inline asm to external (decode_significance)
This is vastly more readable and more portable.
-rw-r--r-- libavcodec/h264_cabac.c | 2
-rw-r--r-- libavcodec/x86/Makefile | 1
-rw-r--r-- libavcodec/x86/h264_cabac.asm | 179
-rw-r--r-- libavcodec/x86/h264_cabac.c | 208
-rw-r--r-- libavcodec/x86/h264_cabac.h | 40
5 files changed, 221 insertions, 209 deletions
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 86f0a412fa..ca4646b3dd 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -42,7 +42,7 @@
#include "mpegutils.h"
#if ARCH_X86
-#include "x86/h264_cabac.c"
+#include "x86/h264_cabac.h"
#endif
/* Cabac pre state table */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 194135dafb..b6a2f67248 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -162,6 +162,7 @@ X86ASM-OBJS-$(CONFIG_FLAC_DECODER) += x86/flacdsp.o
ifdef CONFIG_GPL
X86ASM-OBJS-$(CONFIG_FLAC_ENCODER) += x86/flac_dsp_gpl.o
endif
+X86ASM-OBJS-$(CONFIG_H264_DECODER) += x86/h264_cabac.o
X86ASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_add_res.o \
x86/hevc_deblock.o \
x86/hevc_idct.o \
diff --git a/libavcodec/x86/h264_cabac.asm b/libavcodec/x86/h264_cabac.asm
new file mode 100644
index 0000000000..6cea03e664
--- /dev/null
+++ b/libavcodec/x86/h264_cabac.asm
@@ -0,0 +1,179 @@
+;*******************************************************************************
+;* x86-optimized H.264 CABAC decoding (non-SIMD)
+;* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+;* Copyright (c) 2020 Anton Khirnov <anton@khirnov.net>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+%define H264_NORM_SHIFT_OFFSET 0
+%define H264_LPS_RANGE_OFFSET 512
+%define H264_MLPS_STATE_OFFSET 1024
+%define H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET 1280
+; NOTE(review): the table offsets above (into h264_cabac_tables) and the CABACContext field offsets below are hard-coded here; presumably they must match the C-side definitions -- confirm
+%define CC_OFF_LOW 0
+%define CC_OFF_RANGE 4
+%define CC_OFF_BYTESTREAM 24
+%define CC_OFF_BYTESTREAM_END 32
+
+cextern h264_cabac_tables
+
+SECTION .text
+
+%macro GET_CABAC 0 ; decode one bin for the context byte at [statep] -> bit 0 of bitq; updates lowd/ranged/bytestreamq/[statep], clobbers tmp and flags
+ push rcx ; rcx/ecx/cl is scratch below (cl carries the variable shift counts); restored at %%finish
+ ; NOTE(review): ecx is overwritten while tablesq/statep are still live; if cglobal assigns any of them to rcx on some ABI, the lookups below break -- confirm the register mapping
+ movzx bitd, byte [statep] ; bit = current context state byte
+ mov tmpd, ranged ; tmp = range before the table lookup
+ and ranged, 0xc0 ; keep bits 6-7 of range
+ lea ecx, [bitd + ranged * 2] ; table index = state + 2 * (range & 0xc0)
+ movzx ranged, byte [tablesq + rcx + H264_LPS_RANGE_OFFSET] ; range = LPS-range table entry
+ sub tmpd, ranged ; tmp = old range - table entry
+ mov ecx, tmpd ; keep the unshifted difference for the cmova
+ shl tmpd, 17 ; scale the difference so it can be compared with low
+ cmp tmpd, lowd
+ cmova ranged, ecx ; (tmp << 17) > low (unsigned): range = difference
+ sbb rcx, rcx ; rcx = all ones if (tmp << 17) < low (carry from the cmp), else 0
+ and tmpd, ecx ; tmp = scaled difference on the carry path, else 0
+ xor bitq, rcx ; invert every bit of the state on the carry path
+ sub lowd, tmpd ; low -= scaled difference on the carry path
+ ; renormalize: shift range and low left by the count the norm-shift table gives for range
+ movzx ecx, byte [tablesq + H264_NORM_SHIFT_OFFSET + rangeq]
+ shl ranged, cl
+ shl lowd, cl
+ ; next context state; bitq can be negative here (state XOR all-ones), presumably the reason for the +128 bias
+ movzx tmpd, byte [tablesq + H264_MLPS_STATE_OFFSET + 128 + bitq]
+ mov [statep], tmpb ; write the new state back to the context byte
+ ; refill only when the low 16 bits of low are all zero
+ test loww, loww
+ jnz %%finish
+ ; NOTE(review): the 2-byte load happens before the end-of-buffer check -- presumably relies on input padding; confirm
+ movzx tmpd, word [bytestreamq]
+ cmp bytestreamq, bytestream_endq
+ jae %%skip_bs_advance ; pointers are ordered unsigned; do not advance at or past the end
+ add bytestreamq, 2
+%%skip_bs_advance:
+ ; merge the new bits into low at a position derived from low's lowest set bit
+ lea ecx, [lowd - 1]
+ xor ecx, lowd ; ecx = low ^ (low - 1): mask up to and including the lowest set bit
+ shr ecx, 15
+ bswap tmpd ; the two loaded bytes, in big-endian order, now sit in the top 16 bits
+ shr tmpd, 15 ; tmp = (big-endian 16-bit value) << 1
+
+ movzx ecx, byte [tablesq + rcx + H264_NORM_SHIFT_OFFSET]
+ sub tmpd, 0xffff
+ neg ecx
+ add ecx, 7 ; ecx = 7 - norm-shift table entry
+ shl tmpd, cl
+ add lowd, tmpd
+%%finish:
+ pop rcx
+%endmacro
+
+%macro DECODE_SIGNIFICANCE 1 ; %1 = 1: 8x8 variant (63 iterations, state looked up per position); %1 = 0: generic variant (max_coeff - 1 iterations)
+ ; store the values we won't need until the end on stack
+ mov qword [rsp], 4
+ sub [rsp], indexq ; [rsp] = 4 - initial index pointer; added back at .finish to get the entry count
+ mov qword [rsp + gprsize], ccq ; saved because the ccq register is reused for the tables pointer
+
+ ; load CABACContext fields
+ mov lowd, [ccq + CC_OFF_LOW]
+ mov ranged, [ccq + CC_OFF_RANGE]
+ mov bytestreamq, [ccq + CC_OFF_BYTESTREAM]
+ mov bytestream_endq, [ccq + CC_OFF_BYTESTREAM_END]
+
+ ; recycle the cabac context register for tables
+ %define tablesq ccq
+ lea tablesq, [h264_cabac_tables]
+ ; statep is the address expression GET_CABAC dereferences; loop_count is the exclusive upper bound for counter
+%if %1
+ %define loop_count 63
+ %define statep stateq
+%else
+ sub max_coeffd, 1 ; the loop covers positions 0 .. max_coeff - 2
+ %define loop_count max_coeffd
+ %define statep significant_coeff_ctx_baseq + counterq
+%endif
+
+ xor counterq, counterq ; counter = current position, starts at 0
+
+.loop:
+%if %1
+ movzx stated, byte [sig_offq + counterq] ; state address = significant_coeff_ctx_base + sig_off[counter]
+ add stateq, significant_coeff_ctx_baseq
+%endif
+ ; first bin: is the coefficient at this position significant?
+ GET_CABAC
+
+ test bitq, 1
+ jz .continue ; not significant: go to the next position
+
+%if %1
+ movzx stated, byte [tablesq + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET + counterq] ; state address = last_coeff_ctx_base + table[counter]
+ add stateq, last_coeff_ctx_baseq
+%else
+ add significant_coeff_ctx_baseq, last_offq ; temporarily rebase statep by last_off for the second bin
+%endif
+ ; second bin: is this the last significant coefficient?
+ GET_CABAC
+
+%if !%1
+ sub significant_coeff_ctx_baseq, last_offq ; undo the temporary rebase
+%endif
+
+ mov [indexq], counterd ; record this position in index[]
+
+ test bitq, 1
+ jnz .finish ; last coefficient: stop without advancing index
+
+ add indexq, 4 ; advance to the next int slot of index[]
+
+.continue:
+ add counterd, 1
+ cmp counterd, loop_count
+ jb .loop
+ ; loop ran to completion: record the final counter value (== loop_count) as one more entry
+ mov [indexq], counterd
+
+.finish:
+ ; update CABACContext
+ mov ccq, [rsp + gprsize]
+ mov [ccq + CC_OFF_LOW], lowd
+ mov [ccq + CC_OFF_RANGE], ranged
+ mov [ccq + CC_OFF_BYTESTREAM], bytestreamq
+ ; return value = (final index pointer + 4 - initial index pointer) / 4 = number of entries stored to index[]
+ add indexq, [rsp]
+ shr indexq, 2
+
+ movifnidn rax, indexq
+
+ RET
+%endmacro
+
+; int ff_h264_decode_significance_x86(CABACContext *cc, size_t max_coeff, (only the low 32 bits, max_coeffd, are used)
+; uint8_t *significant_coeff_ctx_base, int *index, ptrdiff_t last_off)
+cglobal h264_decode_significance_x86, 5, 12, 0, gprsize * 2, \
+ cc, max_coeff, significant_coeff_ctx_base, index, last_off, \
+ bytestream, bytestream_end, low, range, bit, counter, tmp
+DECODE_SIGNIFICANCE 0
+; int ff_h264_decode_significance_8x8_x86(CABACContext *cc, uint8_t *significant_coeff_ctx_base, int *index, uint8_t *last_coeff_ctx_base, const uint8_t *sig_off)
+cglobal h264_decode_significance_8x8_x86, 5, 13, 0, gprsize * 2, \
+ cc, significant_coeff_ctx_base, index, last_coeff_ctx_base, sig_off, \
+ bytestream, bytestream_end, low, range, bit, counter, tmp, state
+DECODE_SIGNIFICANCE 1
diff --git a/libavcodec/x86/h264_cabac.c b/libavcodec/x86/h264_cabac.c
deleted file mode 100644
index 2edc6d7e74..0000000000
--- a/libavcodec/x86/h264_cabac.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
- * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * H.264 / AVC / MPEG-4 part10 codec.
- * non-SIMD x86-specific optimizations for H.264
- * @author Michael Niedermayer <michaelni@gmx.at>
- */
-
-#include <stddef.h>
-
-#include "libavcodec/cabac.h"
-#include "cabac.h"
-
-#if HAVE_INLINE_ASM
-
-#if ARCH_X86_64
-#define REG64 "r"
-#else
-#define REG64 "m"
-#endif
-
-//FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
-//as that would make optimization work hard)
-#if HAVE_7REGS && !BROKEN_COMPILER
-#define decode_significance decode_significance_x86
-static int decode_significance_x86(CABACContext *c, int max_coeff,
- uint8_t *significant_coeff_ctx_base,
- int *index, x86_reg last_off){
- void *end= significant_coeff_ctx_base + max_coeff - 1;
- int minusstart= -(intptr_t)significant_coeff_ctx_base;
- int minusindex= 4-(intptr_t)index;
- int bit;
- x86_reg coeff_count;
-
-#ifdef BROKEN_RELOCATIONS
- void *tables;
-
- __asm__ volatile(
- "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
- : "=&r"(tables)
- : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
- );
-#endif
-
- __asm__ volatile(
- "3: \n\t"
-
- BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
- "%5", "%q5", "%k0", "%b0",
- "%c11(%6)", "%c12(%6)",
- AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
- AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
- AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
- "%13")
-
- "test $1, %4 \n\t"
- " jz 4f \n\t"
- "add %10, %1 \n\t"
-
- BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
- "%5", "%q5", "%k0", "%b0",
- "%c11(%6)", "%c12(%6)",
- AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
- AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
- AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
- "%13")
-
- "sub %10, %1 \n\t"
- "mov %2, %0 \n\t"
- "movl %7, %%ecx \n\t"
- "add %1, %%"FF_REG_c" \n\t"
- "movl %%ecx, (%0) \n\t"
-
- "test $1, %4 \n\t"
- " jnz 5f \n\t"
-
- "add"FF_OPSIZE" $4, %2 \n\t"
-
- "4: \n\t"
- "add $1, %1 \n\t"
- "cmp %8, %1 \n\t"
- " jb 3b \n\t"
- "mov %2, %0 \n\t"
- "movl %7, %%ecx \n\t"
- "add %1, %%"FF_REG_c" \n\t"
- "movl %%ecx, (%0) \n\t"
- "5: \n\t"
- "add %9, %k0 \n\t"
- "shr $2, %k0 \n\t"
- : "=&q"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index),
- "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
- : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
- "i"(offsetof(CABACContext, bytestream)),
- "i"(offsetof(CABACContext, bytestream_end))
- TABLES_ARG
- : "%"FF_REG_c, "memory"
- );
- return coeff_count;
-}
-
-#define decode_significance_8x8 decode_significance_8x8_x86
-static int decode_significance_8x8_x86(CABACContext *c,
- uint8_t *significant_coeff_ctx_base,
- int *index, uint8_t *last_coeff_ctx_base, const uint8_t *sig_off){
- int minusindex= 4-(intptr_t)index;
- int bit;
- x86_reg coeff_count;
- x86_reg last=0;
- x86_reg state;
-
-#ifdef BROKEN_RELOCATIONS
- void *tables;
-
- __asm__ volatile(
- "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
- : "=&r"(tables)
- : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
- );
-#endif
-
- __asm__ volatile(
- "mov %1, %6 \n\t"
- "3: \n\t"
-
- "mov %10, %0 \n\t"
- "movzb (%0, %6), %6 \n\t"
- "add %9, %6 \n\t"
-
- BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
- "%5", "%q5", "%k0", "%b0",
- "%c12(%7)", "%c13(%7)",
- AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
- AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
- AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
- "%15")
-
- "mov %1, %6 \n\t"
- "test $1, %4 \n\t"
- " jz 4f \n\t"
-
-#ifdef BROKEN_RELOCATIONS
- "movzb %c14(%15, %q6), %6\n\t"
-#else
- "movzb "MANGLE(ff_h264_cabac_tables)"+%c14(%6), %6\n\t"
-#endif
- "add %11, %6 \n\t"
-
- BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
- "%5", "%q5", "%k0", "%b0",
- "%c12(%7)", "%c13(%7)",
- AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
- AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
- AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
- "%15")
-
- "mov %2, %0 \n\t"
- "mov %1, %6 \n\t"
- "mov %k6, (%0) \n\t"
-
- "test $1, %4 \n\t"
- " jnz 5f \n\t"
-
- "add"FF_OPSIZE" $4, %2 \n\t"
-
- "4: \n\t"
- "add $1, %6 \n\t"
- "mov %6, %1 \n\t"
- "cmp $63, %6 \n\t"
- " jb 3b \n\t"
- "mov %2, %0 \n\t"
- "mov %k6, (%0) \n\t"
- "5: \n\t"
- "addl %8, %k0 \n\t"
- "shr $2, %k0 \n\t"
- : "=&q"(coeff_count), "+"REG64(last), "+"REG64(index), "+&r"(c->low),
- "=&r"(bit), "+&r"(c->range), "=&r"(state)
- : "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base),
- REG64(sig_off), REG64(last_coeff_ctx_base),
- "i"(offsetof(CABACContext, bytestream)),
- "i"(offsetof(CABACContext, bytestream_end)),
- "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG
- : "%"FF_REG_c, "memory"
- );
- return coeff_count;
-}
-#endif /* HAVE_7REGS && BROKEN_COMPILER */
-
-#endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/h264_cabac.h b/libavcodec/x86/h264_cabac.h
new file mode 100644
index 0000000000..bbbe875320
--- /dev/null
+++ b/libavcodec/x86/h264_cabac.h
@@ -0,0 +1,40 @@
+/*
+ * x86-optimized H.264 CABAC decoding (non-SIMD)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_H264_CABAC_H
+#define AVCODEC_X86_H264_CABAC_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavcodec/cabac.h"
+/* Implemented in h264_cabac.asm: both store the positions found to index[] and return the number of entries stored. */
+int ff_h264_decode_significance_x86(CABACContext *c, size_t max_coeff,
+ uint8_t *significant_coeff_ctx_base,
+ int *index, ptrdiff_t last_off);
+int ff_h264_decode_significance_8x8_x86(CABACContext *c,
+ uint8_t *significant_coeff_ctx_base,
+ int *index, uint8_t *last_coeff_ctx_base,
+ const uint8_t *sig_off);
+
+#define decode_significance ff_h264_decode_significance_x86
+#define decode_significance_8x8 ff_h264_decode_significance_8x8_x86
+
+#endif // AVCODEC_X86_H264_CABAC_H