1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
|
;*******************************************************************************
;* x86-optimized H.264 CABAC decoding (non-SIMD)
;* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
;* Copyright (c) 2020 Anton Khirnov <anton@khirnov.net>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
; Byte offsets of the individual lookup tables inside h264_cabac_tables.
; They are added to the table base address for every lookup in this file.
; NOTE(review): these have to mirror the table layout on the C side, which is
; not visible from this file -- confirm whenever that layout changes.
%define H264_NORM_SHIFT_OFFSET 0
%define H264_LPS_RANGE_OFFSET 512
%define H264_MLPS_STATE_OFFSET 1024
%define H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET 1280
; Byte offsets of the CABACContext fields read and written by this file.
; low and range are accessed as 32-bit values, the two bytestream pointers as
; full-width GPR values.
; NOTE(review): presumably these track the C struct definition -- confirm
; against the struct, nothing in this file enforces it.
%define CC_OFF_LOW 0
%define CC_OFF_RANGE 4
%define CC_OFF_BYTESTREAM 24
%define CC_OFF_BYTESTREAM_END 32
; Lookup tables defined outside this file; indexed via the offsets above.
cextern h264_cabac_tables
SECTION .text
;-----------------------------------------------------------------------------
; GET_CABAC
; Decode one binary symbol and update the decoder state.
;
; Works on names that the expansion site must have defined:
;   statep          address expression of the context state byte
;                   (read, then overwritten with the next state)
;   low, range      32-bit decoder values, updated in place
;   bytestream      read pointer, advanced by 2 when a refill happens
;   bytestream_end  the read pointer is not advanced once it has reached this
;   tables          base address of h264_cabac_tables
;   bit             out: bit 0 holds the decoded symbol
;   tmp             scratch, clobbered
; rcx is used as scratch (cl carries the variable shift counts); its value is
; saved on entry and restored on exit. Flags are clobbered.
;
; NOTE(review): saving rcx only protects values that are not needed inside
; the macro. If the register mapping ever places tables or a component of
; statep in rcx, the lookups below would use a clobbered register -- confirm
; for every ABI this is built for.
;-----------------------------------------------------------------------------
%macro GET_CABAC 0
    push  rcx
    ; bit = current state, tmp = current range
    movzx bitd, byte [statep]
    mov   tmpd, ranged
    ; LPS range lookup, indexed by the state plus twice bits 6-7 of range
    and   ranged, 0xc0
    lea   ecx, [bitd + ranged * 2]
    movzx ranged, byte [tablesq + rcx + H264_LPS_RANGE_OFFSET]
    ; tmp = old range minus the looked-up value; keep an unscaled copy in ecx
    sub   tmpd, ranged
    mov   ecx, tmpd
    shl   tmpd, 17
    ; Compare the scaled value against low. cmova leaves the flags alone, so
    ; the sbb that follows still consumes the carry produced by this cmp.
    cmp   tmpd, lowd
    cmova ranged, ecx         ; scaled > low: range = reduced range,
                              ; otherwise range keeps the table value
    sbb   rcx, rcx            ; rcx = all ones if scaled < low, else 0
    and   tmpd, ecx           ; amount to take off low (0 when mask is 0)
    xor   bitq, rcx           ; invert the state value when the mask is set
    sub   lowd, tmpd
    ; renormalize: shift range and low left by the table-supplied count
    movzx ecx, byte [tablesq + H264_NORM_SHIFT_OFFSET + rangeq]
    shl   ranged, cl
    shl   lowd, cl
    ; store the next state; bitq may be negative here, hence the +128 bias
    movzx tmpd, byte [tablesq + H264_MLPS_STATE_OFFSET + 128 + bitq]
    mov   [statep], tmpb
    ; a refill is needed only when the low 16 bits of low are all zero
    test  loww, loww
    jnz   %%finish
    ; Fetch the next two bytes. The load is issued before the bounds check,
    ; so two bytes at bytestream_end must be readable.
    ; NOTE(review): presumably guaranteed by input padding -- confirm.
    movzx tmpd, word [bytestreamq]
    ; Addresses are unsigned quantities, so the bounds check uses the
    ; unsigned condition (jae) rather than a signed one.
    cmp   bytestreamq, bytestream_endq
    jae   %%skip_bs_advance
    add   bytestreamq, 2
%%skip_bs_advance:
    ; ecx = ((low - 1) ^ low) >> 15: mask of the lowest set bit of low and
    ; everything below it, used as index into the norm-shift table
    lea   ecx, [lowd - 1]
    xor   ecx, lowd
    shr   ecx, 15
    ; tmp = (two fetched bytes read as a big-endian 16-bit value) << 1
    bswap tmpd
    shr   tmpd, 15
    movzx ecx, byte [tablesq + rcx + H264_NORM_SHIFT_OFFSET]
    sub   tmpd, 0xffff
    ; shift the new bits by 7 - table value and merge them into low
    neg   ecx
    add   ecx, 7
    shl   tmpd, cl
    add   lowd, tmpd
%%finish:
    pop   rcx
%endmacro
; DECODE_SIGNIFICANCE is_8x8
; Function body shared by the two significance decoders; it is expanded
; directly after a cglobal that names the registers used here and reserves
; two GPR-sized stack slots.
;
; For each position counter = 0 .. loop_count - 1 one symbol is decoded with
; GET_CABAC. When its bit 0 is set, a second symbol is decoded, counter is
; stored as a 32-bit value at *index, and then either the loop ends (second
; symbol set) or index advances by 4 bytes. If the loop runs to completion,
; counter (equal to loop_count at that point) is stored as the final entry.
;
; Context selection depends on the macro parameter:
;   0: first symbol uses significant_coeff_ctx_base + counter, the second one
;      the same address plus last_off; loop_count is max_coeff - 1
;   1: first symbol uses significant_coeff_ctx_base + sig_off[counter], the
;      second one last_coeff_ctx_base + a byte read from the tables at
;      H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET + counter; loop_count is 63
;
; Returns (final index - initial index + 4) >> 2 in rax, i.e. the number of
; 32-bit entries written through index. low, range and bytestream are written
; back to the CABACContext; bytestream_end is only read.
%macro DECODE_SIGNIFICANCE 1
; store the values we won't need until the end on stack
; [rsp] = 4 - initial index pointer, [rsp + gprsize] = CABACContext pointer
mov qword [rsp], 4
sub [rsp], indexq
mov qword [rsp + gprsize], ccq
; load CABACContext fields
mov lowd, [ccq + CC_OFF_LOW]
mov ranged, [ccq + CC_OFF_RANGE]
mov bytestreamq, [ccq + CC_OFF_BYTESTREAM]
mov bytestream_endq, [ccq + CC_OFF_BYTESTREAM_END]
; recycle the cabac context register for tables
; NOTE(review): GET_CABAC uses rcx as scratch while indexing through tablesq.
; If cc is assigned to rcx under some ABI mapping, tablesq aliases that
; scratch register -- confirm the mapping for every target.
%define tablesq ccq
lea tablesq, [h264_cabac_tables]
%if %1
%define loop_count 63
%define statep stateq
%else
; max_coeff is decremented in place and then serves as the loop bound
sub max_coeffd, 1
%define loop_count max_coeffd
%define statep significant_coeff_ctx_baseq + counterq
%endif
xor counterq, counterq
.loop:
%if %1
; state = significant_coeff_ctx_base + sig_off[counter]
movzx stated, byte [sig_offq + counterq]
add stateq, significant_coeff_ctx_baseq
%endif
; first symbol: bit 0 clear means nothing is recorded for this position
GET_CABAC
test bitq, 1
jz .continue
%if %1
; state = last_coeff_ctx_base + per-position offset read from the tables
movzx stated, byte [tablesq + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET + counterq]
add stateq, last_coeff_ctx_baseq
%else
; temporarily rebase so that statep addresses the second context set
add significant_coeff_ctx_baseq, last_offq
%endif
; second symbol: bit 0 set ends the scan
GET_CABAC
%if !%1
; undo the temporary rebase
sub significant_coeff_ctx_baseq, last_offq
%endif
; record the position; mov leaves the flags and bit untouched
mov [indexq], counterd
test bitq, 1
jnz .finish
add indexq, 4
.continue:
add counterd, 1
cmp counterd, loop_count
jb .loop
; loop ran to completion: record counter (== loop_count) as the final entry
mov [indexq], counterd
.finish:
; update CABACContext
mov ccq, [rsp + gprsize]
mov [ccq + CC_OFF_LOW], lowd
mov [ccq + CC_OFF_RANGE], ranged
mov [ccq + CC_OFF_BYTESTREAM], bytestreamq
; index + (4 - initial index) = bytes written, including the final entry,
; which was stored without advancing index
add indexq, [rsp]
shr indexq, 2
movifnidn rax, indexq
RET
%endmacro
; int ff_h264_decode_significance_x86(CABACContext *cc, int max_coeff,
; uint8_t *significant_coeff_ctx_base, int *index, ptrdiff_t last_off)
;
; Scans positions 0 .. max_coeff - 2, writes the recorded positions as 32-bit
; values through index and returns how many entries were written (see
; DECODE_SIGNIFICANCE, expanded here with parameter 0).
; The first five names in the register list are the function arguments, the
; remaining seven are scratch registers used by the macro body. gprsize * 2
; reserves the two stack slots the body stores to at [rsp] and
; [rsp + gprsize].
; NOTE(review): the numeric cglobal arguments are read here as
; "5 arguments, 12 GPRs, 0 vector registers" following the usual x86inc
; convention, which is defined outside this file -- confirm.
cglobal h264_decode_significance_x86, 5, 12, 0, gprsize * 2, \
cc, max_coeff, significant_coeff_ctx_base, index, last_off, \
bytestream, bytestream_end, low, range, bit, counter, tmp
DECODE_SIGNIFICANCE 0
; int ff_h264_decode_significance_8x8_x86(CABACContext *cc,
; uint8_t *significant_coeff_ctx_base, int *index,
; uint8_t *last_coeff_ctx_base, const uint8_t *sig_off)
; NOTE(review): prototype reconstructed from how the arguments are used in
; DECODE_SIGNIFICANCE (sig_off is read bytewise, index is written with 32-bit
; stores, both ctx bases are combined with byte offsets) -- confirm against
; the C declaration.
;
; Same scan as the non-8x8 variant but over a fixed 63 positions, with the
; context for each position taken from sig_off and from the table at
; H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET. The extra named register (state)
; holds the computed context address.
cglobal h264_decode_significance_8x8_x86, 5, 13, 0, gprsize * 2, \
cc, significant_coeff_ctx_base, index, last_coeff_ctx_base, sig_off, \
bytestream, bytestream_end, low, range, bit, counter, tmp, state
DECODE_SIGNIFICANCE 1
|