summaryrefslogtreecommitdiff
path: root/libavcodec/x86/h264_cabac.asm
blob: 6cea03e66417e47b74362dc3f03c3849eaedfe82 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
;*******************************************************************************
;* x86-optimized H.264 CABAC decoding (non-SIMD)
;* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
;* Copyright (c) 2020 Anton Khirnov <anton@khirnov.net>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

; Byte offsets of the individual lookup tables inside h264_cabac_tables; they
; are added to the table base register whenever one of the tables is indexed.
; NOTE(review): hard-coded here; presumably they mirror the layout the C code
; uses for the same array -- confirm that both sides stay in sync.
%define H264_NORM_SHIFT_OFFSET                 0
%define H264_LPS_RANGE_OFFSET                  512
%define H264_MLPS_STATE_OFFSET                 1024
%define H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET 1280

; Byte offsets of the CABACContext fields that are loaded from / stored to
; the cc pointer by DECODE_SIGNIFICANCE.
; NOTE(review): hard-coded struct offsets; must match the C definition of
; CABACContext -- confirm whenever that struct changes.
%define CC_OFF_LOW            0
%define CC_OFF_RANGE          4
%define CC_OFF_BYTESTREAM     24
%define CC_OFF_BYTESTREAM_END 32

; lookup tables defined outside this file
cextern h264_cabac_tables

SECTION .text

;-----------------------------------------------------------------------------
; GET_CABAC: decode one binary decision using the context state byte at
; [statep].
;
; The enclosing code must provide: statep (address expression of the state
; byte), tables (base address of h264_cabac_tables), low, range, bytestream,
; bytestream_end, bit and tmp.
;
; Out:   bit   - bit 0 holds the decoded decision (the remaining bits hold the
;                old state value or its complement, so users test bit 0 only)
;        low, range and the byte at [statep] are updated
;        bytestream is advanced by 2 when a refill happened and the pointer
;        was still below bytestream_end
; Clobb: tmp, flags. rcx is used as scratch (cl is required by the variable
;        shifts); it is pushed on entry and popped on exit, so rsp is lowered
;        by one slot for the duration of the body.
; NOTE(review): rcx is overwritten inside the body while tables is still being
;        read, so tables (and statep) must not live in rcx -- confirm against
;        the x86inc register mapping of every ABI this is built for.
;-----------------------------------------------------------------------------
%macro GET_CABAC 0
    push rcx

    ; bit = current state, rcx = state + 2 * (range & 0xc0): index into the
    ; LPS range table
    movzx bitd,     byte [statep]
    mov   tmpd,     ranged
    and   ranged,   0xc0
    lea   ecx,      [bitd + ranged * 2]
    movzx ranged,   byte [tablesq + rcx + H264_LPS_RANGE_OFFSET]

    ; tmp = old range - LPS range; ecx keeps the unshifted copy
    sub   tmpd,     ranged
    mov   ecx,      tmpd
    shl   tmpd,     17

    ; branchless selection driven by the flags of a single cmp
    ; (cmova does not modify flags, so sbb still sees the carry of the cmp):
    ;   (tmp << 17) > low: range = old range - LPS range
    ;   (tmp << 17) < low: rcx = all ones, i.e. the state value is
    ;                      complemented and (tmp << 17) is subtracted from low
    cmp   tmpd,     lowd
    cmova ranged,   ecx
    sbb   rcx,      rcx
    and   tmpd,     ecx
    xor   bitq,     rcx
    sub   lowd,     tmpd

    ; renormalize: shift range and low by the count looked up for the new range
    movzx ecx,      byte [tablesq + H264_NORM_SHIFT_OFFSET       + rangeq]
    shl   ranged,   cl
    shl   lowd,     cl

    ; state transition; bit is either the old state or its complement (a
    ; negative index), presumably the reason for the +128 bias into the table
    movzx tmpd,     byte [tablesq + H264_MLPS_STATE_OFFSET + 128 + bitq]
    mov   [statep], tmpb

    ; a refill is needed only when the low 16 bits of low are all zero
    test  loww,     loww
    jnz   %%finish

    ; read the next two bytes; the pointer only moves while it is below
    ; bytestream_end (pointers are compared as unsigned values)
    movzx tmpd,        word [bytestreamq]
    cmp   bytestreamq, bytestream_endq
    jae   %%skip_bs_advance
    add   bytestreamq, 2
%%skip_bs_advance:

    ; rcx = (low ^ (low - 1)) >> 15: derived from the lowest set bit of low,
    ; used below as an index into the norm shift table
    lea   ecx,  [lowd - 1]
    xor   ecx,  lowd
    shr   ecx,  15
    ; bswap moves the two loaded bytes, in stream order, into the top half;
    ; the shift leaves (16-bit big-endian value) << 1
    bswap tmpd
    shr   tmpd, 15

    ; low += (tmp - 0xffff) << (7 - norm_shift[rcx])
    movzx ecx,  byte [tablesq + rcx + H264_NORM_SHIFT_OFFSET]
    sub   tmpd, 0xffff
    neg   ecx
    add   ecx,  7
    shl   tmpd, cl
    add   lowd, tmpd
%%finish:
    pop rcx
%endmacro

;-----------------------------------------------------------------------------
; DECODE_SIGNIFICANCE is_8x8
;
; Emits the complete body (including RET) of a significance decoding function.
; It must directly follow a cglobal that names the registers used below and
; reserves two gprsize-sized stack slots.
;
; is_8x8 = 0: the state byte of a position is significant_coeff_ctx_base +
;             counter, the matching "last" state lies last_off bytes further,
;             and the loop bound is max_coeff - 1
; is_8x8 = 1: the state offsets are read from the sig_off byte table and from
;             h264_cabac_tables, and the loop bound is the constant 63
;
; For every position whose first decision has bit 0 set, the position is
; stored as a dword to index[]. Decoding stops when the second decision for
; such a position also has bit 0 set, or when the loop bound is reached (the
; bound itself is then stored as the final entry).
; Returns in rax the number of dwords written to index[].
;-----------------------------------------------------------------------------
%macro DECODE_SIGNIFICANCE 1
    ; store the values we won't need until the end on stack
    ; [rsp]           = 4 - index; adding the final index pointer to it gives
    ;                   the size in bytes of everything written to index[]
    ; [rsp + gprsize] = cc pointer, because its register is reused for tables
    mov qword [rsp],           4
    sub       [rsp],           indexq
    mov qword [rsp + gprsize], ccq

    ; load CABACContext fields
    mov lowd,            [ccq + CC_OFF_LOW]
    mov ranged,          [ccq + CC_OFF_RANGE]
    mov bytestreamq,     [ccq + CC_OFF_BYTESTREAM]
    mov bytestream_endq, [ccq + CC_OFF_BYTESTREAM_END]

    ; recycle the cabac context register for tables
    %define tablesq ccq
    lea tablesq, [h264_cabac_tables]

%if %1
    ; 8x8: fixed loop bound, state address is computed into a register
    %define loop_count 63
    %define statep stateq
%else
    ; loop bound is max_coeff - 1, state address is base + counter
    sub     max_coeffd, 1
    %define loop_count  max_coeffd
    %define statep      significant_coeff_ctx_baseq + counterq
%endif

    ; counter = position currently being decoded
    xor counterq, counterq

.loop:
%if %1
    ; state = significant_coeff_ctx_base + sig_off[counter]
    movzx stated, byte [sig_offq + counterq]
    add   stateq, significant_coeff_ctx_baseq
%endif

    ; first decision for this position
    GET_CABAC

    ; bit 0 clear: nothing to record, go to the next position
    test bitq, 1
    jz .continue

%if %1
    ; state = last_coeff_ctx_base + table entry for this position
    movzx stated, byte [tablesq + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET + counterq]
    add   stateq, last_coeff_ctx_baseq
%else
    ; temporarily bias the base so that statep addresses the "last" state
    add significant_coeff_ctx_baseq, last_offq
%endif

    ; second decision for this position
    GET_CABAC

%if !%1
    ; undo the temporary bias
    sub significant_coeff_ctx_baseq, last_offq
%endif

    ; record the position in the current index[] slot
    mov [indexq], counterd

    ; bit 0 set: stop decoding, the slot just written is the final one
    test bitq, 1
    jnz  .finish

    ; otherwise move on to the next index[] slot
    add  indexq, 4

.continue:
    ; next position; unsigned compare against the loop bound
    add counterd, 1
    cmp counterd, loop_count
    jb .loop

    ; the loop ran to its bound: store the bound itself as the final entry
    mov [indexq], counterd

.finish:
    ; update CABACContext
    ; (the cc pointer is reloaded first, its register held the tables address;
    ; bytestream_end is not written back)
    mov ccq,                       [rsp + gprsize]
    mov [ccq + CC_OFF_LOW],        lowd
    mov [ccq + CC_OFF_RANGE],      ranged
    mov [ccq + CC_OFF_BYTESTREAM], bytestreamq

    ; (final index pointer + 4 - initial index pointer) / 4
    ; = number of dword entries written
    add indexq, [rsp]
    shr indexq, 2

    ; return value goes to rax
    movifnidn rax, indexq

    RET
%endmacro

; int ff_h264_decode_significance_x86(CABACContext *cc, int max_coeff,
;       uint8_t *significant_coeff_ctx_base, int *index, ptrdiff_t last_off)
;
; Variant with a caller-supplied loop bound (max_coeff) and with the "last"
; states located last_off bytes after the "significant" ones.
; Writes the recorded positions to index[] and returns how many entries were
; written; low, range and bytestream of *cc are updated, as are the state
; bytes that were used.
;
; cglobal parameters (x86inc): 5 arguments, 12 general purpose registers, no
; vector registers, and gprsize * 2 bytes of stack for the two slots that
; DECODE_SIGNIFICANCE keeps at [rsp] and [rsp + gprsize].
cglobal h264_decode_significance_x86, 5, 12, 0, gprsize * 2,        \
        cc, max_coeff, significant_coeff_ctx_base, index, last_off, \
        bytestream, bytestream_end, low, range, bit, counter, tmp
DECODE_SIGNIFICANCE 0

; NOTE(review): presumed prototype, derived from the argument names and from
; how the registers are used -- confirm against the C declaration:
; int ff_h264_decode_significance_8x8_x86(CABACContext *cc,
;       uint8_t *significant_coeff_ctx_base, int *index,
;       uint8_t *last_coeff_ctx_base, const uint8_t *sig_off)
;
; 8x8 variant: the loop bound is fixed at 63, the offset of each
; "significant" state is read from the sig_off byte table, and the offset of
; each "last" state is read from h264_cabac_tables.
; Writes the recorded positions to index[] and returns how many entries were
; written; low, range and bytestream of *cc are updated, as are the state
; bytes that were used.
;
; One more general purpose register than the non-8x8 variant (13) is needed
; for the computed state address.
cglobal h264_decode_significance_8x8_x86, 5, 13, 0, gprsize * 2,              \
        cc, significant_coeff_ctx_base, index, last_coeff_ctx_base, sig_off,  \
        bytestream, bytestream_end, low, range, bit, counter, tmp, state
DECODE_SIGNIFICANCE 1