summaryrefslogtreecommitdiff
path: root/libavcodec/arm/vp8_armv6.S
blob: 88fa3985905270c7086f4740c7c760c16816558b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
/**
 * Copyright (C) 2010 Mans Rullgard
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "asm.S"

        .syntax         unified

.macro rac_get_prob     h, bs, buf, cw, pr, t0, t1
        adds            \bs, \bs, \t0
        lsl             \cw, \cw, \t0
        lsl             \t0, \h,  \t0
        rsb             \h,  \pr, #256
        ldrhcs          \t1, [\buf], #2
        smlabb          \h,  \t0, \pr, \h
        rev16cs         \t1, \t1
        orrcs           \cw, \cw, \t1, lsl \bs
        subcs           \bs, \bs, #16
        lsr             \h,  \h,  #8
        cmp             \cw, \h,  lsl #16
        subge           \cw, \cw, \h,  lsl #16
        subge           \h,  \t0, \h
.endm

.macro rac_get_128      h, bs, buf, cw, t0, t1
        adds            \bs, \bs, \t0
        lsl             \cw, \cw, \t0
        lsl             \t0, \h,  \t0
        ldrhcs          \t1, [\buf], #2
        mov             \h,  #128
        rev16cs         \t1, \t1
        add             \h,  \h,  \t0, lsl #7
        orrcs           \cw, \cw, \t1, lsl \bs
        subcs           \bs, \bs, #16
        lsr             \h,  \h,  #8
        cmp             \cw, \h,  lsl #16
        subge           \cw, \cw, \h,  lsl #16
        subge           \h,  \t0, \h
.endm

function ff_decode_block_coeffs_armv6, export=1
        push            {r0,r1,r4-r11,lr}
        movrel          lr,  X(ff_vp56_norm_shift)
        ldrd            r4,  r5,  [sp, #44]             @ token_prob, qmul
        cmp             r3,  #0
        ldr             r11, [r5]
        ldm             r0,  {r5-r7}                    @ high, bits, buf
        pkhtbne         r11, r11, r11, asr #16
        ldr             r8,  [r0, #16]                  @ code_word
0:
        ldrb            r9,  [lr, r5]
        add             r3,  r3,  #1
        ldrb            r0,  [r4, #1]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        blt             2f

        ldrb            r9,  [lr, r5]
        ldrb            r0,  [r4, #2]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r9,  [lr, r5]
        bge             3f

        add             r4,  r3,  r3,  lsl #5
        sxth            r12, r11
        add             r4,  r2,  r4
        adds            r6,  r6,  r9
        add             r4,  r4,  #11
        lsl             r8,  r8,  r9
        ldrhcs          r10, [r7], #2
        lsl             r9,  r5,  r9
        mov             r5,  #128
        rev16cs         r10, r10
        add             r5,  r5,  r9,  lsl #7
        orrcs           r8,  r8,  r10, lsl r6
        subcs           r6,  r6,  #16
        lsr             r5,  r5,  #8
        cmp             r8,  r5,  lsl #16
        movrel          r10, zigzag_scan-1
        subge           r8,  r8,  r5,  lsl #16
        subge           r5,  r9,  r5
        ldrb            r10, [r10, r3]
        rsbge           r12, r12, #0
        cmp             r3,  #16
        strh            r12, [r1, r10]
        bge             6f
5:
        ldrb            r9,  [lr, r5]
        ldrb            r0,  [r4]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        pkhtb           r11, r11, r11, asr #16
        bge             0b

6:
        ldr             r0,  [sp]
        ldr             r9,  [r0, #12]
        cmp             r7,  r9
        movhi           r7,  r9
        stm             r0,  {r5-r7}                    @ high, bits, buf
        str             r8,  [r0, #16]                  @ code_word

        add             sp,  sp,  #8
        mov             r0,  r3
        pop             {r4-r11,pc}
2:
        add             r4,  r3,  r3,  lsl #5
        cmp             r3,  #16
        add             r4,  r2,  r4
        pkhtb           r11, r11, r11, asr #16
        bne             0b
        b               6b
3:
        ldrb            r0,  [r4, #3]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r9,  [lr, r5]
        bge             1f

        mov             r12, #2
        ldrb            r0,  [r4, #4]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        addge           r12, #1
        ldrb            r9,  [lr, r5]
        blt             4f
        ldrb            r0,  [r4, #5]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        addge           r12, #1
        ldrb            r9,  [lr, r5]
        b               4f
1:
        ldrb            r0,  [r4, #6]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r9,  [lr, r5]
        bge             3f

        ldrb            r0,  [r4, #7]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r9,  [lr, r5]
        bge             2f

        mov             r12, #5
        mov             r0,  #159
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        addge           r12, r12, #1
        ldrb            r9,  [lr, r5]
        b               4f
2:
        mov             r12, #7
        mov             r0,  #165
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        addge           r12, r12, #2
        ldrb            r9,  [lr, r5]
        mov             r0,  #145
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        addge           r12, r12, #1
        ldrb            r9,  [lr, r5]
        b               4f
3:
        ldrb            r0,  [r4, #8]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        addge           r4,  r4,  #1
        ldrb            r9,  [lr, r5]
        movge           r12, #2
        movlt           r12, #0
        ldrb            r0,  [r4, #9]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        mov             r9,  #8
        addge           r12, r12, #1
        movrel          r4,  ff_vp8_dct_cat_prob
        lsl             r9,  r9,  r12
        ldr             r4,  [r4, r12, lsl #2]
        add             r12, r9,  #3
        mov             r1,  #0
        ldrb            r0,  [r4], #1
1:
        ldrb            r9,  [lr, r5]
        lsl             r1,  r1,  #1
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r0,  [r4], #1
        addge           r1,  r1,  #1
        cmp             r0,  #0
        bne             1b
        ldrb            r9,  [lr, r5]
        add             r12, r12, r1
        ldr             r1,  [sp, #4]
4:
        add             r4,  r3,  r3,  lsl #5
        add             r4,  r2,  r4
        add             r4,  r4,  #22
        rac_get_128     r5,  r6,  r7,  r8,  r9,  r10
        rsbge           r12, r12, #0
        smulbb          r12, r12, r11
        movrel          r9,  zigzag_scan-1
        ldrb            r9,  [r9, r3]
        cmp             r3,  #16
        strh            r12, [r1, r9]
        bge             6b
        b               5b
endfunc

        .section        .rodata
zigzag_scan:
        .byte            0,  2,  8, 16
        .byte           10,  4,  6, 12
        .byte           18, 24, 26, 20
        .byte           14, 22, 28, 30