summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- libavcodec/arm/vp3dsp_neon.S 22
-rw-r--r-- libavcodec/ppc/vp3dsp_altivec.c 2
-rw-r--r-- libavcodec/vp3.c 5
-rw-r--r-- libavcodec/vp3dsp.c 5
-rw-r--r-- libavcodec/vp3dsp.h 2
-rw-r--r-- libavcodec/x86/vp3dsp.asm 27
-rw-r--r-- libavcodec/x86/vp3dsp_init.c 2
7 files changed, 45 insertions, 20 deletions
diff --git a/libavcodec/arm/vp3dsp_neon.S b/libavcodec/arm/vp3dsp_neon.S
index e09de57281..e5ecfc337e 100644
--- a/libavcodec/arm/vp3dsp_neon.S
+++ b/libavcodec/arm/vp3dsp_neon.S
@@ -108,14 +108,20 @@ endfunc
function vp3_idct_start_neon
vpush {d8-d15}
+ vmov.i16 q4, #0
+ vmov.i16 q5, #0
movrel r3, vp3_idct_constants
vld1.64 {d0-d1}, [r3,:128]
- vld1.64 {d16-d19}, [r2,:128]!
- vld1.64 {d20-d23}, [r2,:128]!
- vld1.64 {d24-d27}, [r2,:128]!
+ vld1.64 {d16-d19}, [r2,:128]
+ vst1.64 {q4-q5}, [r2,:128]!
+ vld1.64 {d20-d23}, [r2,:128]
+ vst1.64 {q4-q5}, [r2,:128]!
+ vld1.64 {d24-d27}, [r2,:128]
+ vst1.64 {q4-q5}, [r2,:128]!
vadd.s16 q1, q8, q12
vsub.s16 q8, q8, q12
- vld1.64 {d28-d31}, [r2,:128]!
+ vld1.64 {d28-d31}, [r2,:128]
+ vst1.64 {q4-q5}, [r2,:128]!
vp3_idct_core_neon:
vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16
@@ -345,10 +351,12 @@ function ff_vp3_idct_add_neon, export=1
endfunc
function ff_vp3_idct_dc_add_neon, export=1
- ldrsh r2, [r2]
+ ldrsh r12, [r2]
mov r3, r0
- add r2, r2, #15
- vdup.16 q15, r2
+ add r12, r12, #15
+ vdup.16 q15, r12
+ mov r12, 0
+ strh r12, [r2]
vshr.s16 q15, q15, #5
vld1.8 {d0}, [r0,:64], r1
diff --git a/libavcodec/ppc/vp3dsp_altivec.c b/libavcodec/ppc/vp3dsp_altivec.c
index 75a36779ce..6adf9aefac 100644
--- a/libavcodec/ppc/vp3dsp_altivec.c
+++ b/libavcodec/ppc/vp3dsp_altivec.c
@@ -140,6 +140,7 @@ static void vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
PUT(b5) dst += stride;
PUT(b6) dst += stride;
PUT(b7)
+ memset(block, 0, sizeof(*block) * 64);
}
static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
@@ -171,6 +172,7 @@ static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
ADD(b5) dst += stride;
ADD(b6) dst += stride;
ADD(b7)
+ memset(block, 0, sizeof(*block) * 64);
}
#endif /* HAVE_ALTIVEC */
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index 0340c22bb2..9417535314 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -138,6 +138,7 @@ typedef struct Vp3DecodeContext {
DSPContext dsp;
VideoDSPContext vdsp;
VP3DSPContext vp3dsp;
+ DECLARE_ALIGNED(16, DCTELEM, block)[64];
int flipped_image;
int last_slice_end;
int skip_loop_filter;
@@ -1458,7 +1459,7 @@ static void await_reference_row(Vp3DecodeContext *s, Vp3Fragment *fragment, int
static void render_slice(Vp3DecodeContext *s, int slice)
{
int x, y, i, j, fragment;
- LOCAL_ALIGNED_16(DCTELEM, block, [64]);
+ DCTELEM *block = s->block;
int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef;
int motion_halfpel_index;
uint8_t *motion_source;
@@ -1571,8 +1572,6 @@ static void render_slice(Vp3DecodeContext *s, int slice)
}
}
- s->dsp.clear_block(block);
-
/* invert DCT and place (or add) in final output */
if (s->all_fragments[i].coding_method == MODE_INTRA) {
diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c
index 9b0b5d0a9c..9e6209dfdd 100644
--- a/libavcodec/vp3dsp.c
+++ b/libavcodec/vp3dsp.c
@@ -215,14 +215,16 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
idct(dest, line_size, block, 1);
+ memset(block, 0, sizeof(*block) * 64);
}
static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
idct(dest, line_size, block, 2);
+ memset(block, 0, sizeof(*block) * 64);
}
static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size,
- const DCTELEM *block/*align 16*/){
+ DCTELEM *block/*align 16*/){
int i, dc = (block[0] + 15) >> 5;
for(i = 0; i < 8; i++){
@@ -236,6 +238,7 @@ static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size,
dest[7] = av_clip_uint8(dest[7] + dc);
dest += line_size;
}
+ block[0] = 0;
}
static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
diff --git a/libavcodec/vp3dsp.h b/libavcodec/vp3dsp.h
index 3781bbf3a7..feb300017a 100644
--- a/libavcodec/vp3dsp.h
+++ b/libavcodec/vp3dsp.h
@@ -25,7 +25,7 @@
typedef struct VP3DSPContext {
void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block);
void (*idct_add)(uint8_t *dest, int line_size, DCTELEM *block);
- void (*idct_dc_add)(uint8_t *dest, int line_size, const DCTELEM *block);
+ void (*idct_dc_add)(uint8_t *dest, int line_size, DCTELEM *block);
void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index fc1e776a13..d2c464c5cf 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -562,6 +562,13 @@ cglobal vp3_idct_put, 3, 4, 9
%endif
%assign %%i %%i+64
%endrep
+
+ pxor m0, m0
+%assign %%offset 0
+%rep 128/mmsize
+ mova [r2+%%offset], m0
+%assign %%offset %%offset+mmsize
+%endrep
RET
cglobal vp3_idct_add, 3, 4, 9
@@ -600,6 +607,11 @@ cglobal vp3_idct_add, 3, 4, 9
movhps [r0+r1], m0
%endif
lea r0, [r0+r1*2]
+%assign %%offset 0
+%rep 32/mmsize
+ mova [r2+%%offset], m4
+%assign %%offset %%offset+mmsize
+%endrep
add r2, 32
dec r3
jg .loop
@@ -620,7 +632,7 @@ vp3_idct_funcs
paddusb m2, m0
movq m4, [r0+r1*2]
paddusb m3, m0
- movq m5, [r0+r3 ]
+ movq m5, [r0+r2 ]
paddusb m4, m0
paddusb m5, m0
psubusb m2, m1
@@ -630,7 +642,7 @@ vp3_idct_funcs
movq [r0+r1 ], m3
psubusb m5, m1
movq [r0+r1*2], m4
- movq [r0+r3 ], m5
+ movq [r0+r2 ], m5
%endmacro
INIT_MMX mmxext
@@ -638,11 +650,12 @@ cglobal vp3_idct_dc_add, 3, 4
%if ARCH_X86_64
movsxd r1, r1d
%endif
- lea r3, [r1*3]
- movsx r2, word [r2]
- add r2, 15
- sar r2, 5
- movd m0, r2d
+ movsx r3, word [r2]
+ mov word [r2], 0
+ lea r2, [r1*3]
+ add r3, 15
+ sar r3, 5
+ movd m0, r3d
pshufw m0, m0, 0x0
pxor m1, m1
psubw m1, m0
diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c
index bbe74ba44a..95beeabfec 100644
--- a/libavcodec/x86/vp3dsp_init.c
+++ b/libavcodec/x86/vp3dsp_init.c
@@ -32,7 +32,7 @@ void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
void ff_vp3_idct_dc_add_mmxext(uint8_t *dest, int line_size,
- const DCTELEM *block);
+ DCTELEM *block);
void ff_vp3_v_loop_filter_mmxext(uint8_t *src, int stride,
int *bounding_values);