summary | refs | log | tree | commit | diff
path: root/libavcodec/x86
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2012-01-17 01:40:45 +0100
committer: Michael Niedermayer <michaelni@gmx.at> 2012-01-17 02:37:30 +0100
commit: 67f5650a78de2567c58dbd7545434cc6d3ef9b7e (patch)
tree: 34b08ed769cd7a1f071bf9ff4eca1348481c0bf1 /libavcodec/x86
parent: 905c4dc2b0d564e1b9b6bc6eeca0b8915b81cd8c (diff)
parent: 9e12002f114d7e0b0ef69519518cdc0391e5e198 (diff)
Merge remote-tracking branch 'qatar/master'
* qatar/master:
  rv34: add NEON rv34_idct_add
  rv34: 1-pass inter MB reconstruction
  add SMJPEG muxer
  avformat: split out common SMJPEG code
  pictordec: Use bytestream2 functions
  avconv: use avcodec_encode_audio2()
  pcmenc: use AVCodec.encode2()
  avcodec: bump minor version and add APIChanges for the new audio encoding API
  avcodec: Add avcodec_encode_audio2() as replacement for avcodec_encode_audio()
  avcodec: add a public function, avcodec_fill_audio_frame().
  rv34: Intra 16x16 handling
  rv34: Inter/intra MB code split

Conflicts:
  Changelog
  libavcodec/avcodec.h
  libavcodec/pictordec.c
  libavcodec/utils.c
  libavcodec/version.h
  libavcodec/x86/rv34dsp.asm
  libavformat/version.h

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/rv34dsp.asm | 83
-rw-r--r-- libavcodec/x86/rv34dsp_init.c | 13
2 files changed, 82 insertions, 14 deletions
diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm
index a70ad07e87..75bf1ae08a 100644
--- a/libavcodec/x86/rv34dsp.asm
+++ b/libavcodec/x86/rv34dsp.asm
@@ -35,21 +35,84 @@ SECTION .text
sar %1, 10
%endmacro
-%macro rv34_idct_dequant4x4_dc 1
-cglobal rv34_idct_dequant4x4_%1_mmx2, 1, 2, 0
+%macro rv34_idct 1
+cglobal rv34_idct_%1_mmx2, 1, 2, 0
movsx r1, word [r0]
IDCT_DC r1
- movd mm0, r1d
- pshufw mm0, mm0, 0
- movq [r0+ 0], mm0
- movq [r0+16], mm0
- movq [r0+32], mm0
- movq [r0+48], mm0
+ movd m0, r1d
+ pshufw m0, m0, 0
+ movq [r0+ 0], m0
+ movq [r0+ 8], m0
+ movq [r0+16], m0
+ movq [r0+24], m0
REP_RET
%endmacro
INIT_MMX
%define IDCT_DC IDCT_DC_ROUND
-rv34_idct_dequant4x4_dc dc
+rv34_idct dc
%define IDCT_DC IDCT_DC_NOROUND
-rv34_idct_dequant4x4_dc dc_noround
+rv34_idct dc_noround
+
+; ff_rv34_idct_dc_add_mmx(uint8_t *dst, int stride, int dc);
+cglobal rv34_idct_dc_add_mmx, 3, 3
+ ; calculate DC
+ IDCT_DC_ROUND r2
+ pxor m1, m1
+ movd m0, r2
+ psubw m1, m0
+ packuswb m0, m0
+ packuswb m1, m1
+ punpcklbw m0, m0
+ punpcklbw m1, m1
+ punpcklwd m0, m0
+ punpcklwd m1, m1
+
+ ; add DC
+ lea r2, [r0+r1*2]
+ movh m2, [r0]
+ movh m3, [r0+r1]
+ movh m4, [r2]
+ movh m5, [r2+r1]
+ paddusb m2, m0
+ paddusb m3, m0
+ paddusb m4, m0
+ paddusb m5, m0
+ psubusb m2, m1
+ psubusb m3, m1
+ psubusb m4, m1
+ psubusb m5, m1
+ movh [r0], m2
+ movh [r0+r1], m3
+ movh [r2], m4
+ movh [r2+r1], m5
+ RET
+
+; ff_rv34_idct_dc_add_sse4(uint8_t *dst, int stride, int dc);
+INIT_XMM
+cglobal rv34_idct_dc_add_sse4, 3, 3, 6
+ ; load data
+ IDCT_DC_ROUND r2
+ pxor m1, m1
+
+ ; calculate DC
+ movd m0, r2
+ lea r2, [r0+r1*2]
+ movd m2, [r0]
+ movd m3, [r0+r1]
+ pshuflw m0, m0, 0
+ movd m4, [r2]
+ movd m5, [r2+r1]
+ punpcklqdq m0, m0
+ punpckldq m2, m3
+ punpckldq m4, m5
+ punpcklbw m2, m1
+ punpcklbw m4, m1
+ paddw m2, m0
+ paddw m4, m0
+ packuswb m2, m4
+ movd [r0], m2
+ pextrd [r0+r1], m2, 1
+ pextrd [r2], m2, 2
+ pextrd [r2+r1], m2, 3
+ RET
diff --git a/libavcodec/x86/rv34dsp_init.c b/libavcodec/x86/rv34dsp_init.c
index 4317e9b23b..f3d2e172e7 100644
--- a/libavcodec/x86/rv34dsp_init.c
+++ b/libavcodec/x86/rv34dsp_init.c
@@ -24,17 +24,22 @@
#include "libavcodec/dsputil.h"
#include "libavcodec/rv34dsp.h"
-void ff_rv34_idct_dequant4x4_dc_mmx2(DCTELEM *block);
-void ff_rv34_idct_dequant4x4_dc_noround_mmx2(DCTELEM *block);
+void ff_rv34_idct_dc_mmx2(DCTELEM *block);
+void ff_rv34_idct_dc_noround_mmx2(DCTELEM *block);
+void ff_rv34_idct_dc_add_mmx(uint8_t *dst, int stride, int dc);
+void ff_rv34_idct_dc_add_sse4(uint8_t *dst, int stride, int dc);
av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp)
{
#if HAVE_YASM
int mm_flags = av_get_cpu_flags();
+ if (mm_flags & AV_CPU_FLAG_MMX)
+ c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
if (mm_flags & AV_CPU_FLAG_MMX2) {
- c->rv34_inv_transform_dc_tab[0] = ff_rv34_idct_dequant4x4_dc_mmx2;
- c->rv34_inv_transform_dc_tab[1] = ff_rv34_idct_dequant4x4_dc_noround_mmx2;
+ c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2;
}
+ if (mm_flags & AV_CPU_FLAG_SSE4)
+ c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4;
#endif
}