summary refs log tree commit diff
path: root/libavcodec/i386
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2003-12-02 20:28:10 +0000
committer Michael Niedermayer <michaelni@gmx.at> 2003-12-02 20:28:10 +0000
commit 359f98ded95ec65f7109b25f3cfaa9189c4fab9c (patch)
tree d157f9af3baac7c89d33223fbbd63a02a81cb007 /libavcodec/i386
parent e1f03acc420383cf39310c9089627c52838f8334 (diff)
h263_v_loop_filter_mmx
Originally committed as revision 2552 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386')
-rw-r--r-- libavcodec/i386/dsputil_mmx.c 92
1 files changed, 92 insertions, 0 deletions
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 3bded6f247..37880290cd 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -22,6 +22,8 @@
#include "../dsputil.h"
#include "../simple_idct.h"
+extern const uint8_t ff_h263_loop_filter_strength[32]; /* defined in another translation unit; indexed by qscale in h263_v_loop_filter_mmx */
+
int mm_flags; /* multimedia extension flags */
/* pixel operations */
@@ -34,6 +36,8 @@ static const uint64_t ff_pw_3 __attribute__ ((aligned(8))) = 0x0003000300030003
static const uint64_t ff_pw_16 __attribute__ ((aligned(8))) = 0x0010001000100010ULL;
static const uint64_t ff_pw_15 __attribute__ ((aligned(8))) = 0x000F000F000F000FULL;
+static const uint64_t ff_pb_FC __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL; /* 0xFC in each of the 8 bytes; pand mask in h263_v_loop_filter_mmx that clears the low 2 bits of every byte before a 16-bit shift right by 2 */
+
#define JUMPALIGN() __asm __volatile (".balign 8"::)
#define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::)
@@ -465,6 +469,92 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
dst[i+0] += src[i+0];
}
+static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){
+ const int strength= ff_h263_loop_filter_strength[qscale]; // table has 32 entries; qscale is not range-checked here
+ /*
+  * MMX loop filter over one 8-pixel-wide span covering four consecutive rows.
+  * Row names used in the comments below:
+  *   p0 = src - 2*stride (%0), p1 = src - stride (%1),
+  *   p2 = src            (%2), p3 = src + stride (%3)
+  * Other operands: %4 = 2*strength, %5 = ff_pb_FC.
+  * S(x) denotes an unsigned saturating byte subtraction (result clamps at 0).
+  *
+  * Per pixel column the code computes
+  *   d  = (p0 - p3 + 4*(p2 - p1)) / 8, kept as magnitude abs(d) plus a sign mask
+  *   d1 = sign(d) * MIN(abs(d), S(2*strength - abs(d)))
+  *   p1 += d1, p2 -= d1               (unsigned saturating byte arithmetic)
+  *   d2 = sign(p0-p3) * (MIN(abs(p0-p3), 2*abs(d1)) >> 2)
+  *        where p0-p3 is first saturated to the signed byte range
+  *   p0 -= d2, p3 += d2               (wrapping byte arithmetic)
+  * All four rows are read and written in place, 8 bytes each.
+  */
+ asm volatile(
+ "pxor %%mm7, %%mm7 \n\t" // mm7 = 0, used as the zero source for byte->word unpacking
+ "movq %0, %%mm0 \n\t" // mm0 = 8 bytes of p0
+ "movq %0, %%mm1 \n\t" // mm1 = 8 bytes of p0 (second copy for the high half)
+ "movq %3, %%mm2 \n\t" // mm2 = 8 bytes of p3
+ "movq %3, %%mm3 \n\t" // mm3 = 8 bytes of p3 (second copy for the high half)
+ "punpcklbw %%mm7, %%mm0 \n\t" // mm0 = p0, low 4 pixels zero-extended to 16 bit
+ "punpckhbw %%mm7, %%mm1 \n\t" // mm1 = p0, high 4 pixels zero-extended to 16 bit
+ "punpcklbw %%mm7, %%mm2 \n\t" // mm2 = p3, low 4 pixels as words
+ "punpckhbw %%mm7, %%mm3 \n\t" // mm3 = p3, high 4 pixels as words
+ "psubw %%mm2, %%mm0 \n\t" // mm0 = p0 - p3 (low half)
+ "psubw %%mm3, %%mm1 \n\t" // mm1 = p0 - p3 (high half)
+ "movq %1, %%mm2 \n\t" // mm2 = 8 bytes of p1
+ "movq %1, %%mm3 \n\t" // mm3 = 8 bytes of p1
+ "movq %2, %%mm4 \n\t" // mm4 = 8 bytes of p2
+ "movq %2, %%mm5 \n\t" // mm5 = 8 bytes of p2
+ "punpcklbw %%mm7, %%mm2 \n\t" // mm2 = p1, low half as words
+ "punpckhbw %%mm7, %%mm3 \n\t" // mm3 = p1, high half as words
+ "punpcklbw %%mm7, %%mm4 \n\t" // mm4 = p2, low half as words
+ "punpckhbw %%mm7, %%mm5 \n\t" // mm5 = p2, high half as words
+ "psubw %%mm2, %%mm4 \n\t" // mm4 = p2 - p1 (low half)
+ "psubw %%mm3, %%mm5 \n\t" // mm5 = p2 - p1 (high half)
+ "psllw $2, %%mm4 \n\t" // mm4 = 4*(p2 - p1)
+ "psllw $2, %%mm5 \n\t" // mm5 = 4*(p2 - p1)
+ "paddw %%mm0, %%mm4 \n\t" // mm4 = (p0 - p3) + 4*(p2 - p1), low half
+ "paddw %%mm1, %%mm5 \n\t" // mm5 = (p0 - p3) + 4*(p2 - p1), high half
+ "pxor %%mm6, %%mm6 \n\t" // mm6 = 0
+ "pcmpgtw %%mm4, %%mm6 \n\t" // mm6 = 0xFFFF in each word where mm4 < 0 (sign mask, low half)
+ "pcmpgtw %%mm5, %%mm7 \n\t" // mm7 (still 0 before this) = sign mask for the high half
+ "pxor %%mm6, %%mm4 \n\t" // absolute value step 1: invert negative words
+ "pxor %%mm7, %%mm5 \n\t" // (same for the high half)
+ "psubw %%mm6, %%mm4 \n\t" // absolute value step 2: subtracting the mask (-1) adds 1
+ "psubw %%mm7, %%mm5 \n\t" // (same for the high half)
+ "psrlw $3, %%mm4 \n\t" // magnitude / 8
+ "psrlw $3, %%mm5 \n\t" // magnitude / 8
+ "packuswb %%mm5, %%mm4 \n\t" //abs(d)
+ "packsswb %%mm7, %%mm6 \n\t" //sign(d)
+ "pxor %%mm7, %%mm7 \n\t" // mm7 = 0 again, reused for the second sign mask below
+ "movd %4, %%mm2 \n\t" // low 32 bits of mm2 = 2*strength
+ "punpcklbw %%mm2, %%mm2 \n\t" // three self-interleaves replicate the lowest byte ...
+ "punpcklbw %%mm2, %%mm2 \n\t" // ... into all 8 byte lanes
+ "punpcklbw %%mm2, %%mm2 \n\t" //2*strength
+ "psubusb %%mm4, %%mm2 \n\t" // S(2*strength - abs(d))
+ "movq %%mm2, %%mm3 \n\t" // S(2*strength - abs(d))
+ "psubusb %%mm4, %%mm3 \n\t" // S(S(2*strength - abs(d)) - abs(d))
+ "psubb %%mm3, %%mm2 \n\t" // MIN(abs(d), S(2*strength - abs(d)))
+ "movq %1, %%mm3 \n\t" // mm3 = p1 bytes (reloaded)
+ "movq %2, %%mm4 \n\t" // mm4 = p2 bytes (reloaded)
+ "pxor %%mm6, %%mm3 \n\t" // invert lanes where d is negative so one saturating add/sub handles both signs
+ "pxor %%mm6, %%mm4 \n\t" // (same for p2)
+ "paddusb %%mm2, %%mm3 \n\t" // p1 (possibly inverted) + abs(d1), unsigned saturating
+ "psubusb %%mm2, %%mm4 \n\t" // p2 (possibly inverted) - abs(d1), unsigned saturating
+ "pxor %%mm6, %%mm3 \n\t" // undo the inversion: net effect p1 += d1
+ "pxor %%mm6, %%mm4 \n\t" // undo the inversion: net effect p2 -= d1
+ "movq %%mm3, %1 \n\t" // store filtered p1 row
+ "movq %%mm4, %2 \n\t" // store filtered p2 row
+ "paddusb %%mm2, %%mm2 \n\t" // mm2 = 2*abs(d1) (saturating)
+ "packsswb %%mm1, %%mm0 \n\t" // mm0 = (p0 - p3) packed to bytes with signed saturation
+ "pcmpgtb %%mm0, %%mm7 \n\t" // mm7 = 0xFF in each byte where (p0 - p3) < 0
+ "pxor %%mm7, %%mm0 \n\t" // absolute value step 1 (bytes)
+ "psubb %%mm7, %%mm0 \n\t" // absolute value step 2: mm0 = abs(p0 - p3)
+ "movq %%mm0, %%mm1 \n\t" // mm1 = abs(p0 - p3)
+ "psubusb %%mm2, %%mm0 \n\t" // mm0 = S(abs(p0 - p3) - 2*abs(d1))
+ "psubb %%mm0, %%mm1 \n\t" // mm1 = MIN(abs(p0 - p3), 2*abs(d1))
+ "pand %5, %%mm1 \n\t" // clear the low 2 bits of every byte so the word shift cannot leak bits into the neighbouring byte
+ "psrlw $2, %%mm1 \n\t" // each byte >>= 2 (MMX has no per-byte shift)
+ "pxor %%mm7, %%mm1 \n\t" // reapply the sign of (p0 - p3), step 1
+ "psubb %%mm7, %%mm1 \n\t" // reapply the sign, step 2: mm1 = d2
+ "movq %0, %%mm3 \n\t" // mm3 = p0 bytes (reloaded)
+ "movq %3, %%mm4 \n\t" // mm4 = p3 bytes (reloaded)
+ "psubb %%mm1, %%mm3 \n\t" // p0 -= d2 (wrapping)
+ "paddb %%mm1, %%mm4 \n\t" // p3 += d2 (wrapping)
+ "movq %%mm3, %0 \n\t" // store filtered p0 row
+ "movq %%mm4, %3 \n\t" // store filtered p3 row
+
+ : "+m" (*(uint64_t*)(src - 2*stride)), // %0: p0 row, read and written
+ "+m" (*(uint64_t*)(src - 1*stride)), // %1: p1 row, read and written
+ "+m" (*(uint64_t*)(src + 0*stride)), // %2: p2 row, read and written
+ "+m" (*(uint64_t*)(src + 1*stride)) // %3: p3 row, read and written
+ : "g" (2*strength), "m"(ff_pb_FC) // %4, %5. NOTE(review): "g" also permits an immediate, which movd cannot encode - confirm this never constant-folds
+ ); // NOTE(review): mm0-mm7 are all overwritten but no clobber list is declared
+}
+
#ifdef CONFIG_ENCODERS
static int pix_norm1_mmx(uint8_t *pix, int line_size) {
int tmp;
@@ -1691,6 +1781,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->pix_norm1 = pix_norm1_mmx;
c->sse[0] = sse16_mmx;
#endif //CONFIG_ENCODERS
+
+ c->h263_v_loop_filter= h263_v_loop_filter_mmx;
if (mm_flags & MM_MMXEXT) {
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;