summaryrefslogtreecommitdiff
path: root/libavcodec/x86/me_cmp_init.c
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86/me_cmp_init.c')
-rw-r--r--libavcodec/x86/me_cmp_init.c133
1 files changed, 20 insertions, 113 deletions
diff --git a/libavcodec/x86/me_cmp_init.c b/libavcodec/x86/me_cmp_init.c
index cb47d631f9..6dc59f5aa4 100644
--- a/libavcodec/x86/me_cmp_init.c
+++ b/libavcodec/x86/me_cmp_init.c
@@ -65,6 +65,18 @@ int ff_sad16_approx_xy2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
int stride, int h);
int ff_sad16_approx_xy2_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
int stride, int h);
+int ff_vsad_intra8_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+ int line_size, int h);
+int ff_vsad_intra16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+ int line_size, int h);
+int ff_vsad_intra16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+ int line_size, int h);
+int ff_vsad8_approx_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+ int line_size, int h);
+int ff_vsad16_approx_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+ int line_size, int h);
+int ff_vsad16_approx_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+ int line_size, int h);
#define hadamard_func(cpu) \
int ff_hadamard8_diff_ ## cpu(MpegEncContext *s, uint8_t *src1, \
@@ -177,49 +189,6 @@ static int vsad_intra16_mmx(MpegEncContext *v, uint8_t *pix, uint8_t *dummy,
}
#undef SUM
-static int vsad_intra16_mmxext(MpegEncContext *v, uint8_t *pix, uint8_t *dummy,
- int line_size, int h)
-{
- int tmp;
-
- av_assert2((((int) pix) & 7) == 0);
- av_assert2((line_size & 7) == 0);
-
-#define SUM(in0, in1, out0, out1) \
- "movq (%0), " #out0 "\n" \
- "movq 8(%0), " #out1 "\n" \
- "add %2, %0\n" \
- "psadbw " #out0 ", " #in0 "\n" \
- "psadbw " #out1 ", " #in1 "\n" \
- "paddw " #in1 ", " #in0 "\n" \
- "paddw " #in0 ", %%mm6\n"
-
- __asm__ volatile (
- "movl %3, %%ecx\n"
- "pxor %%mm6, %%mm6\n"
- "pxor %%mm7, %%mm7\n"
- "movq (%0), %%mm0\n"
- "movq 8(%0), %%mm1\n"
- "add %2, %0\n"
- "jmp 2f\n"
- "1:\n"
-
- SUM(%%mm4, %%mm5, %%mm0, %%mm1)
- "2:\n"
- SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-
- "subl $2, %%ecx\n"
- "jnz 1b\n"
-
- "movd %%mm6, %1\n"
- : "+r" (pix), "=r" (tmp)
- : "r" ((x86_reg) line_size), "m" (h)
- : "%ecx");
-
- return tmp;
-}
-#undef SUM
-
static int vsad16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
int line_size, int h)
{
@@ -301,68 +270,6 @@ static int vsad16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
}
#undef SUM
-static int vsad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
- int line_size, int h)
-{
- int tmp;
-
- av_assert2((((int) pix1) & 7) == 0);
- av_assert2((((int) pix2) & 7) == 0);
- av_assert2((line_size & 7) == 0);
-
-#define SUM(in0, in1, out0, out1) \
- "movq (%0), " #out0 "\n" \
- "movq (%1), %%mm2\n" \
- "movq 8(%0), " #out1 "\n" \
- "movq 8(%1), %%mm3\n" \
- "add %3, %0\n" \
- "add %3, %1\n" \
- "psubb %%mm2, " #out0 "\n" \
- "psubb %%mm3, " #out1 "\n" \
- "pxor %%mm7, " #out0 "\n" \
- "pxor %%mm7, " #out1 "\n" \
- "psadbw " #out0 ", " #in0 "\n" \
- "psadbw " #out1 ", " #in1 "\n" \
- "paddw " #in1 ", " #in0 "\n" \
- "paddw " #in0 ", %%mm6\n "
-
- __asm__ volatile (
- "movl %4, %%ecx\n"
- "pxor %%mm6, %%mm6\n"
- "pcmpeqw %%mm7, %%mm7\n"
- "psllw $15, %%mm7\n"
- "packsswb %%mm7, %%mm7\n"
- "movq (%0), %%mm0\n"
- "movq (%1), %%mm2\n"
- "movq 8(%0), %%mm1\n"
- "movq 8(%1), %%mm3\n"
- "add %3, %0\n"
- "add %3, %1\n"
- "psubb %%mm2, %%mm0\n"
- "psubb %%mm3, %%mm1\n"
- "pxor %%mm7, %%mm0\n"
- "pxor %%mm7, %%mm1\n"
- "jmp 2f\n"
- "1:\n"
-
- SUM(%%mm4, %%mm5, %%mm0, %%mm1)
- "2:\n"
- SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-
- "subl $2, %%ecx\n"
- "jnz 1b\n"
-
- "movd %%mm6, %2\n"
- : "+r" (pix1), "+r" (pix2), "=r" (tmp)
- : "r" ((x86_reg) line_size), "m" (h)
- : "%ecx");
-
- return tmp;
-}
-#undef SUM
-
-
-
DECLARE_ASM_CONST(8, uint64_t, round_tab)[3] = {
0x0000000000000000ULL,
0x0001000100010001ULL,
@@ -667,14 +574,6 @@ av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
}
}
- if (INLINE_MMXEXT(cpu_flags)) {
- c->vsad[4] = vsad_intra16_mmxext;
-
- if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
- c->vsad[0] = vsad16_mmxext;
- }
- }
-
#endif /* HAVE_INLINE_ASM */
if (EXTERNAL_MMX(cpu_flags)) {
@@ -704,9 +603,15 @@ av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
c->pix_abs[1][1] = ff_sad8_x2_mmxext;
c->pix_abs[1][2] = ff_sad8_y2_mmxext;
+ c->vsad[4] = ff_vsad_intra16_mmxext;
+ c->vsad[5] = ff_vsad_intra8_mmxext;
+
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
c->pix_abs[0][3] = ff_sad16_approx_xy2_mmxext;
c->pix_abs[1][3] = ff_sad8_approx_xy2_mmxext;
+
+ c->vsad[0] = ff_vsad16_approx_mmxext;
+ c->vsad[1] = ff_vsad8_approx_mmxext;
}
}
@@ -724,8 +629,10 @@ av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
c->pix_abs[0][1] = ff_sad16_x2_sse2;
c->pix_abs[0][2] = ff_sad16_y2_sse2;
+ c->vsad[4] = ff_vsad_intra16_sse2;
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
c->pix_abs[0][3] = ff_sad16_approx_xy2_sse2;
+ c->vsad[0] = ff_vsad16_approx_sse2;
}
}
}