summaryrefslogtreecommitdiff
path: root/libavcodec/x86/lossless_videodsp_init.c
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2014-01-22 22:55:49 +0100
committerMichael Niedermayer <michaelni@gmx.at>2014-01-22 22:55:49 +0100
commitfad49aae28cf3e47791f03a04d7aad328a6d6fdf (patch)
tree77a3ce9dbe4baebb9a9492f275c34d64c2b0758a /libavcodec/x86/lossless_videodsp_init.c
parentfee97f25fa1275e2a35485cb16283a466c28aadc (diff)
avcodec/x86/lossless_videodsp: Port sub_hfyu_median_prediction_mmxext to int16
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/lossless_videodsp_init.c')
-rw-r--r--libavcodec/x86/lossless_videodsp_init.c48
1 files changed, 48 insertions, 0 deletions
diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c
index 4eca2a11b1..eac3395eb2 100644
--- a/libavcodec/x86/lossless_videodsp_init.c
+++ b/libavcodec/x86/lossless_videodsp_init.c
@@ -20,6 +20,8 @@
#include "../lossless_videodsp.h"
#include "libavutil/x86/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavcodec/mathops.h"
void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
@@ -29,6 +31,51 @@ int ff_add_hfyu_left_prediction_int16_ssse3(uint16_t *dst, const uint16_t *src,
int ff_add_hfyu_left_prediction_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
void ff_add_hfyu_median_prediction_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
+static void sub_hfyu_median_prediction_int16_mmxext(uint16_t *dst, const uint16_t *src1,
+ const uint16_t *src2, unsigned mask, int w,
+ int *left, int *left_top)
+{
+ x86_reg i=0;
+ uint16_t l, lt;
+
+ __asm__ volatile(
+ "movd %5, %%mm7 \n\t"
+ "pshufw $0, %%mm7, %%mm7 \n\t"
+ "movq (%1, %0), %%mm0 \n\t" // LT
+ "psllq $16, %%mm0 \n\t"
+ "1: \n\t"
+ "movq (%1, %0), %%mm1 \n\t" // T
+ "movq -2(%2, %0), %%mm2 \n\t" // L
+ "movq (%2, %0), %%mm3 \n\t" // X
+ "movq %%mm2, %%mm4 \n\t" // L
+ "psubw %%mm0, %%mm2 \n\t"
+ "paddw %%mm1, %%mm2 \n\t" // L + T - LT
+ "pand %%mm7, %%mm2 \n\t"
+ "movq %%mm4, %%mm5 \n\t" // L
+ "pmaxsw %%mm1, %%mm4 \n\t" // max(T, L)
+ "pminsw %%mm5, %%mm1 \n\t" // min(T, L)
+ "pminsw %%mm2, %%mm4 \n\t"
+ "pmaxsw %%mm1, %%mm4 \n\t"
+ "psubw %%mm4, %%mm3 \n\t" // dst - pred
+ "pand %%mm7, %%mm3 \n\t"
+ "movq %%mm3, (%3, %0) \n\t"
+ "add $8, %0 \n\t"
+ "movq -2(%1, %0), %%mm0 \n\t" // LT
+ "cmp %4, %0 \n\t"
+ " jb 1b \n\t"
+ : "+r" (i)
+ : "r"(src1), "r"(src2), "r"(dst), "r"((x86_reg)2*w), "rm"(mask)
+ );
+
+ l= *left;
+ lt= *left_top;
+
+ dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&mask);
+
+ *left_top= src1[w-1];
+ *left = src2[w-1];
+}
+
void ff_llviddsp_init_x86(LLVidDSPContext *c)
{
int cpu_flags = av_get_cpu_flags();
@@ -40,6 +87,7 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
if (EXTERNAL_MMXEXT(cpu_flags)) {
c->add_hfyu_median_prediction_int16 = ff_add_hfyu_median_prediction_int16_mmxext;
+ c->sub_hfyu_median_prediction_int16 = sub_hfyu_median_prediction_int16_mmxext;
}
if (EXTERNAL_SSE2(cpu_flags)) {