summaryrefslogtreecommitdiff
path: root/libavfilter/x86
diff options
context:
space:
mode:
author: Timothy Gu <timothygu99@gmail.com> 2016-02-08 19:04:00 +0000
committer: Timothy Gu <timothygu99@gmail.com> 2016-02-08 13:35:24 -0800
commit: 253209ac444947d4735be84469c582df2718a59e (patch)
tree: 5c9f80bc40f7b82f990b172b9ccc264c3c0c42c3 /libavfilter/x86
parent: a25c5dbb5ee0f54c474d9caf43359cd0f61ae1bf (diff)
vf_blend: Add SSE2 optimization for multiply
5 times faster than C, 3 times overall.
Diffstat (limited to 'libavfilter/x86')
-rw-r--r-- libavfilter/x86/vf_blend.asm 29
-rw-r--r-- libavfilter/x86/vf_blend_init.c 2
2 files changed, 31 insertions, 0 deletions
diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 730be77d00..9388a74250 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -24,6 +24,7 @@
SECTION_RODATA
+pw_1: times 8 dw 1
pw_128: times 8 dw 128
pw_255: times 8 dw 255
pb_127: times 16 db 127
@@ -101,6 +102,34 @@ BLEND_INIT difference128, 4
jl .loop
BLEND_END
+BLEND_INIT multiply, 4 ; "multiply" blend: dst = top * bottom / 255 per byte. Body uses m0-m3; BLEND_INIT is defined outside this hunk (presumably emits the prologue and sets up topq/bottomq/dstq/widthq - TODO confirm)
+ pxor m2, m2 ; m2 = 0, the zero source for byte -> word unpacking below
+ mova m3, [pw_1] ; m3 = 1 in every word lane (pw_1: times 8 dw 1), the +1 bias of the /255 step
+.nextrow: ; per-row entry point; row advance/jump back is not in this hunk (presumably in BLEND_END - verify)
+ mov xq, widthq ; restart the column index for this row (widthq presumably holds a negative count, since the loop runs while xq < 0 - TODO confirm against BLEND_INIT)
+
+ .loop: ; one iteration handles mmsize / 2 bytes of top/bottom/dst
+ ; word
+ ; |--|
+ movh m0, [topq + xq] ; 0000xxxx  top bytes into the low half of m0
+ movh m1, [bottomq + xq] ; 0000xxxx  bottom bytes into the low half of m1
+ punpcklbw m0, m2 ; 00xx00xx  interleave with zero: each byte becomes a 16-bit word
+ punpcklbw m1, m2 ; 00xx00xx  same widening for bottom
+
+ pmullw m0, m1 ; xxxxxxxx a * b (max 255 * 255 = 65025, fits in an unsigned word)
+ paddw m0, m3 ; t = a * b + 1
+ mova m1, m0 ; copy t so it can be shifted without losing the original
+ psrlw m1, 8 ; m1 = t >> 8
+ paddw m0, m1 ; m0 = t + (t >> 8); at most 65026 + 254 = 65280, so no word overflow
+ psrlw m0, 8 ; 00xx00xx a * b / 255, computed as (t + (t >> 8)) >> 8 without a divide
+
+ packuswb m0, m0 ; 0000xxxx  narrow words back to bytes (unsigned saturation; values are already <= 255)
+ movh [dstq + xq], m0 ; store the low half of m0 (the packed result bytes) to dst
+ add xq, mmsize / 2 ; advance by the bytes just processed; also sets the flags tested by jl
+
+ jl .loop ; signed "less": keep looping while the add left xq negative
+BLEND_END ; macro defined outside this hunk (presumably row stepping and return - verify)
+
BLEND_INIT average, 3
pxor m2, m2
.nextrow:
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index dc29547b3b..8ac526aacd 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -36,6 +36,7 @@ BLEND_FUNC(average, sse2)
BLEND_FUNC(and, sse2)
BLEND_FUNC(darken, sse2)
BLEND_FUNC(difference128, sse2)
+BLEND_FUNC(multiply, sse2)
BLEND_FUNC(hardmix, sse2)
BLEND_FUNC(lighten, sse2)
BLEND_FUNC(or, sse2)
@@ -61,6 +62,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
case BLEND_DIFFERENCE128: param->blend = ff_blend_difference128_sse2; break;
case BLEND_HARDMIX: param->blend = ff_blend_hardmix_sse2; break;
case BLEND_LIGHTEN: param->blend = ff_blend_lighten_sse2; break;
+ case BLEND_MULTIPLY: param->blend = ff_blend_multiply_sse2; break;
case BLEND_OR: param->blend = ff_blend_or_sse2; break;
case BLEND_PHOENIX: param->blend = ff_blend_phoenix_sse2; break;
case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;