summary | refs | log | tree | commit | diff
path: root/libavcodec/x86
diff options
context:
space:
mode:
author: Martin Vignali <martin.vignali@gmail.com> 2017-10-02 01:29:32 +0200
committer: James Almer <jamrial@gmail.com> 2017-10-03 19:47:37 -0300
commit: cbbec68847ed3485900e83ec231871f71bb97d0d (patch)
tree: e720235468b4ae41c4e7dd1c2011d28a40a99b99 /libavcodec/x86
parent: 4590d073ccdc7c3ce0384d5b1fba56b3f6673535 (diff)
libavcodec/blockdsp : add AVX version
Also raise the required alignment from 16 to 32 for several codecs.

Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/blockdsp.asm 14
-rw-r--r-- libavcodec/x86/blockdsp_init.c 6
2 files changed, 14 insertions, 6 deletions
diff --git a/libavcodec/x86/blockdsp.asm b/libavcodec/x86/blockdsp.asm
index 7cbfa3a843..2498bd40b3 100644
--- a/libavcodec/x86/blockdsp.asm
+++ b/libavcodec/x86/blockdsp.asm
@@ -4,6 +4,8 @@
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2009 Fiona Glaser
;*
+;* AVX version by Jokyo Images
+;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
@@ -39,20 +41,18 @@ cglobal clear_block, 1, 1, %1, blocks
mova [blocksq+mmsize*(1+%%i)], m0
mova [blocksq+mmsize*(2+%%i)], m0
mova [blocksq+mmsize*(3+%%i)], m0
- mova [blocksq+mmsize*(4+%%i)], m0
- mova [blocksq+mmsize*(5+%%i)], m0
- mova [blocksq+mmsize*(6+%%i)], m0
- mova [blocksq+mmsize*(7+%%i)], m0
-%assign %%i %%i+8
+%assign %%i %%i+4
%endrep
RET
%endmacro
INIT_MMX mmx
%define ZERO pxor
-CLEAR_BLOCK 0, 2
+CLEAR_BLOCK 0, 4
INIT_XMM sse
%define ZERO xorps
+CLEAR_BLOCK 1, 2
+INIT_YMM avx
CLEAR_BLOCK 1, 1
;-----------------------------------------
@@ -84,3 +84,5 @@ CLEAR_BLOCKS 0
INIT_XMM sse
%define ZERO xorps
CLEAR_BLOCKS 1
+INIT_YMM avx
+CLEAR_BLOCKS 1
diff --git a/libavcodec/x86/blockdsp_init.c b/libavcodec/x86/blockdsp_init.c
index afd25e1cbb..8b01a447cd 100644
--- a/libavcodec/x86/blockdsp_init.c
+++ b/libavcodec/x86/blockdsp_init.c
@@ -28,8 +28,10 @@
void ff_clear_block_mmx(int16_t *block);
void ff_clear_block_sse(int16_t *block);
+void ff_clear_block_avx(int16_t *block);
void ff_clear_blocks_mmx(int16_t *blocks);
void ff_clear_blocks_sse(int16_t *blocks);
+void ff_clear_blocks_avx(int16_t *blocks);
av_cold void ff_blockdsp_init_x86(BlockDSPContext *c,
AVCodecContext *avctx)
@@ -50,5 +52,9 @@ av_cold void ff_blockdsp_init_x86(BlockDSPContext *c,
c->clear_block = ff_clear_block_sse;
c->clear_blocks = ff_clear_blocks_sse;
}
+ if (EXTERNAL_AVX_FAST(cpu_flags)) {
+ c->clear_block = ff_clear_block_avx;
+ c->clear_blocks = ff_clear_blocks_avx;
+ }
#endif /* HAVE_X86ASM */
}