diff options
| field | value | date |
|---|---|---|
| author | James Almer <jamrial@gmail.com> | 2017-01-08 11:48:05 -0300 |
| committer | James Almer <jamrial@gmail.com> | 2017-01-12 22:53:05 -0300 |
| commit | 47f212329e5d73c81e2c67acd6a481bc0fe687b2 (patch) | |
| tree | 5457e6f0cad40c63b67f86331659f880b4e576b8 /libavcodec/x86 | |
| parent | cf9ef839606dd50f779c395d8a277de143f7e5b2 (diff) | |
huffyuvdsp: move functions only used by huffyuv from lossless_videodsp
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | libavcodec/x86/huffyuvdsp.asm | 137 |
| -rw-r--r-- | libavcodec/x86/huffyuvdsp_init.c | 13 |
| -rw-r--r-- | libavcodec/x86/lossless_videodsp.asm | 136 |
| -rw-r--r-- | libavcodec/x86/lossless_videodsp_init.c | 14 |

4 files changed, 150 insertions, 150 deletions
diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm index 0befd3baa8..0d8cae354a 100644 --- a/libavcodec/x86/huffyuvdsp.asm +++ b/libavcodec/x86/huffyuvdsp.asm @@ -24,6 +24,78 @@ SECTION .text + +%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub + movd m4, maskd + SPLATW m4, m4 + add wd, wd + test wq, 2*mmsize - 1 + jz %%.tomainloop + push tmpq +%%.wordloop: + sub wq, 2 +%ifidn %2, add + mov tmpw, [srcq+wq] + add tmpw, [dstq+wq] +%else + mov tmpw, [src1q+wq] + sub tmpw, [src2q+wq] +%endif + and tmpw, maskw + mov [dstq+wq], tmpw + test wq, 2*mmsize - 1 + jnz %%.wordloop + pop tmpq +%%.tomainloop: +%ifidn %2, add + add srcq, wq +%else + add src1q, wq + add src2q, wq +%endif + add dstq, wq + neg wq + jz %%.end +%%.loop: +%ifidn %2, add + mov%1 m0, [srcq+wq] + mov%1 m1, [dstq+wq] + mov%1 m2, [srcq+wq+mmsize] + mov%1 m3, [dstq+wq+mmsize] +%else + mov%1 m0, [src1q+wq] + mov%1 m1, [src2q+wq] + mov%1 m2, [src1q+wq+mmsize] + mov%1 m3, [src2q+wq+mmsize] +%endif + p%2w m0, m1 + p%2w m2, m3 + pand m0, m4 + pand m2, m4 + mov%1 [dstq+wq] , m0 + mov%1 [dstq+wq+mmsize], m2 + add wq, 2*mmsize + jl %%.loop +%%.end: + RET +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +cglobal add_int16, 4,4,5, dst, src, mask, w, tmp + INT16_LOOP a, add +%endif + +INIT_XMM sse2 +cglobal add_int16, 4,4,5, dst, src, mask, w, tmp + test srcq, mmsize-1 + jnz .unaligned + test dstq, mmsize-1 + jnz .unaligned + INT16_LOOP a, add +.unaligned: + INT16_LOOP u, add + ; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src, ; intptr_t w, uint8_t *left) %macro LEFT_BGR32 0 @@ -63,3 +135,68 @@ LEFT_BGR32 %endif INIT_XMM sse2 LEFT_BGR32 + +; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top) +INIT_MMX mmxext +cglobal add_hfyu_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_top + add wd, wd + movd mm6, maskd + SPLATW mm6, mm6 + movq mm0, [topq] + movq mm2, mm0 + movd mm4, 
[left_topq] + psllq mm2, 16 + movq mm1, mm0 + por mm4, mm2 + movd mm3, [leftq] + psubw mm0, mm4 ; t-tl + add dstq, wq + add topq, wq + add diffq, wq + neg wq + jmp .skip +.loop: + movq mm4, [topq+wq] + movq mm0, mm4 + psllq mm4, 16 + por mm4, mm1 + movq mm1, mm0 ; t + psubw mm0, mm4 ; t-tl +.skip: + movq mm2, [diffq+wq] +%assign i 0 +%rep 4 + movq mm4, mm0 + paddw mm4, mm3 ; t-tl+l + pand mm4, mm6 + movq mm5, mm3 + pmaxsw mm3, mm1 + pminsw mm5, mm1 + pminsw mm3, mm4 + pmaxsw mm3, mm5 ; median + paddw mm3, mm2 ; +residual + pand mm3, mm6 +%if i==0 + movq mm7, mm3 + psllq mm7, 48 +%else + movq mm4, mm3 + psrlq mm7, 16 + psllq mm4, 48 + por mm7, mm4 +%endif +%if i<3 + psrlq mm0, 16 + psrlq mm1, 16 + psrlq mm2, 16 +%endif +%assign i i+1 +%endrep + movq [dstq+wq], mm7 + add wq, 8 + jl .loop + movzx r2d, word [dstq-2] + mov [leftq], r2d + movzx r2d, word [topq-2] + mov [left_topq], r2d + RET diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c index fc87c3844b..f72d759ef2 100644 --- a/libavcodec/x86/huffyuvdsp_init.c +++ b/libavcodec/x86/huffyuvdsp_init.c @@ -21,24 +21,35 @@ #include "config.h" #include "libavutil/attributes.h" #include "libavutil/cpu.h" +#include "libavutil/pixdesc.h" #include "libavutil/x86/asm.h" #include "libavutil/x86/cpu.h" #include "libavcodec/huffyuvdsp.h" +void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w); +void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w); void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src, intptr_t w, uint8_t *left); void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src, intptr_t w, uint8_t *left); +void ff_add_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top); -av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c) +av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c, AVCodecContext *avctx) { int cpu_flags = 
av_get_cpu_flags(); + const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt); if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_mmx; + c->add_int16 = ff_add_int16_mmx; + } + + if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) { + c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext; } if (EXTERNAL_SSE2(cpu_flags)) { + c->add_int16 = ff_add_int16_sse2; c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_sse2; } } diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm index bcc40ec061..f82f04f7fa 100644 --- a/libavcodec/x86/lossless_videodsp.asm +++ b/libavcodec/x86/lossless_videodsp.asm @@ -217,77 +217,6 @@ ADD_BYTES INIT_XMM sse2 ADD_BYTES -%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub - movd m4, maskd - SPLATW m4, m4 - add wd, wd - test wq, 2*mmsize - 1 - jz %%.tomainloop - push tmpq -%%.wordloop: - sub wq, 2 -%ifidn %2, add - mov tmpw, [srcq+wq] - add tmpw, [dstq+wq] -%else - mov tmpw, [src1q+wq] - sub tmpw, [src2q+wq] -%endif - and tmpw, maskw - mov [dstq+wq], tmpw - test wq, 2*mmsize - 1 - jnz %%.wordloop - pop tmpq -%%.tomainloop: -%ifidn %2, add - add srcq, wq -%else - add src1q, wq - add src2q, wq -%endif - add dstq, wq - neg wq - jz %%.end -%%.loop: -%ifidn %2, add - mov%1 m0, [srcq+wq] - mov%1 m1, [dstq+wq] - mov%1 m2, [srcq+wq+mmsize] - mov%1 m3, [dstq+wq+mmsize] -%else - mov%1 m0, [src1q+wq] - mov%1 m1, [src2q+wq] - mov%1 m2, [src1q+wq+mmsize] - mov%1 m3, [src2q+wq+mmsize] -%endif - p%2w m0, m1 - p%2w m2, m3 - pand m0, m4 - pand m2, m4 - mov%1 [dstq+wq] , m0 - mov%1 [dstq+wq+mmsize], m2 - add wq, 2*mmsize - jl %%.loop -%%.end: - RET -%endmacro - -%if ARCH_X86_32 -INIT_MMX mmx -cglobal add_int16, 4,4,5, dst, src, mask, w, tmp - INT16_LOOP a, add -%endif - -INIT_XMM sse2 -cglobal add_int16, 4,4,5, dst, src, mask, w, tmp - test srcq, mmsize-1 - jnz .unaligned - test dstq, mmsize-1 - jnz 
.unaligned - INT16_LOOP a, add -.unaligned: - INT16_LOOP u, add - %macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u) add wd, wd add srcq, wq @@ -359,68 +288,3 @@ cglobal add_hfyu_left_pred_int16, 4,4,8, dst, src, mask, w, left ADD_HFYU_LEFT_LOOP_INT16 u, a .src_unaligned: ADD_HFYU_LEFT_LOOP_INT16 u, u - -; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top) -INIT_MMX mmxext -cglobal add_hfyu_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_top - add wd, wd - movd mm6, maskd - SPLATW mm6, mm6 - movq mm0, [topq] - movq mm2, mm0 - movd mm4, [left_topq] - psllq mm2, 16 - movq mm1, mm0 - por mm4, mm2 - movd mm3, [leftq] - psubw mm0, mm4 ; t-tl - add dstq, wq - add topq, wq - add diffq, wq - neg wq - jmp .skip -.loop: - movq mm4, [topq+wq] - movq mm0, mm4 - psllq mm4, 16 - por mm4, mm1 - movq mm1, mm0 ; t - psubw mm0, mm4 ; t-tl -.skip: - movq mm2, [diffq+wq] -%assign i 0 -%rep 4 - movq mm4, mm0 - paddw mm4, mm3 ; t-tl+l - pand mm4, mm6 - movq mm5, mm3 - pmaxsw mm3, mm1 - pminsw mm5, mm1 - pminsw mm3, mm4 - pmaxsw mm3, mm5 ; median - paddw mm3, mm2 ; +residual - pand mm3, mm6 -%if i==0 - movq mm7, mm3 - psllq mm7, 48 -%else - movq mm4, mm3 - psrlq mm7, 16 - psllq mm4, 48 - por mm7, mm4 -%endif -%if i<3 - psrlq mm0, 16 - psrlq mm1, 16 - psrlq mm2, 16 -%endif -%assign i i+1 -%endrep - movq [dstq+wq], mm7 - add wq, 8 - jl .loop - movzx r2d, word [dstq-2] - mov [leftq], r2d - movzx r2d, word [topq-2] - mov [left_topq], r2d - RET diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c index 2dc662d8b1..dbb63a1f48 100644 --- a/libavcodec/x86/lossless_videodsp_init.c +++ b/libavcodec/x86/lossless_videodsp_init.c @@ -21,7 +21,6 @@ #include "config.h" #include "libavutil/x86/asm.h" #include "../lossless_videodsp.h" -#include "libavutil/pixdesc.h" #include "libavutil/x86/cpu.h" void ff_add_bytes_mmx(uint8_t 
*dst, uint8_t *src, intptr_t w); @@ -39,11 +38,8 @@ int ff_add_left_pred_ssse3(uint8_t *dst, const uint8_t *src, int ff_add_left_pred_sse4(uint8_t *dst, const uint8_t *src, intptr_t w, int left); -void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w); -void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w); int ff_add_hfyu_left_pred_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc); int ff_add_hfyu_left_pred_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc); -void ff_add_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top); #if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32 static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top, @@ -83,10 +79,9 @@ static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top, } #endif -void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx) +void ff_llviddsp_init_x86(LLVidDSPContext *c) { int cpu_flags = av_get_cpu_flags(); - const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt); #if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32 if (cpu_flags & AV_CPU_FLAG_CMOV) @@ -95,7 +90,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx) if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { c->add_bytes = ff_add_bytes_mmx; - c->add_int16 = ff_add_int16_mmx; } if (ARCH_X86_32 && EXTERNAL_MMXEXT(cpu_flags)) { @@ -104,15 +98,9 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx) c->add_median_pred = ff_add_median_pred_mmxext; } - if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) { - c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext; - } - if (EXTERNAL_SSE2(cpu_flags)) { c->add_bytes = ff_add_bytes_sse2; c->add_median_pred = ff_add_median_pred_sse2; - - c->add_int16 = ff_add_int16_sse2; } if (EXTERNAL_SSSE3(cpu_flags)) { |