summaryrefslogtreecommitdiff
path: root/libavcodec/x86/me_cmp.asm
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86/me_cmp.asm')
-rw-r--r--libavcodec/x86/me_cmp.asm177
1 files changed, 154 insertions, 23 deletions
diff --git a/libavcodec/x86/me_cmp.asm b/libavcodec/x86/me_cmp.asm
index 1a87f37b39..b0741f398b 100644
--- a/libavcodec/x86/me_cmp.asm
+++ b/libavcodec/x86/me_cmp.asm
@@ -4,20 +4,20 @@
;* Copyright (c) 2000, 2001 Fabrice Bellard
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;*****************************************************************************
@@ -274,19 +274,27 @@ INIT_XMM ssse3
%define ABS_SUM_8x8 ABS_SUM_8x8_64
HADAMARD8_DIFF 9
-INIT_XMM sse2
-; int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-; int line_size, int h);
-cglobal sse16, 5, 5, 8
- shr r4d, 1
+; int ff_sse*_*(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+; int line_size, int h)
+
+%macro SUM_SQUARED_ERRORS 1
+cglobal sse%1, 5,5,8, v, pix1, pix2, lsize, h
+%if %1 == mmsize
+ shr hd, 1
+%endif
pxor m0, m0 ; mm0 = 0
pxor m7, m7 ; mm7 holds the sum
.next2lines: ; FIXME why are these unaligned movs? pix1[] is aligned
- movu m1, [r1 ] ; mm1 = pix1[0][0-15]
- movu m2, [r2 ] ; mm2 = pix2[0][0-15]
- movu m3, [r1+r3] ; mm3 = pix1[1][0-15]
- movu m4, [r2+r3] ; mm4 = pix2[1][0-15]
+ movu m1, [pix1q] ; m1 = pix1[0][0-15], [0-7] for mmx
+ movu m2, [pix2q] ; m2 = pix2[0][0-15], [0-7] for mmx
+%if %1 == mmsize
+ movu m3, [pix1q+lsizeq] ; m3 = pix1[1][0-15], [0-7] for mmx
+ movu m4, [pix2q+lsizeq] ; m4 = pix2[1][0-15], [0-7] for mmx
+%else ; %1 / 2 == mmsize; mmx only
+ mova m3, [pix1q+8] ; m3 = pix1[0][8-15]
+ mova m4, [pix2q+8] ; m4 = pix2[0][8-15]
+%endif
; todo: mm1-mm2, mm3-mm4
; algo: subtract mm1 from mm2 with saturation and vice versa
@@ -315,22 +323,145 @@ cglobal sse16, 5, 5, 8
pmaddwd m1, m1
pmaddwd m3, m3
- lea r1, [r1+r3*2] ; pix1 += 2*line_size
- lea r2, [r2+r3*2] ; pix2 += 2*line_size
-
paddd m1, m2
paddd m3, m4
paddd m7, m1
paddd m7, m3
- dec r4
+%if %1 == mmsize
+ lea pix1q, [pix1q + 2*lsizeq]
+ lea pix2q, [pix2q + 2*lsizeq]
+%else
+ add pix1q, lsizeq
+ add pix2q, lsizeq
+%endif
+ dec hd
jnz .next2lines
- mova m1, m7
- psrldq m7, 8 ; shift hi qword to lo
- paddd m7, m1
- mova m1, m7
- psrldq m7, 4 ; shift hi dword to lo
- paddd m7, m1
+ HADDD m7, m1
movd eax, m7 ; return value
RET
+%endmacro
+
+INIT_MMX mmx
+SUM_SQUARED_ERRORS 8
+
+INIT_MMX mmx
+SUM_SQUARED_ERRORS 16
+
+INIT_XMM sse2
+SUM_SQUARED_ERRORS 16
+
+;-----------------------------------------------
+;int ff_sum_abs_dctelem(int16_t *block)
+;-----------------------------------------------
+; %1 = number of xmm registers used
+; %2 = number of inline loops
+
+%macro SUM_ABS_DCTELEM 2
+cglobal sum_abs_dctelem, 1, 1, %1, block
+ pxor m0, m0
+ pxor m1, m1
+%assign %%i 0
+%rep %2
+ mova m2, [blockq+mmsize*(0+%%i)]
+ mova m3, [blockq+mmsize*(1+%%i)]
+ mova m4, [blockq+mmsize*(2+%%i)]
+ mova m5, [blockq+mmsize*(3+%%i)]
+ ABS1_SUM m2, m6, m0
+ ABS1_SUM m3, m6, m1
+ ABS1_SUM m4, m6, m0
+ ABS1_SUM m5, m6, m1
+%assign %%i %%i+4
+%endrep
+ paddusw m0, m1
+ HSUM m0, m1, eax
+ and eax, 0xFFFF
+ RET
+%endmacro
+
+INIT_MMX mmx
+SUM_ABS_DCTELEM 0, 4
+INIT_MMX mmxext
+SUM_ABS_DCTELEM 0, 4
+INIT_XMM sse2
+SUM_ABS_DCTELEM 7, 2
+INIT_XMM ssse3
+SUM_ABS_DCTELEM 6, 2
+
+;------------------------------------------------------------------------------
+; int ff_hf_noise*_mmx(uint8_t *pix1, int lsize, int h)
+;------------------------------------------------------------------------------
+; %1 = 8/16. %2-5=m#
+%macro HF_NOISE_PART1 5
+ mova m%2, [pix1q]
+%if %1 == 8
+ mova m%3, m%2
+ psllq m%2, 8
+ psrlq m%3, 8
+ psrlq m%2, 8
+%else
+ mova m%3, [pix1q+1]
+%endif
+ mova m%4, m%2
+ mova m%5, m%3
+ punpcklbw m%2, m7
+ punpcklbw m%3, m7
+ punpckhbw m%4, m7
+ punpckhbw m%5, m7
+ psubw m%2, m%3
+ psubw m%4, m%5
+%endmacro
+
+; %1-2 = m#
+%macro HF_NOISE_PART2 4
+ psubw m%1, m%3
+ psubw m%2, m%4
+ pxor m3, m3
+ pxor m1, m1
+ pcmpgtw m3, m%1
+ pcmpgtw m1, m%2
+ pxor m%1, m3
+ pxor m%2, m1
+ psubw m%1, m3
+ psubw m%2, m1
+ paddw m%2, m%1
+ paddw m6, m%2
+%endmacro
+
+; %1 = 8/16
+%macro HF_NOISE 1
+cglobal hf_noise%1, 3,3,0, pix1, lsize, h
+ movsxdifnidn lsizeq, lsized
+ sub hd, 2
+ pxor m7, m7
+ pxor m6, m6
+ HF_NOISE_PART1 %1, 0, 1, 2, 3
+ add pix1q, lsizeq
+ HF_NOISE_PART1 %1, 4, 1, 5, 3
+ HF_NOISE_PART2 0, 2, 4, 5
+ add pix1q, lsizeq
+.loop:
+ HF_NOISE_PART1 %1, 0, 1, 2, 3
+ HF_NOISE_PART2 4, 5, 0, 2
+ add pix1q, lsizeq
+ HF_NOISE_PART1 %1, 4, 1, 5, 3
+ HF_NOISE_PART2 0, 2, 4, 5
+ add pix1q, lsizeq
+ sub hd, 2
+ jne .loop
+
+ mova m0, m6
+ punpcklwd m0, m7
+ punpckhwd m6, m7
+ paddd m6, m0
+ mova m0, m6
+ psrlq m6, 32
+ paddd m0, m6
+ movd eax, m0 ; eax = result of hf_noise8;
+ REP_RET ; return eax;
+%endmacro
+
+INIT_MMX mmx
+HF_NOISE 8
+HF_NOISE 16