summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDiego Biurrun <diego@biurrun.de>2016-01-11 15:53:41 +0100
committerDiego Biurrun <diego@biurrun.de>2016-07-20 18:33:25 +0200
commit1dfc3cf89d0eb026af28be46294b85d79499ffb5 (patch)
treee44e700d395f7b4c8d327ef47191c1efb541b6f7
parent9833a406d3d743d238e4cbee08ffcaa12e067dd2 (diff)
x86: hpeldsp: Split off VP3-specific bits into a separate file
-rw-r--r--libavcodec/x86/Makefile2
-rw-r--r--libavcodec/x86/hpeldsp.asm89
-rw-r--r--libavcodec/x86/hpeldsp.h4
-rw-r--r--libavcodec/x86/hpeldsp_init.c25
-rw-r--r--libavcodec/x86/hpeldsp_vp3.asm111
-rw-r--r--libavcodec/x86/hpeldsp_vp3_init.c56
6 files changed, 176 insertions, 111 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 1460197248..32086992b4 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -51,6 +51,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o
OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
+OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3_init.o
OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp_init.o
OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
@@ -122,5 +123,6 @@ YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
YASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o
+YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3.o
YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o
YASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp.o
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index b8929b96d4..8e211140ca 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -142,53 +142,6 @@ INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_X2
-; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
-cglobal put_no_rnd_pixels8_x2_exact, 4,5
- lea r4, [r2*3]
- pcmpeqb m6, m6
-.loop:
- mova m0, [r1]
- mova m2, [r1+r2]
- mova m1, [r1+1]
- mova m3, [r1+r2+1]
- pxor m0, m6
- pxor m2, m6
- pxor m1, m6
- pxor m3, m6
- PAVGB m0, m1
- PAVGB m2, m3
- pxor m0, m6
- pxor m2, m6
- mova [r0], m0
- mova [r0+r2], m2
- mova m0, [r1+r2*2]
- mova m1, [r1+r2*2+1]
- mova m2, [r1+r4]
- mova m3, [r1+r4+1]
- pxor m0, m6
- pxor m1, m6
- pxor m2, m6
- pxor m3, m6
- PAVGB m0, m1
- PAVGB m2, m3
- pxor m0, m6
- pxor m2, m6
- mova [r0+r2*2], m0
- mova [r0+r4], m2
- lea r1, [r1+r2*4]
- lea r0, [r0+r2*4]
- sub r3d, 4
- jg .loop
- REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PUT_NO_RND_PIXELS8_X2_EXACT
-INIT_MMX 3dnow
-PUT_NO_RND_PIXELS8_X2_EXACT
-
-
; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_PIXELS8_Y2 0
cglobal put_pixels8_y2, 4,5
@@ -260,48 +213,6 @@ INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_Y2
-; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
-cglobal put_no_rnd_pixels8_y2_exact, 4,5
- lea r4, [r2*3]
- mova m0, [r1]
- pcmpeqb m6, m6
- add r1, r2
- pxor m0, m6
-.loop:
- mova m1, [r1]
- mova m2, [r1+r2]
- pxor m1, m6
- pxor m2, m6
- PAVGB m0, m1
- PAVGB m1, m2
- pxor m0, m6
- pxor m1, m6
- mova [r0], m0
- mova [r0+r2], m1
- mova m1, [r1+r2*2]
- mova m0, [r1+r4]
- pxor m1, m6
- pxor m0, m6
- PAVGB m2, m1
- PAVGB m1, m0
- pxor m2, m6
- pxor m1, m6
- mova [r0+r2*2], m2
- mova [r0+r4], m1
- lea r1, [r1+r2*4]
- lea r0, [r0+r2*4]
- sub r3d, 4
- jg .loop
- REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PUT_NO_RND_PIXELS8_Y2_EXACT
-INIT_MMX 3dnow
-PUT_NO_RND_PIXELS8_Y2_EXACT
-
-
; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro AVG_PIXELS8 0
cglobal avg_pixels8, 4,5
diff --git a/libavcodec/x86/hpeldsp.h b/libavcodec/x86/hpeldsp.h
index 47b0b8b825..d624ed9d78 100644
--- a/libavcodec/x86/hpeldsp.h
+++ b/libavcodec/x86/hpeldsp.h
@@ -22,6 +22,8 @@
#include <stddef.h>
#include <stdint.h>
+#include "libavcodec/hpeldsp.h"
+
void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
@@ -35,4 +37,6 @@ void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
+void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags);
+
#endif /* AVCODEC_X86_HPELDSP_H */
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
index 65aac7164d..673142807d 100644
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@ -43,12 +43,6 @@ void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
- const uint8_t *pixels,
- ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
- const uint8_t *pixels,
- ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
@@ -57,12 +51,6 @@ void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
- const uint8_t *pixels,
- ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
- const uint8_t *pixels,
- ptrdiff_t line_size, int h);
void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
@@ -209,11 +197,6 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
}
-
- if (flags & AV_CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) {
- c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
- c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
- }
#endif /* HAVE_MMXEXT_EXTERNAL */
}
@@ -243,11 +226,6 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
}
-
- if (flags & AV_CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) {
- c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
- c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
- }
#endif /* HAVE_AMD3DNOW_EXTERNAL */
}
@@ -278,4 +256,7 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
if (EXTERNAL_SSE2(cpu_flags))
hpeldsp_init_sse2(c, flags, cpu_flags);
+
+ if (CONFIG_VP3_DECODER)
+ ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags);
}
diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm
new file mode 100644
index 0000000000..513f14e4b4
--- /dev/null
+++ b/libavcodec/x86/hpeldsp_vp3.asm
@@ -0,0 +1,111 @@
+;******************************************************************************
+;* SIMD-optimized halfpel functions for VP3
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
+cglobal put_no_rnd_pixels8_x2_exact, 4,5
+ lea r4, [r2*3]
+ pcmpeqb m6, m6
+.loop:
+ mova m0, [r1]
+ mova m2, [r1+r2]
+ mova m1, [r1+1]
+ mova m3, [r1+r2+1]
+ pxor m0, m6
+ pxor m2, m6
+ pxor m1, m6
+ pxor m3, m6
+ PAVGB m0, m1
+ PAVGB m2, m3
+ pxor m0, m6
+ pxor m2, m6
+ mova [r0], m0
+ mova [r0+r2], m2
+ mova m0, [r1+r2*2]
+ mova m1, [r1+r2*2+1]
+ mova m2, [r1+r4]
+ mova m3, [r1+r4+1]
+ pxor m0, m6
+ pxor m1, m6
+ pxor m2, m6
+ pxor m3, m6
+ PAVGB m0, m1
+ PAVGB m2, m3
+ pxor m0, m6
+ pxor m2, m6
+ mova [r0+r2*2], m0
+ mova [r0+r4], m2
+ lea r1, [r1+r2*4]
+ lea r0, [r0+r2*4]
+ sub r3d, 4
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PUT_NO_RND_PIXELS8_X2_EXACT
+INIT_MMX 3dnow
+PUT_NO_RND_PIXELS8_X2_EXACT
+
+
+; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
+cglobal put_no_rnd_pixels8_y2_exact, 4,5
+ lea r4, [r2*3]
+ mova m0, [r1]
+ pcmpeqb m6, m6
+ add r1, r2
+ pxor m0, m6
+.loop:
+ mova m1, [r1]
+ mova m2, [r1+r2]
+ pxor m1, m6
+ pxor m2, m6
+ PAVGB m0, m1
+ PAVGB m1, m2
+ pxor m0, m6
+ pxor m1, m6
+ mova [r0], m0
+ mova [r0+r2], m1
+ mova m1, [r1+r2*2]
+ mova m0, [r1+r4]
+ pxor m1, m6
+ pxor m0, m6
+ PAVGB m2, m1
+ PAVGB m1, m0
+ pxor m2, m6
+ pxor m1, m6
+ mova [r0+r2*2], m2
+ mova [r0+r4], m1
+ lea r1, [r1+r2*4]
+ lea r0, [r0+r2*4]
+ sub r3d, 4
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PUT_NO_RND_PIXELS8_Y2_EXACT
+INIT_MMX 3dnow
+PUT_NO_RND_PIXELS8_Y2_EXACT
diff --git a/libavcodec/x86/hpeldsp_vp3_init.c b/libavcodec/x86/hpeldsp_vp3_init.c
new file mode 100644
index 0000000000..2510c11d81
--- /dev/null
+++ b/libavcodec/x86/hpeldsp_vp3_init.c
@@ -0,0 +1,56 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/hpeldsp.h"
+
+#include "hpeldsp.h"
+
+void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+
+av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags)
+{
+ if (EXTERNAL_AMD3DNOW(cpu_flags)) {
+ if (flags & AV_CODEC_FLAG_BITEXACT) {
+ c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
+ c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
+ }
+ }
+
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
+ if (flags & AV_CODEC_FLAG_BITEXACT) {
+ c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
+ c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
+ }
+ }
+}