summaryrefslogtreecommitdiff
path: root/libavcodec/arm
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/arm')
-rw-r--r--libavcodec/arm/Makefile3
-rw-r--r--libavcodec/arm/dsputil_init_arm.c1
-rw-r--r--libavcodec/arm/dsputil_init_neon.c3
-rw-r--r--libavcodec/arm/dsputil_init_vfp.c30
-rw-r--r--libavcodec/arm/dsputil_neon.S24
-rw-r--r--libavcodec/arm/dsputil_vfp.S106
6 files changed, 0 insertions, 167 deletions
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 71048f9c4c..5ebda62cf6 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -56,9 +56,6 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \
VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o
-VFP-OBJS += arm/dsputil_vfp.o \
- arm/dsputil_init_vfp.o \
-
NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
arm/fft_fixed_neon.o \
diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c
index 0c1563df93..9feda140ff 100644
--- a/libavcodec/arm/dsputil_init_arm.c
+++ b/libavcodec/arm/dsputil_init_arm.c
@@ -121,6 +121,5 @@ void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
if (have_armv5te(cpu_flags)) ff_dsputil_init_armv5te(c, avctx);
if (have_armv6(cpu_flags)) ff_dsputil_init_armv6(c, avctx);
- if (have_vfp(cpu_flags)) ff_dsputil_init_vfp(c, avctx);
if (have_neon(cpu_flags)) ff_dsputil_init_neon(c, avctx);
}
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index 0d23b26826..bb0c4af69a 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -144,8 +144,6 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_butterflies_float_neon(float *v1, float *v2, int len);
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
-void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
- const float *src1, int len);
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
int len);
@@ -298,7 +296,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->butterflies_float = ff_butterflies_float_neon;
c->scalarproduct_float = ff_scalarproduct_float_neon;
- c->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
c->vector_clipf = ff_vector_clipf_neon;
c->vector_clip_int32 = ff_vector_clip_int32_neon;
diff --git a/libavcodec/arm/dsputil_init_vfp.c b/libavcodec/arm/dsputil_init_vfp.c
deleted file mode 100644
index d77d686578..0000000000
--- a/libavcodec/arm/dsputil_init_vfp.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavcodec/dsputil.h"
-#include "dsputil_arm.h"
-
-void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
- const float *src1, int len);
-
-void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx)
-{
- c->vector_fmul_reverse = ff_vector_fmul_reverse_vfp;
-}
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index 5e512a7a21..4ceecbcb4e 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -556,30 +556,6 @@ NOVFP vmov.32 r0, d0[0]
bx lr
endfunc
-function ff_vector_fmul_reverse_neon, export=1
- add r2, r2, r3, lsl #2
- sub r2, r2, #32
- mov r12, #-32
- vld1.32 {q0-q1}, [r1,:128]!
- vld1.32 {q2-q3}, [r2,:128], r12
-1: pld [r1, #32]
- vrev64.32 q3, q3
- vmul.f32 d16, d0, d7
- vmul.f32 d17, d1, d6
- pld [r2, #-32]
- vrev64.32 q2, q2
- vmul.f32 d18, d2, d5
- vmul.f32 d19, d3, d4
- subs r3, r3, #8
- beq 2f
- vld1.32 {q0-q1}, [r1,:128]!
- vld1.32 {q2-q3}, [r2,:128], r12
- vst1.32 {q8-q9}, [r0,:128]!
- b 1b
-2: vst1.32 {q8-q9}, [r0,:128]!
- bx lr
-endfunc
-
function ff_vector_clipf_neon, export=1
VFP vdup.32 q1, d0[1]
VFP vdup.32 q0, d0[0]
diff --git a/libavcodec/arm/dsputil_vfp.S b/libavcodec/arm/dsputil_vfp.S
deleted file mode 100644
index 9df955dbf9..0000000000
--- a/libavcodec/arm/dsputil_vfp.S
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-#include "libavutil/arm/asm.S"
-
-/*
- * VFP is a floating point coprocessor used in some ARM cores. VFP11 has 1 cycle
- * throughput for almost all the instructions (except for double precision
- * arithmetics), but rather high latency. Latency is 4 cycles for loads and 8 cycles
- * for arithmetic operations. Scheduling code to avoid pipeline stalls is very
- * important for performance. One more interesting feature is that VFP has
- * independent load/store and arithmetics pipelines, so it is possible to make
- * them work simultaneously and get more than 1 operation per cycle. Load/store
- * pipeline can process 2 single precision floating point values per cycle and
- * supports bulk loads and stores for large sets of registers. Arithmetic operations
- * can be done on vectors, which allows to keep the arithmetics pipeline busy,
- * while the processor may issue and execute other instructions. Detailed
- * optimization manuals can be found at http://www.arm.com
- */
-
-/**
- * ARM VFP optimized implementation of 'vector_fmul_reverse_c' function.
- * Assume that len is a positive number and is multiple of 8
- */
-@ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
-@ const float *src1, int len)
-function ff_vector_fmul_reverse_vfp, export=1
- vpush {d8-d15}
- add r2, r2, r3, lsl #2
- vldmdb r2!, {s0-s3}
- vldmia r1!, {s8-s11}
- vldmdb r2!, {s4-s7}
- vldmia r1!, {s12-s15}
- vmul.f32 s8, s3, s8
- vmul.f32 s9, s2, s9
- vmul.f32 s10, s1, s10
- vmul.f32 s11, s0, s11
-1:
- subs r3, r3, #16
- it ge
- vldmdbge r2!, {s16-s19}
- vmul.f32 s12, s7, s12
- it ge
- vldmiage r1!, {s24-s27}
- vmul.f32 s13, s6, s13
- it ge
- vldmdbge r2!, {s20-s23}
- vmul.f32 s14, s5, s14
- it ge
- vldmiage r1!, {s28-s31}
- vmul.f32 s15, s4, s15
- it ge
- vmulge.f32 s24, s19, s24
- it gt
- vldmdbgt r2!, {s0-s3}
- it ge
- vmulge.f32 s25, s18, s25
- vstmia r0!, {s8-s13}
- it ge
- vmulge.f32 s26, s17, s26
- it gt
- vldmiagt r1!, {s8-s11}
- itt ge
- vmulge.f32 s27, s16, s27
- vmulge.f32 s28, s23, s28
- it gt
- vldmdbgt r2!, {s4-s7}
- it ge
- vmulge.f32 s29, s22, s29
- vstmia r0!, {s14-s15}
- ittt ge
- vmulge.f32 s30, s21, s30
- vmulge.f32 s31, s20, s31
- vmulge.f32 s8, s3, s8
- it gt
- vldmiagt r1!, {s12-s15}
- itttt ge
- vmulge.f32 s9, s2, s9
- vmulge.f32 s10, s1, s10
- vstmiage r0!, {s24-s27}
- vmulge.f32 s11, s0, s11
- it ge
- vstmiage r0!, {s28-s31}
- bgt 1b
-
- vpop {d8-d15}
- bx lr
-endfunc