summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x configure 12
-rw-r--r-- libavutil/float2half.c 2
-rw-r--r-- libavutil/float2half.h 16
-rw-r--r-- libavutil/half2float.c 4
-rw-r--r-- libavutil/half2float.h 16
5 files changed, 50 insertions, 0 deletions
diff --git a/configure b/configure
index fe94941a03..ea50c94002 100755
--- a/configure
+++ b/configure
@@ -2145,6 +2145,7 @@ ARCH_FEATURES="
fast_64bit
fast_clz
fast_cmov
+ fast_float16
local_aligned
simd_align_16
simd_align_32
@@ -5127,6 +5128,8 @@ elif enabled arm; then
;;
esac
+ test_cflags -mfp16-format=ieee && add_cflags -mfp16-format=ieee
+
elif enabled avr32; then
case $cpu in
@@ -6231,6 +6234,15 @@ check_builtin sync_val_compare_and_swap "" "int *ptr; int oldval, newval; __sync
check_builtin gmtime_r time.h "time_t *time; struct tm *tm; gmtime_r(time, tm)"
check_builtin localtime_r time.h "time_t *time; struct tm *tm; localtime_r(time, tm)"
+check_builtin float16 "" "_Float16 f16var"
+if enabled float16; then
+ if enabled x86; then
+ test_cpp_condition stddef.h "defined(__F16C__)" && enable fast_float16
+ elif enabled arm || enabled aarch64; then
+ enable fast_float16
+ fi
+fi
+
case "$custom_allocator" in
jemalloc)
# jemalloc by default does not use a prefix
diff --git a/libavutil/float2half.c b/libavutil/float2half.c
index c79a3abfa1..1a283956e7 100644
--- a/libavutil/float2half.c
+++ b/libavutil/float2half.c
@@ -20,6 +20,7 @@
void ff_init_float2half_tables(Float2HalfTables *t)
{
+#if !HAVE_FAST_FLOAT16
for (int i = 0; i < 256; i++) {
int e = i - 127;
@@ -50,4 +51,5 @@ void ff_init_float2half_tables(Float2HalfTables *t)
t->shifttable[i|0x100] = 13;
}
}
+#endif
}
diff --git a/libavutil/float2half.h b/libavutil/float2half.h
index 20fdc2a36b..e619046911 100644
--- a/libavutil/float2half.h
+++ b/libavutil/float2half.h
@@ -20,21 +20,37 @@
#define AVUTIL_FLOAT2HALF_H
#include <stdint.h>
+#include "intfloat.h"
+
+#include "config.h"
typedef struct Float2HalfTables {
+#if HAVE_FAST_FLOAT16
+ uint8_t dummy;
+#else
uint16_t basetable[512];
uint8_t shifttable[512];
+#endif
} Float2HalfTables;
void ff_init_float2half_tables(Float2HalfTables *t);
static inline uint16_t float2half(uint32_t f, const Float2HalfTables *t)
{
+#if HAVE_FAST_FLOAT16
+ union {
+ _Float16 f;
+ uint16_t i;
+ } u;
+ u.f = av_int2float(f);
+ return u.i;
+#else
uint16_t h;
h = t->basetable[(f >> 23) & 0x1ff] + ((f & 0x007fffff) >> t->shifttable[(f >> 23) & 0x1ff]);
return h;
+#endif
}
#endif /* AVUTIL_FLOAT2HALF_H */
diff --git a/libavutil/half2float.c b/libavutil/half2float.c
index 1967126f76..4de2180a19 100644
--- a/libavutil/half2float.c
+++ b/libavutil/half2float.c
@@ -18,6 +18,7 @@
#include "libavutil/half2float.h"
+#if !HAVE_FAST_FLOAT16
static uint32_t convertmantissa(uint32_t i)
{
int32_t m = i << 13; // Zero pad mantissa bits
@@ -33,9 +34,11 @@ static uint32_t convertmantissa(uint32_t i)
return m | e; // Return combined number
}
+#endif
void ff_init_half2float_tables(Half2FloatTables *t)
{
+#if !HAVE_FAST_FLOAT16
t->mantissatable[0] = 0;
for (int i = 1; i < 1024; i++)
t->mantissatable[i] = convertmantissa(i);
@@ -60,4 +63,5 @@ void ff_init_half2float_tables(Half2FloatTables *t)
t->offsettable[31] = 2048;
t->offsettable[32] = 0;
t->offsettable[63] = 2048;
+#endif
}
diff --git a/libavutil/half2float.h b/libavutil/half2float.h
index 428a27a19f..dbd5e7150f 100644
--- a/libavutil/half2float.h
+++ b/libavutil/half2float.h
@@ -20,22 +20,38 @@
#define AVUTIL_HALF2FLOAT_H
#include <stdint.h>
+#include "intfloat.h"
+
+#include "config.h"
typedef struct Half2FloatTables {
+#if HAVE_FAST_FLOAT16
+ uint8_t dummy;
+#else
uint32_t mantissatable[3072];
uint32_t exponenttable[64];
uint16_t offsettable[64];
+#endif
} Half2FloatTables;
void ff_init_half2float_tables(Half2FloatTables *t);
static inline uint32_t half2float(uint16_t h, const Half2FloatTables *t)
{
+#if HAVE_FAST_FLOAT16
+ union {
+ _Float16 f;
+ uint16_t i;
+ } u;
+ u.i = h;
+ return av_float2int(u.f);
+#else
uint32_t f;
f = t->mantissatable[t->offsettable[h >> 10] + (h & 0x3ff)] + t->exponenttable[h >> 10];
return f;
+#endif
}
#endif /* AVUTIL_HALF2FLOAT_H */