summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com> 2017-09-27 23:10:09 -0300
committer: James Almer <jamrial@gmail.com> 2017-09-27 23:10:09 -0300
commit: 3b345d389be2d67017f904caa21713f53a8e8c90 (patch)
tree: 44d50b299738c39f3631855bf5a67464ec0d4540
parent: 522f87708653af3badcdc33be983bcc6009de49b (diff)
avutil/cpu: split flag checks per arch in av_cpu_max_align()
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- libavutil/aarch64/cpu.c 10
-rw-r--r-- libavutil/arm/cpu.c 10
-rw-r--r-- libavutil/cpu.c 39
-rw-r--r-- libavutil/cpu_internal.h 5
-rw-r--r-- libavutil/ppc/cpu.c 12
-rw-r--r-- libavutil/x86/cpu.c 27
6 files changed, 72 insertions, 31 deletions
diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c
index 8ef077aaea..cc641da576 100644
--- a/libavutil/aarch64/cpu.c
+++ b/libavutil/aarch64/cpu.c
@@ -26,3 +26,13 @@ int ff_get_cpu_flags_aarch64(void)
AV_CPU_FLAG_NEON * HAVE_NEON |
AV_CPU_FLAG_VFP * HAVE_VFP;
}
+
+size_t ff_get_cpu_max_align_aarch64(void) /* AArch64 helper called by av_cpu_max_align(): returns 16 or 8 (presumably an alignment in bytes -- confirm against the public API docs) */
+{
+ int flags = av_get_cpu_flags(); /* flag bitmask as returned by av_get_cpu_flags() */
+
+ if (flags & AV_CPU_FLAG_NEON) /* NEON bit set in the flags: report 16 */
+ return 16;
+
+ return 8; /* NEON bit not set: same fallback value that av_cpu_max_align() uses */
+}
diff --git a/libavutil/arm/cpu.c b/libavutil/arm/cpu.c
index 3889ef011c..81e85e2525 100644
--- a/libavutil/arm/cpu.c
+++ b/libavutil/arm/cpu.c
@@ -158,3 +158,13 @@ int ff_get_cpu_flags_arm(void)
}
#endif
+
+size_t ff_get_cpu_max_align_arm(void) /* ARM helper called by av_cpu_max_align(): returns 16 or 8 (presumably an alignment in bytes -- confirm against the public API docs) */
+{
+ int flags = av_get_cpu_flags(); /* flag bitmask as returned by av_get_cpu_flags() */
+
+ if (flags & AV_CPU_FLAG_NEON) /* NEON bit set in the flags: report 16 */
+ return 16;
+
+ return 8; /* NEON bit not set: same fallback value that av_cpu_max_align() uses */
+}
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index ab04494acf..c8401b8258 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -304,37 +304,14 @@ int av_cpu_count(void)
size_t av_cpu_max_align(void) /* after this change: picks the per-architecture ff_get_cpu_max_align_*() helper instead of testing CPU flags inline */
{
- int av_unused flags = av_get_cpu_flags();
-
-#if ARCH_ARM || ARCH_AARCH64
- if (flags & AV_CPU_FLAG_NEON)
- return 16;
-#elif ARCH_PPC
- if (flags & (AV_CPU_FLAG_ALTIVEC |
- AV_CPU_FLAG_VSX |
- AV_CPU_FLAG_POWER8))
- return 16;
-#elif ARCH_X86
- if (flags & (AV_CPU_FLAG_AVX2 |
- AV_CPU_FLAG_AVX |
- AV_CPU_FLAG_XOP |
- AV_CPU_FLAG_FMA4 |
- AV_CPU_FLAG_FMA3 |
- AV_CPU_FLAG_AVXSLOW))
- return 32;
- if (flags & (AV_CPU_FLAG_AESNI |
- AV_CPU_FLAG_SSE42 |
- AV_CPU_FLAG_SSE4 |
- AV_CPU_FLAG_SSSE3 |
- AV_CPU_FLAG_SSE3 |
- AV_CPU_FLAG_SSE2 |
- AV_CPU_FLAG_SSE |
- AV_CPU_FLAG_ATOM |
- AV_CPU_FLAG_SSSE3SLOW |
- AV_CPU_FLAG_SSE3SLOW |
- AV_CPU_FLAG_SSE2SLOW))
- return 16;
-#endif
+ if (ARCH_AARCH64) /* ARCH_* were previously tested with #if above; here they are ordinary if-conditions (presumably build-time 0/1 constants -- confirm in the generated config) */
+ return ff_get_cpu_max_align_aarch64();
+ if (ARCH_ARM) /* checked after ARCH_AARCH64; the removed code handled both under one #if */
+ return ff_get_cpu_max_align_arm();
+ if (ARCH_PPC)
+ return ff_get_cpu_max_align_ppc();
+ if (ARCH_X86)
+ return ff_get_cpu_max_align_x86();
return 8; /* none of the four architecture conditions held: default value */
}
diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index 6c352abe1b..b8bf1e5396 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h
@@ -44,4 +44,9 @@ int ff_get_cpu_flags_arm(void);
int ff_get_cpu_flags_ppc(void);
int ff_get_cpu_flags_x86(void);
+size_t ff_get_cpu_max_align_aarch64(void); /* per-architecture helpers that av_cpu_max_align() in libavutil/cpu.c dispatches to */
+size_t ff_get_cpu_max_align_arm(void); /* defined in libavutil/arm/cpu.c */
+size_t ff_get_cpu_max_align_ppc(void); /* defined in libavutil/ppc/cpu.c */
+size_t ff_get_cpu_max_align_x86(void); /* defined in libavutil/x86/cpu.c */
+
#endif /* AVUTIL_CPU_INTERNAL_H */
diff --git a/libavutil/ppc/cpu.c b/libavutil/ppc/cpu.c
index 0f1e982624..7bb7cd813c 100644
--- a/libavutil/ppc/cpu.c
+++ b/libavutil/ppc/cpu.c
@@ -148,3 +148,15 @@ out:
#endif /* HAVE_ALTIVEC */
return 0;
}
+
+size_t ff_get_cpu_max_align_ppc(void) /* PPC helper called by av_cpu_max_align(): returns 16 or 8 (presumably an alignment in bytes -- confirm against the public API docs) */
+{
+ int flags = av_get_cpu_flags(); /* flag bitmask as returned by av_get_cpu_flags() */
+
+ if (flags & (AV_CPU_FLAG_ALTIVEC | /* any one of these three bits is enough to report 16 */
+ AV_CPU_FLAG_VSX |
+ AV_CPU_FLAG_POWER8))
+ return 16;
+
+ return 8; /* none of the bits set: same fallback value that av_cpu_max_align() uses */
+}
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 3800a11ad8..f33088c8c7 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -233,3 +233,30 @@ int ff_get_cpu_flags_x86(void)
return rval;
}
+
+size_t ff_get_cpu_max_align_x86(void) /* x86 helper called by av_cpu_max_align(): returns 32, 16 or 8 (presumably an alignment in bytes -- confirm against the public API docs) */
+{
+ int flags = av_get_cpu_flags(); /* flag bitmask as returned by av_get_cpu_flags() */
+
+ if (flags & (AV_CPU_FLAG_AVX2 | /* first group, tested first: any of these bits yields 32 */
+ AV_CPU_FLAG_AVX |
+ AV_CPU_FLAG_XOP |
+ AV_CPU_FLAG_FMA4 |
+ AV_CPU_FLAG_FMA3 |
+ AV_CPU_FLAG_AVXSLOW))
+ return 32;
+ if (flags & (AV_CPU_FLAG_AESNI | /* second group, reached only when no first-group bit is set: any of these yields 16 */
+ AV_CPU_FLAG_SSE42 |
+ AV_CPU_FLAG_SSE4 |
+ AV_CPU_FLAG_SSSE3 |
+ AV_CPU_FLAG_SSE3 |
+ AV_CPU_FLAG_SSE2 |
+ AV_CPU_FLAG_SSE |
+ AV_CPU_FLAG_ATOM |
+ AV_CPU_FLAG_SSSE3SLOW |
+ AV_CPU_FLAG_SSE3SLOW |
+ AV_CPU_FLAG_SSE2SLOW))
+ return 16;
+
+ return 8; /* no bit from either group: same fallback value that av_cpu_max_align() uses */
+}