From 13dfce3d44f99a2d7df71aba8ae003d58db726f7 Mon Sep 17 00:00:00 2001
From: Vitor Sessak
Date: Sat, 23 Apr 2011 19:24:31 +0200
Subject: Increase alignment of av_malloc() as needed by AVX ASM.

Signed-off-by: Reinhard Tartler
---
 libavutil/mem.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/libavutil/mem.c b/libavutil/mem.c
index 2aef9b0a1b..27bb30b8ef 100644
--- a/libavutil/mem.c
+++ b/libavutil/mem.c
@@ -69,21 +69,21 @@ void *av_malloc(size_t size)
 #endif
 
     /* let's disallow possible ambiguous cases */
-    if(size > (INT_MAX-16) )
+    if(size > (INT_MAX-32) )
         return NULL;
 
 #if CONFIG_MEMALIGN_HACK
-    ptr = malloc(size+16);
+    ptr = malloc(size+32);
     if(!ptr)
         return ptr;
-    diff= ((-(long)ptr - 1)&15) + 1;
+    diff= ((-(long)ptr - 1)&31) + 1;
     ptr = (char*)ptr + diff;
     ((char*)ptr)[-1]= diff;
 #elif HAVE_POSIX_MEMALIGN
-    if (posix_memalign(&ptr,16,size))
+    if (posix_memalign(&ptr,32,size))
         ptr = NULL;
 #elif HAVE_MEMALIGN
-    ptr = memalign(16,size);
+    ptr = memalign(32,size);
     /* Why 64?
        Indeed, we should align it:
          on 4 for 386
@@ -93,10 +93,8 @@ void *av_malloc(size_t size)
        Because L1 and L2 caches are aligned on those values.
        But I don't want to code such logic here!
     */
-    /* Why 16?
-       Because some CPUs need alignment, for example SSE2 on P4, & most RISC CPUs
-       it will just trigger an exception and the unaligned load will be done in the
-       exception handler or it will just segfault (SSE2 on P4).
+    /* Why 32?
+       For AVX ASM. SSE / NEON needs only 16.
        Why not larger? Because I did not see a difference in benchmarks ...
     */
     /* benchmarks with P3
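
Note (annotation, not part of the commit): the CONFIG_MEMALIGN_HACK branch above is the portable fallback for platforms without posix_memalign() or memalign(). It over-allocates by the alignment, rounds the pointer up to the next 32-byte boundary, and stores the rounding offset in the byte just before the returned pointer so the matching free can recover the original allocation. Below is a minimal standalone sketch of the same trick; aligned_malloc_hack() and aligned_free_hack() are illustrative names for this note, not FFmpeg API:

    #include <stdlib.h>
    #include <stdint.h>

    #define ALIGN 32  /* AVX wants 32-byte alignment; SSE/NEON need only 16 */

    /* Over-allocate by ALIGN, round the pointer up to the next ALIGN
     * boundary, and stash the offset in the byte just before the
     * returned pointer. ((-p - 1) & (ALIGN-1)) + 1 always lands in
     * [1, ALIGN], so one spare byte for the offset is guaranteed and
     * the result is ALIGN-aligned. */
    static void *aligned_malloc_hack(size_t size)
    {
        char *base = malloc(size + ALIGN);
        intptr_t off;
        if (!base)
            return NULL;
        off = ((-(intptr_t)base - 1) & (ALIGN - 1)) + 1;
        base += off;
        base[-1] = (char)off;   /* off <= 32, fits even in signed char */
        return base;
    }

    /* Recover the original malloc()ed address from the stored offset. */
    static void aligned_free_hack(void *ptr)
    {
        if (ptr)
            free((char *)ptr - ((unsigned char *)ptr)[-1]);
    }

This also shows why the patch changes the size guard to INT_MAX-32: the hack needs up to 32 extra bytes, so the requested size plus the padding must not overflow.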