summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2008-01-21 13:33:18 +0000
committerMichael Niedermayer <michaelni@gmx.at>2008-01-21 13:33:18 +0000
commitc448a09624eff091fe3490ff09852e95dc1026b1 (patch)
tree6460099a9fed6fec7aaa209a76dc9587a024ddef
parentb21cd0bcb50c5833c20c18e78f3df8c041d3a4bc (diff)
Faster ff_sqrt()
Originally committed as revision 11586 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r--libavutil/internal.h34
-rw-r--r--libavutil/mathematics.c14
2 files changed, 28 insertions, 20 deletions
diff --git a/libavutil/internal.h b/libavutil/internal.h
index fd6279b602..c4c151d8f7 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -178,24 +178,28 @@ extern const uint32_t ff_inverse[256];
# define FASTDIV(a,b) ((a)/(b))
#endif
-extern const uint8_t ff_sqrt_tab[128];
+extern const uint8_t ff_sqrt_tab[256];
-static inline int ff_sqrt(int a)
+static inline int av_log2_16bit(unsigned int v);
+
+static inline unsigned int ff_sqrt(unsigned int a)
{
- int ret=0;
- int s, b;
-
- if(a<128) return ff_sqrt_tab[a];
-
- for(s=30; s>=0; s-=2){
- ret+=ret;
- b= (1+2*ret)<<s;
- if(b<=a){
- a-=b;
- ret++;
- }
+ unsigned int b;
+
+ if(a<255) return (ff_sqrt_tab[a+1]-1)>>4;
+ else if(a<(1<<12)) b= ff_sqrt_tab[a>>4 ]>>2;
+#ifndef CONFIG_SMALL
+ else if(a<(1<<14)) b= ff_sqrt_tab[a>>6 ]>>1;
+ else if(a<(1<<16)) b= ff_sqrt_tab[a>>8 ] ;
+#endif
+ else{
+ int s= av_log2_16bit(a>>16)>>1;
+ unsigned int c= a>>(s+2);
+ b= ff_sqrt_tab[c>>(s+8)];
+ b= FASTDIV(c,b) + (b<<s);
}
- return ret;
+
+ return b - (a<b*b);
}
#if defined(ARCH_X86)
diff --git a/libavutil/mathematics.c b/libavutil/mathematics.c
index f9c4a68573..ef93ba4a1c 100644
--- a/libavutil/mathematics.c
+++ b/libavutil/mathematics.c
@@ -26,11 +26,15 @@
#include "common.h"
#include "mathematics.h"
-const uint8_t ff_sqrt_tab[128]={
- 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11
+const uint8_t ff_sqrt_tab[256]={
+ 0, 16, 23, 28, 32, 36, 40, 43, 46, 48, 51, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 77, 79, 80, 82, 84, 85, 87, 88, 90,
+ 91, 92, 94, 95, 96, 98, 99,100,102,103,104,105,107,108,109,110,111,112,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,144,145,146,147,148,149,150,151,151,152,153,154,155,156,156,
+157,158,159,160,160,161,162,163,164,164,165,166,167,168,168,169,170,171,171,172,173,174,174,175,176,176,177,178,179,179,180,181,
+182,182,183,184,184,185,186,186,187,188,188,189,190,190,191,192,192,193,194,194,195,196,196,197,198,198,199,200,200,201,202,202,
+203,204,204,205,205,206,207,207,208,208,209,210,210,211,212,212,213,213,214,215,215,216,216,217,218,218,219,219,220,220,221,222,
+222,223,223,224,224,225,226,226,227,227,228,228,229,230,230,231,231,232,232,233,233,234,235,235,236,236,237,237,238,238,239,239,
+240,240,241,242,242,243,243,244,244,245,245,246,246,247,247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255,255,255
};
const uint8_t ff_log2_tab[256]={