Initial revision

Originally committed as revision 2 to svn://svn.ffmpeg.org/ffmpeg/trunk
author: Fabrice Bellard <fabrice@bellard.org> 2000-12-20 00:02:47 +0000
committer: Fabrice Bellard <fabrice@bellard.org> 2000-12-20 00:02:47 +0000
commit: 9aeeeb63f7e1ab7b0b7bb839a5f258667a2d2d78 (patch)
tree: 133769894d45da35e05ded6ea39d33bb81e7ae18 /libav
parent: 77bb6835ba752bb9335d208963a53227bbb1bc63 (diff)
19 files changed, 7457 insertions, 0 deletions
diff --git a/libav/Makefile b/libav/Makefile
new file mode 100644
index 0000000000..6664e870cb
--- /dev/null
+++ b/libav/Makefile
@@ -0,0 +1,17 @@
+CFLAGS= -O2 -Wall -g
+LDFLAGS= -g
+# Objects that make up the static library.
+OBJS= common.o mpegvideo.o h263enc.o jrevdct.o jfdctfst.o \
+      mpegaudio.o ac3enc.o mjpegenc.o resample.o
+LIB= libav.a
+# Default target: build the static library.
+all: $(LIB)
+# Bundle all objects into the archive named by $(LIB).
+$(LIB): $(OBJS)
+	ar rcs $@ $(OBJS)
+# Pattern rule: compile each C source into the matching object file.
+%.o: %.c
+	gcc $(CFLAGS) -c -o $@ $< 
+# Remove objects, editor backup files and archives.
+clean: 
+	rm -f *.o *~ *.a 
diff --git a/libav/ac3enc.c b/libav/ac3enc.c
new file mode 100644
index 0000000000..b1126c4943
--- /dev/null
+++ b/libav/ac3enc.c
@@ -0,0 +1,1460 @@
+/*
+ * The simplest AC3 encoder
+ * Copyright (c) 2000 Gerard Lantau.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <netinet/in.h>
+#include <math.h>
+#include "avcodec.h"
+
+#include "ac3enc.h"
+#include "ac3tab.h"
+
+//#define DEBUG
+//#define DEBUG_BITALLOC
+#define NDEBUG
+#include <assert.h>
+
+#define MDCT_NBITS 9 /* log2 of the MDCT input length */
+#define N         (1 << MDCT_NBITS) /* MDCT input length in samples (512) */
+#define NB_BLOCKS 6 /* number of PCM blocks inside an AC3 frame */
+
+/* a block gets new exponents when the sum of absolute differences (L1 norm) to the previous block exceeds this number */
+#define EXP_DIFF_THRESHOLD 1000
+
+/* exponent encoding strategy: intermediate per-block decision made in compute_exp_strategy() */
+#define EXP_REUSE 0
+#define EXP_NEW   1
+/* final strategies for blocks that send exponents; encode_exp() groups 1, 2 or 4 exponents per coded value */
+#define EXP_D15   1
+#define EXP_D25   2
+#define EXP_D45   3
+/* forward declarations of the table initialisers defined further down */
+static void fft_init(int ln);
+static void ac3_crc_init(void);
+/* Convert a float to fixed point with 15 fractional bits, saturating to [-32767, 32767]. */
+static inline INT16 fix15(float a)
+{
+    int v;
+    v = (int)(a * (float)(1 << 15)); /* scale by 2^15; the cast truncates toward zero */
+    if (v < -32767)
+        v = -32767;
+    else if (v > 32767) 
+        v = 32767;
+    return v;
+}
+/* Update the running low-frequency compensation value a from the PSD of a band (b0) and of the next band (b1). */
+static inline int calc_lowcomp1(int a, int b0, int b1)
+{
+    if ((b0 + 256) == b1) { /* next band is exactly 256 units above: restart at 384 */
+        a = 384 ;
+    } else if (b0 > b1) {  /* falling spectrum: decay by 64, floored at 0 */
+        a = a - 64;
+        if (a < 0) a=0;
+    }
+    return a; /* left unchanged in every other case */
+}
+/* Same update as calc_lowcomp1(), with constants that depend on the band index bin. */
+static inline int calc_lowcomp(int a, int b0, int b1, int bin)
+{
+    if (bin < 7) { /* bands 0..6: identical to calc_lowcomp1() */
+        if ((b0 + 256) == b1) {
+            a = 384 ;
+        } else if (b0 > b1) { 
+            a = a - 64;
+            if (a < 0) a=0;
+        }
+    } else if (bin < 20) { /* bands 7..19: restart value is 320 instead of 384 */
+        if ((b0 + 256) == b1) {
+            a = 320 ;
+        } else if (b0 > b1) {
+            a= a - 64;
+            if (a < 0) a=0;
+        }
+    } else { /* bands 20 and up: always decay by 128, floored at 0 */
+        a = a - 128;
+        if (a < 0) a=0;
+    }
+    return a;
+}
+
+/* AC3 bit allocation. The algorithm is the one described in the AC3
+   spec with some optimizations because of our simplified encoding
+   assumptions. Reads exp[start..end-1] and writes bap[start..end-1]. */
+void parametric_bit_allocation(AC3EncodeContext *s, UINT8 *bap,
+                               INT8 *exp, int start, int end, /* NOTE(review): bit_alloc() passes UINT8 exponent arrays here */
+                               int snroffset, int fgain)
+{
+    int bin,i,j,k,end1,v,v1,bndstrt,bndend,lowcomp,begin;
+    int fastleak,slowleak,address,tmp;
+    INT16 psd[256]; /* scaled exponents, one per bin */
+    INT16 bndpsd[50]; /* PSD integrated over each band */
+    INT16 excite[50]; /* excitation */
+    INT16 mask[50];   /* masking value */
+
+    /* exponent mapping to PSD */
+    for(bin=start;bin<end;bin++) {
+        psd[bin]=(3072 - (exp[bin] << 7));
+    }
+
+    /* PSD integration: log-add the bins of every band into bndpsd[] */
+    j=start;
+    k=masktab[start]; /* band that contains the first bin */
+    do {
+        v=psd[j];
+        j++;
+        end1=bndtab[k+1]; /* first bin of the next band */
+        if (end1 > end) end1=end;
+        for(i=j;i<end1;i++) {
+            int c,adr;
+            /* logadd: the larger operand plus a correction looked up in latab[] */
+            v1=psd[j];
+            c=v-v1;
+            if (c >= 0) {
+                adr=c >> 1;
+                if (adr > 255) adr=255;
+                v=v + latab[adr];
+            } else {
+                adr=(-c) >> 1;
+                if (adr > 255) adr=255;
+                v=v1 + latab[adr];
+            }
+            j++;
+        }
+        bndpsd[k]=v;
+        k++;
+    } while (end > bndtab[k]); /* stop once the next band starts at or past end */
+
+    /* excitation function */
+    bndstrt = masktab[start];
+    bndend = masktab[end-1] + 1;
+    /* NOTE(review): the code below always begins at band 0 and reads bndpsd[0..7], whatever start/end are */
+    lowcomp = 0;
+    lowcomp = calc_lowcomp1(lowcomp, bndpsd[0], bndpsd[1]) ;
+    excite[0] = bndpsd[0] - fgain - lowcomp ;
+    lowcomp = calc_lowcomp1(lowcomp, bndpsd[1], bndpsd[2]) ;
+    excite[1] = bndpsd[1] - fgain - lowcomp ;
+    begin = 7 ;
+    for (bin = 2; bin < 7; bin++) {
+        lowcomp = calc_lowcomp1(lowcomp, bndpsd[bin], bndpsd[bin+1]) ;
+        fastleak = bndpsd[bin] - fgain ;
+        slowleak = bndpsd[bin] - s->sgain ;
+        excite[bin] = fastleak - lowcomp ;
+        if (bndpsd[bin] <= bndpsd[bin+1]) { /* spectrum stops falling: switch to the leaky loop early */
+            begin = bin + 1 ;
+            break ;
+        }
+    }
+    /* bands begin..21: fast/slow leaky decay combined with low-frequency compensation */
+    end1=bndend;
+    if (end1 > 22) end1=22;
+    
+    for (bin = begin; bin < end1; bin++) {
+        lowcomp = calc_lowcomp(lowcomp, bndpsd[bin], bndpsd[bin+1], bin) ;
+        
+        fastleak -= s->fdecay ;
+        v = bndpsd[bin] - fgain;
+        if (fastleak < v) fastleak = v;
+        
+        slowleak -= s->sdecay ;
+        v = bndpsd[bin] - s->sgain;
+        if (slowleak < v) slowleak = v;
+        
+        v=fastleak - lowcomp;
+        if (slowleak > v) v=slowleak;
+        
+        excite[bin] = v;
+    }
+    /* bands 22 and up: same leaky decay, without low-frequency compensation */
+    for (bin = 22; bin < bndend; bin++) {
+        fastleak -= s->fdecay ;
+        v = bndpsd[bin] - fgain;
+        if (fastleak < v) fastleak = v;
+        slowleak -= s->sdecay ;
+        v = bndpsd[bin] - s->sgain;
+        if (slowleak < v) slowleak = v;
+
+        v=fastleak;
+        if (slowleak > v) v = slowleak;
+        excite[bin] = v;
+    }
+
+    /* compute masking curve */
+    /* mask = max(excitation raised below the dbknee level, hth[] table value) */
+    for (bin = bndstrt; bin < bndend; bin++) {
+        v1 = excite[bin];
+        tmp = s->dbknee - bndpsd[bin];
+        if (tmp > 0) {
+            v1 += tmp >> 2;
+        }
+        v=hth[bin >> s->halfratecod][s->fscod];
+        if (v1 > v) v=v1;
+        mask[bin] = v;
+    }
+
+    /* compute bit allocation */
+    /* per band: offset and quantize the mask, then map (psd - mask) to a bap value through baptab[] */
+    i = start ;
+    j = masktab[start] ;
+    do {
+        v=mask[j];
+        v -= snroffset ;
+        v -= s->floor ;
+        if (v < 0) v = 0;
+        v &= 0x1fe0 ;
+        v += s->floor ;
+
+        end1=bndtab[j] + bndsz[j]; /* one past the last bin of band j */
+        if (end1 > end) end1=end;
+
+        for (k = i; k < end1; k++) {
+            address = (psd[i] - v) >> 5 ;
+            if (address < 0) address=0;
+            else if (address > 63) address=63;
+            bap[i] = baptab[address];
+            i++;
+        }
+    } while (end > bndtab[j++]) ; /* NOTE(review): compares against the band just processed, so one extra pass can run with mask[j] unset and an empty bin range */
+}
+
+typedef struct IComplex { /* complex value with short integer parts, the element type used by fft() */
+    short re,im; /* real and imaginary part */
+} IComplex;
+/* Fill costab/sintab (n/2 fixed-point twiddle factors) and the bit-reversal table fft_rev for n = 2^ln points. */
+static void fft_init(int ln)
+{
+    int i, j, m, n;
+    float alpha;
+
+    n = 1 << ln;
+    /* twiddle factors cos/sin(2*pi*i/n), converted with fix15() */
+    for(i=0;i<(n/2);i++) {
+        alpha = 2 * M_PI * (float)i / (float)n;
+        costab[i] = fix15(cos(alpha));
+        sintab[i] = fix15(sin(alpha));
+    }
+    /* fft_rev[i] is i with its ln low bits in mirrored order */
+    for(i=0;i<n;i++) {
+        m=0;
+        for(j=0;j<ln;j++) {
+            m |= ((i >> j) & 1) << (ln-j-1);
+        }
+        fft_rev[i]=m;
+    }
+}
+
+/* butterfly op: p = (p1 + q1) >> 1, q = (p1 - q1) >> 1; inputs are copied to locals first, so outputs may alias them */
+#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
+{\
+  int ax, ay, bx, by;\
+  bx=pre1;\
+  by=pim1;\
+  ax=qre1;\
+  ay=qim1;\
+  pre = (bx + ax) >> 1;\
+  pim = (by + ay) >> 1;\
+  qre = (bx - ax) >> 1;\
+  qim = (by - ay) >> 1;\
+}
+/* integer product of the two operands */
+#define MUL16(a,b) ((a) * (b))
+/* complex product (pre + i*pim) = (are + i*aim) * (bre + i*bim), shifted right by 15 to undo the fixed-point scale */
+#define CMUL(pre, pim, are, aim, bre, bim) \
+{\
+   pre = (MUL16(are, bre) - MUL16(aim, bim)) >> 15;\
+   pim = (MUL16(are, bim) + MUL16(bre, aim)) >> 15;\
+}
+
+
+/* do an in-place complex fft on the 2^ln points of z; uses the tables built by fft_init(ln). Every pass halves the values (see BF). */
+static void fft(IComplex *z, int ln)
+{
+    int	j, l, np, np2;
+    int	nblocks, nloops;
+    register IComplex *p,*q;
+    int tmp_re, tmp_im;
+
+    np = 1 << ln;
+
+    /* reverse: swap each element with its bit-reversed partner, once per pair */
+    for(j=0;j<np;j++) {
+        int k;
+        IComplex tmp;
+        k = fft_rev[j];
+        if (k < j) {
+            tmp = z[k];
+            z[k] = z[j];
+            z[j] = tmp;
+        }
+    }
+
+    /* pass 0: butterflies on adjacent pairs, no twiddle multiply needed */
+
+    p=&z[0];
+    j=(np >> 1);
+    do {
+        BF(p[0].re, p[0].im, p[1].re, p[1].im, 
+           p[0].re, p[0].im, p[1].re, p[1].im);
+        p+=2;
+    } while (--j != 0);
+
+    /* pass 1: groups of four; the second butterfly takes its q input multiplied by -i */
+
+    p=&z[0];
+    j=np >> 2;
+    do {
+        BF(p[0].re, p[0].im, p[2].re, p[2].im, 
+           p[0].re, p[0].im, p[2].re, p[2].im);
+        BF(p[1].re, p[1].im, p[3].re, p[3].im, 
+           p[1].re, p[1].im, p[3].im, -p[3].re);
+        p+=4;
+    } while (--j != 0);
+
+    /* pass 2 .. ln-1 */
+
+    nblocks = np >> 3;
+    nloops = 1 << 2;
+    np2 = np >> 1;
+    do {
+        p = z;
+        q = z + nloops;
+        for (j = 0; j < nblocks; ++j) {
+            /* first butterfly of a block is done without a twiddle multiply */
+            BF(p->re, p->im, q->re, q->im,
+               p->re, p->im, q->re, q->im);
+            
+            p++;
+            q++;
+            for(l = nblocks; l < np2; l += nblocks) {
+                CMUL(tmp_re, tmp_im, costab[l], -sintab[l], q->re, q->im); /* q times (cos - i*sin) */
+                BF(p->re, p->im, q->re, q->im,
+                   p->re, p->im, tmp_re, tmp_im);
+                p++;
+                q++;
+            }
+            p += nloops; /* skip the q half of this block */
+            q += nloops;
+        }
+        nblocks = nblocks >> 1;
+        nloops = nloops << 1;
+    } while (nblocks != 0);
+}
+
+/* do a 512 point mdct: reads N samples from in, writes N/2 coefficients to out, using an N/4 point complex fft */
+static void mdct512(INT32 *out, INT16 *in)
+{
+    int i, re, im, re1, im1;
+    INT16 rot[N]; 
+    IComplex x[N/4];
+
+    /* shift to simplify computations: rotate the input by N/4, negating the part that wraps around */
+    for(i=0;i<N/4;i++)
+        rot[i] = -in[i + 3*N/4];
+    for(i=N/4;i<N;i++)
+        rot[i] = in[i - N/4];
+        
+    /* pre rotation: fold N samples into N/4 complex values and multiply by (-xcos1 + i*xsin1) */
+    for(i=0;i<N/4;i++) {
+        re = ((int)rot[2*i] - (int)rot[N-1-2*i]) >> 1;
+        im = -((int)rot[N/2+2*i] - (int)rot[N/2-1-2*i]) >> 1;
+        CMUL(x[i].re, x[i].im, re, im, -xcos1[i], xsin1[i]);
+    }
+
+    fft(x, MDCT_NBITS - 2);
+  
+    /* post rotation: multiply by (xsin1 + i*xcos1) and interleave into out[] */
+    for(i=0;i<N/4;i++) {
+        re = x[i].re;
+        im = x[i].im;
+        CMUL(re1, im1, re, im, xsin1[i], xcos1[i]);
+        out[2*i] = im1;
+        out[N/2-1-2*i] = re1;
+    }
+}
+
+/* return the sum of absolute differences (L1 norm) between two exponent arrays of n entries. XXX: use another norm ? */
+static int calc_exp_diff(UINT8 *exp1, UINT8 *exp2, int n)
+{
+    int sum, i;
+    sum = 0;
+    for(i=0;i<n;i++) {
+        sum += abs(exp1[i] - exp2[i]);
+    }
+    return sum;
+}
+/* Choose the exponent strategy of every block of channel ch and store it in exp_strategy[block][ch]. */
+static void compute_exp_strategy(UINT8 exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS],
+                                 UINT8 exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2],
+                                 int ch)
+{
+    int i, j;
+    int exp_diff;
+    
+    /* estimate the exponent variation between consecutive blocks & decide
+       if they should be reused in the next block; block 0 always sends new ones */
+    exp_strategy[0][ch] = EXP_NEW;
+    for(i=1;i<NB_BLOCKS;i++) {
+        exp_diff = calc_exp_diff(exp[i][ch], exp[i-1][ch], N/2);
+#ifdef DEBUG            
+        printf("exp_diff=%d\n", exp_diff);
+#endif
+        if (exp_diff > EXP_DIFF_THRESHOLD)
+            exp_strategy[i][ch] = EXP_NEW;
+        else
+            exp_strategy[i][ch] = EXP_REUSE;
+    }
+    /* now select the encoding strategy type : if exponents are often
+       recoded, we use a coarse encoding */
+    i = 0;
+    while (i < NB_BLOCKS) {
+        j = i + 1;
+        while (j < NB_BLOCKS && exp_strategy[j][ch] == EXP_REUSE)
+            j++;
+        switch(j - i) { /* number of blocks that share the exponents sent in block i */
+        case 1:
+            exp_strategy[i][ch] = EXP_D45;
+            break;
+        case 2:
+        case 3:
+            exp_strategy[i][ch] = EXP_D25;
+            break;
+        default:
+            exp_strategy[i][ch] = EXP_D15;
+            break;
+        }
+        i = j; /* continue with the next block that sends new exponents */
+    }
+}
+
+/* set exp[i] to min(exp[i], exp1[i]) for the first n entries; exp1 is only read */
+static void exponent_min(UINT8 exp[N/2], UINT8 exp1[N/2], int n)
+{
+    int i;
+
+    for(i=0;i<n;i++) {
+        if (exp1[i] < exp[i])
+            exp[i] = exp1[i];
+    }
+}
+                                 
+/* update the exponents so that they are the ones the decoder will
+   decode (written to encoded_exp). Return the number of bits used to code the exponents */
+static int encode_exp(UINT8 encoded_exp[N/2], 
+                      UINT8 exp[N/2], 
+                      int nb_exps,
+                      int exp_strategy)
+{
+    int group_size, nb_groups, i, j, k, recurse, exp_min, delta;
+    UINT8 exp1[N/2];
+
+    switch(exp_strategy) { /* number of exponents that share one coded value */
+    case EXP_D15:
+        group_size = 1;
+        break;
+    case EXP_D25:
+        group_size = 2;
+        break;
+    default:
+    case EXP_D45:
+        group_size = 4;
+        break;
+    }
+    nb_groups = ((nb_exps + (group_size * 3) - 4) / (3 * group_size)) * 3; /* always a multiple of 3: values are sent three at a time */
+
+    /* for each group, compute the minimum exponent */
+    exp1[0] = exp[0]; /* DC exponent is handled separately */
+    k = 1;
+    for(i=1;i<=nb_groups;i++) {
+        exp_min = exp[k];
+        assert(exp_min >= 0 && exp_min <= 24);
+        for(j=1;j<group_size;j++) {
+            if (exp[k+j] < exp_min)
+                exp_min = exp[k+j];
+        }
+        exp1[i] = exp_min;
+        k += group_size;
+    }
+
+    /* constraint for DC exponent */
+    if (exp1[0] > 15)
+        exp1[0] = 15;
+
+    /* Iterate until the delta constraints between each groups are
+       satisfied (|delta| <= 2). I'm sure it is possible to find a better
+       algorithm, but I am lazy */
+    do {
+        recurse = 0;
+        for(i=1;i<=nb_groups;i++) {
+            delta = exp1[i] - exp1[i-1];
+            if (delta > 2) {
+                /* if delta too big, we encode a smaller exponent */
+                exp1[i] = exp1[i-1] + 2;
+            } else if (delta < -2) {
+                /* if delta is too small, we must decrease the previous
+               exponent, which means we must recurse */
+                recurse = 1;
+                exp1[i-1] = exp1[i] + 2;
+            }
+        }
+    } while (recurse);
+    
+    /* now we have the exponent values the decoder will see */
+    encoded_exp[0] = exp1[0];
+    k = 1;
+    for(i=1;i<=nb_groups;i++) {
+        for(j=0;j<group_size;j++) {
+            encoded_exp[k+j] = exp1[i]; /* every member of the group gets the group value */
+        }
+        k += group_size;
+    }
+    
+#if defined(DEBUG)
+    printf("exponents: strategy=%d\n", exp_strategy);
+    for(i=0;i<=nb_groups * group_size;i++) {
+        printf("%d ", encoded_exp[i]);
+    }
+    printf("\n");
+#endif
+
+    return 4 + (nb_groups / 3) * 7; /* 4 bits for the first exponent, then 7 bits per three grouped values */
+}
+
+/* return the size in bits taken by the mantissa of nb_coefs coefficients; m[] holds their bap values. The group counters in s carry over between calls. */
+int compute_mantissa_size(AC3EncodeContext *s, UINT8 *m, int nb_coefs)
+{
+    int bits, mant, i;
+
+    bits = 0;
+    for(i=0;i<nb_coefs;i++) {
+        mant = m[i];
+        switch(mant) {
+        case 0:
+            /* nothing */
+            break;
+        case 1:
+            /* 3 mantissa in 5 bits, charged when the group is opened */
+            if (s->mant1_cnt == 0) 
+                bits += 5;
+            if (++s->mant1_cnt == 3)
+                s->mant1_cnt = 0;
+            break;
+        case 2:
+            /* 3 mantissa in 7 bits */
+            if (s->mant2_cnt == 0) 
+                bits += 7;
+            if (++s->mant2_cnt == 3)
+                s->mant2_cnt = 0;
+            break;
+        case 3:
+            bits += 3;
+            break;
+        case 4:
+            /* 2 mantissa in 7 bits */
+            if (s->mant4_cnt == 0)
+                bits += 7;
+            if (++s->mant4_cnt == 2) 
+                s->mant4_cnt = 0;
+            break;
+        case 14:
+            bits += 14;
+            break;
+        case 15:
+            bits += 16;
+            break;
+        default: /* bap 5..13: one mantissa of bap - 1 bits */
+            bits += mant - 1;
+            break;
+        }
+    }
+    return bits;
+}
+
+/* Compute bap for every block and channel with the given SNR offsets; return the bits left in the frame (negative if it does not fit). */
+static int bit_alloc(AC3EncodeContext *s,
+                     UINT8 bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2],
+                     UINT8 encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2],
+                     UINT8 exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS], /* not used in this function */
+                     int frame_bits, int csnroffst, int fsnroffst)
+{
+    int i, ch;
+
+    /* compute size */
+    for(i=0;i<NB_BLOCKS;i++) {
+        s->mant1_cnt = 0; /* mantissa groups restart in every block */
+        s->mant2_cnt = 0;
+        s->mant4_cnt = 0;
+        for(ch=0;ch<s->nb_channels;ch++) {
+            parametric_bit_allocation(s, bap[i][ch], encoded_exp[i][ch], 
+                                      0, s->nb_coefs[ch], 
+                                      (((csnroffst-15) << 4) + 
+                                       fsnroffst) << 2, 
+                                      fgaintab[s->fgaincod[ch]]);
+            frame_bits += compute_mantissa_size(s, bap[i][ch], 
+                                                 s->nb_coefs[ch]);
+        }
+    }
+#if 0
+    printf("csnr=%d fsnr=%d frame_bits=%d diff=%d\n", 
+           csnroffst, fsnroffst, frame_bits, 
+           16 * s->frame_size - ((frame_bits + 7) & ~7));
+#endif
+    return 16 * s->frame_size - frame_bits; /* s->frame_size is multiplied by 16 to get the frame capacity in bits */
+}
+
+#define SNR_INC1 4 /* coarse step used when searching the SNR offsets */
+/* Find the largest SNR offsets whose allocation fits in the frame and leave that allocation in bap. Returns 0, or -1 if nothing fits. */
+static int compute_bit_allocation(AC3EncodeContext *s,
+                                  UINT8 bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2],
+                                  UINT8 encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2],
+                                  UINT8 exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS],
+                                  int frame_bits)
+{
+    int i, ch;
+    int csnroffst, fsnroffst;
+    UINT8 bap1[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; /* trial allocation, copied to bap only when it fits */
+
+    /* init default parameters */
+    s->sdecaycod = 2;
+    s->fdecaycod = 1;
+    s->sgaincod = 1;
+    s->dbkneecod = 2;
+    s->floorcod = 4;
+    for(ch=0;ch<s->nb_channels;ch++) 
+        s->fgaincod[ch] = 4;
+    
+    /* compute real values */
+    s->sdecay = sdecaytab[s->sdecaycod] >> s->halfratecod;
+    s->fdecay = fdecaytab[s->fdecaycod] >> s->halfratecod;
+    s->sgain = sgaintab[s->sgaincod];
+    s->dbknee = dbkneetab[s->dbkneecod];
+    s->floor = floortab[s->floorcod];
+
+    /* header size: the bits written by output_frame_header() */
+    frame_bits += 65;
+    if (s->acmod == 2)
+        frame_bits += 2;
+
+    /* audio blocks: the fixed side information written by output_audio_block() */
+    for(i=0;i<NB_BLOCKS;i++) {
+        frame_bits += s->nb_channels * 2 + 2;
+        if (s->acmod == 2)
+            frame_bits++;
+        frame_bits += 2 * s->nb_channels;
+        for(ch=0;ch<s->nb_channels;ch++) {
+            if (exp_strategy[i][ch] != EXP_REUSE)
+                frame_bits += 6 + 2;
+        }
+        frame_bits++; /* baie */
+        frame_bits++; /* snr */
+        frame_bits += 2; /* delta / skip */
+    }
+    frame_bits++; /* cplinu for block 0 */
+    /* bit alloc info */
+    frame_bits += 2*4 + 3 + 6 + s->nb_channels * (4 + 3);
+
+    /* CRC */
+    frame_bits += 16;
+
+    /* now the big work begins : do the bit allocation. Modify the snr
+       offset until we can pack everything in the requested frame size */
+
+    csnroffst = s->csnroffst; /* start from the value kept in the context */
+    while (csnroffst >= 0 && 
+           bit_alloc(s, bap, encoded_exp, exp_strategy, frame_bits, csnroffst, 0) < 0)
+        csnroffst -= SNR_INC1;
+    if (csnroffst < 0) {
+        fprintf(stderr, "Error !!!\n");
+        return -1;
+    }
+    while ((csnroffst + SNR_INC1) <= 63 &&  /* NOTE(review): memcpy needs <string.h>, which this file does not include directly; presumably it arrives through a project header, confirm */
+           bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, 
+                     csnroffst + SNR_INC1, 0) >= 0) {
+        csnroffst += SNR_INC1;
+        memcpy(bap, bap1, sizeof(bap1));
+    }
+    while ((csnroffst + 1) <= 63 && 
+           bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, csnroffst + 1, 0) >= 0) {
+        csnroffst++;
+        memcpy(bap, bap1, sizeof(bap1));
+    }
+
+    fsnroffst = 0; /* then refine with the fine offset, coarse steps first */
+    while ((fsnroffst + SNR_INC1) <= 15 && 
+           bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, 
+                     csnroffst, fsnroffst + SNR_INC1) >= 0) {
+        fsnroffst += SNR_INC1;
+        memcpy(bap, bap1, sizeof(bap1));
+    }
+    while ((fsnroffst + 1) <= 15 && 
+           bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, 
+                     csnroffst, fsnroffst + 1) >= 0) {
+        fsnroffst++;
+        memcpy(bap, bap1, sizeof(bap1));
+    }
+    /* remember the result: it is written to the stream and is the starting point of the next call */
+    s->csnroffst = csnroffst;
+    for(ch=0;ch<s->nb_channels;ch++)
+        s->fsnroffst[ch] = fsnroffst;
+#if defined(DEBUG_BITALLOC)
+    {
+        int j;
+
+        for(i=0;i<6;i++) {
+            for(ch=0;ch<s->nb_channels;ch++) {
+                printf("Block #%d Ch%d:\n", i, ch);
+                printf("bap=");
+                for(j=0;j<s->nb_coefs[ch];j++) {
+                    printf("%d ",bap[i][ch][j]);
+                }
+                printf("\n");
+            }
+        }
+    }
+#endif
+    return 0;
+}
+/* Set up the encoder context and the shared tables. Returns 0, or -1 for an unsupported channel count, sample rate or bit rate. */
+static int AC3_encode_init(AVEncodeContext *avctx)
+{
+    int freq = avctx->rate;
+    int bitrate = avctx->bit_rate;
+    int channels = avctx->channels;
+    AC3EncodeContext *s = avctx->priv_data;
+    int i, j, k, l, ch, v;
+    float alpha;
+    static unsigned short freqs[3] = { 48000, 44100, 32000 };
+
+    avctx->frame_size = AC3_FRAME_SIZE;
+    avctx->key_frame = 1; /* always key frame */
+    
+    /* number of channels: only mono and stereo are handled */
+    if (channels == 1)
+        s->acmod = 1;
+    else if (channels == 2)
+        s->acmod = 2;
+    else
+        return -1;
+    s->nb_channels = channels;
+
+    /* frequency: one of the base rates, optionally halved i times (i = 0..2) */
+    for(i=0;i<3;i++) {
+        for(j=0;j<3;j++) 
+            if ((freqs[j] >> i) == freq)
+                goto found;
+    }
+    return -1;
+ found:    
+    s->sample_rate = freq;
+    s->halfratecod = i;
+    s->fscod = j;
+    s->bsid = 8 + s->halfratecod;
+    s->bsmod = 0; /* complete main audio service */
+
+    /* bitrate & frame size: the rate in kbit/s must match an entry of bitratetab[] */
+    bitrate /= 1000;
+    for(i=0;i<19;i++) {
+        if ((bitratetab[i] >> s->halfratecod) == bitrate)
+            break;
+    }
+    if (i == 19)
+        return -1;
+    s->bit_rate = bitrate;
+    s->frmsizecod = i << 1;
+    s->frame_size_min = (bitrate * 1000 * AC3_FRAME_SIZE) / (freq * 16);
+    /* for now we do not handle fractional sizes */
+    s->frame_size = s->frame_size_min;
+    
+    /* bit allocation init */
+    for(ch=0;ch<s->nb_channels;ch++) {
+        /* bandwidth for each channel */
+        /* XXX: should compute the bandwidth according to the frame
+           size, so that we avoid annoying high freq artefacts */
+        s->chbwcod[ch] = 50; /* sample bandwidth as mpeg audio layer 2 table 0 */
+        s->nb_coefs[ch] = ((s->chbwcod[ch] + 12) * 3) + 37;
+    }
+    /* initial snr offset */
+    s->csnroffst = 40;
+
+    /* compute bndtab and masktab from bandsz */
+    k = 0;
+    l = 0;
+    for(i=0;i<50;i++) {
+        bndtab[i] = l;
+        v = bndsz[i];
+        for(j=0;j<v;j++) masktab[k++]=i;
+        l += v;
+    }
+    bndtab[50] = l; /* end sentinel, one past the last bin: lets "end > bndtab[k]" loops stop after band 49 (0 here made them run on) */
+
+    /* mdct init */
+    fft_init(MDCT_NBITS - 2);
+    for(i=0;i<N/4;i++) {
+        alpha = 2 * M_PI * (i + 1.0 / 8.0) / (float)N;
+        xcos1[i] = fix15(-cos(alpha));
+        xsin1[i] = fix15(-sin(alpha));
+    }
+
+    ac3_crc_init();
+
+    return 0;
+}
+
+/* output the AC3 frame header: (re)initialises the bit writer s->pb on frame and writes the sync and stream information fields */
+static void output_frame_header(AC3EncodeContext *s, unsigned char *frame)
+{
+    init_put_bits(&s->pb, frame, AC3_MAX_CODED_FRAME_SIZE, NULL, NULL);
+
+    put_bits(&s->pb, 16, 0x0b77); /* frame header */
+    put_bits(&s->pb, 16, 0); /* crc1: will be filled later */
+    put_bits(&s->pb, 2, s->fscod);
+    put_bits(&s->pb, 6, s->frmsizecod + (s->frame_size - s->frame_size_min));
+    put_bits(&s->pb, 5, s->bsid);
+    put_bits(&s->pb, 3, s->bsmod);
+    put_bits(&s->pb, 3, s->acmod);
+    if (s->acmod == 2) {
+        put_bits(&s->pb, 2, 0); /* surround not indicated */
+    }
+    put_bits(&s->pb, 1, 0); /* no LFE */
+    put_bits(&s->pb, 5, 31); /* dialog norm: -31 db */
+    put_bits(&s->pb, 1, 0); /* no compression control word */
+    put_bits(&s->pb, 1, 0); /* no lang code */
+    put_bits(&s->pb, 1, 0); /* no audio production info */
+    put_bits(&s->pb, 1, 0); /* no copyright */
+    put_bits(&s->pb, 1, 1); /* original bitstream */
+    put_bits(&s->pb, 1, 0); /* no time code 1 */
+    put_bits(&s->pb, 1, 0); /* no time code 2 */
+    put_bits(&s->pb, 1, 0); /* no additional bit stream info */
+}
+
+/* symmetric quantization on 'levels' levels: c is the coefficient, e a left shift applied to its magnitude; the result is centred on levels/2 */
+static inline int sym_quant(int c, int e, int levels)
+{
+    int v;
+
+    if (c >= 0) {
+        v = (levels * (c << e)) >> 25;
+        v = (levels >> 1) + v;
+    } else {
+        v = (levels * ((-c) << e)) >> 25; /* quantize the magnitude, then mirror below the centre */
+        v = (levels >> 1) - v;
+    }
+    assert (v >= 0 && v < levels);
+    return v;
+}
+
+/* asymmetric quantization on 2^qbits levels: returns the rounded value as a qbits-wide two's complement field */
+static inline int asym_quant(int c, int e, int qbits)
+{
+    int lshift, m, v;
+
+    lshift = e + qbits - 24; /* shift that leaves one extra low bit for the rounding below */
+    if (lshift >= 0)
+        v = c << lshift;
+    else
+        v = c >> (-lshift);
+    /* rounding */
+    v = (v + 1) >> 1;
+    m = (1 << (qbits-1));
+    if (v >= m) /* clamp the positive overflow that rounding can produce */
+        v = m - 1;
+    assert(v >= -m);
+    return v & ((1 << qbits)-1);
+}
+
+/* Output one audio block. There are NB_BLOCKS audio blocks in one AC3
+   frame. The array arguments hold the per-channel data of this block; block_num is its index in the frame. */
+static void output_audio_block(AC3EncodeContext *s,
+                               UINT8 exp_strategy[AC3_MAX_CHANNELS],
+                               UINT8 encoded_exp[AC3_MAX_CHANNELS][N/2],
+                               UINT8 bap[AC3_MAX_CHANNELS][N/2],
+                               INT32 mdct_coefs[AC3_MAX_CHANNELS][N/2],
+                               INT8 global_exp[AC3_MAX_CHANNELS],
+                               int block_num)
+{
+    int ch, nb_groups, group_size, i, baie;
+    UINT8 *p;
+    UINT16 qmant[AC3_MAX_CHANNELS][N/2]; /* quantized mantissas; 128 marks a slot merged into an earlier group */
+    int exp0, exp1;
+    int mant1_cnt, mant2_cnt, mant4_cnt;
+    UINT16 *qmant1_ptr, *qmant2_ptr, *qmant4_ptr;
+    int delta0, delta1, delta2;
+
+    for(ch=0;ch<s->nb_channels;ch++) 
+        put_bits(&s->pb, 1, 0); /* 512 point MDCT */
+    for(ch=0;ch<s->nb_channels;ch++) 
+        put_bits(&s->pb, 1, 1); /* no dither -- NOTE(review): the bit is written as 1; confirm the flag polarity against the spec */
+    put_bits(&s->pb, 1, 0); /* no dynamic range */
+    if (block_num == 0) {
+        /* for block 0, even if no coupling, we must say it. This is a
+           waste of bit :-) */
+        put_bits(&s->pb, 1, 1); /* coupling strategy present */
+        put_bits(&s->pb, 1, 0); /* no coupling strategy */
+    } else {
+        put_bits(&s->pb, 1, 0); /* no new coupling strategy */
+    }
+
+    if (s->acmod == 2) {
+        put_bits(&s->pb, 1, 0); /* no matrixing (but should be used in the future) */
+    }
+
+#if defined(DEBUG) 
+    {
+        static int count = 0;
+        printf("Block #%d (%d)\n", block_num, count++);
+    }
+#endif
+    /* exponent strategy */
+    for(ch=0;ch<s->nb_channels;ch++) {
+        put_bits(&s->pb, 2, exp_strategy[ch]);
+    }
+    /* channel bandwidth code, only sent together with new exponents */
+    for(ch=0;ch<s->nb_channels;ch++) {
+        if (exp_strategy[ch] != EXP_REUSE)
+            put_bits(&s->pb, 6, s->chbwcod[ch]);
+    }
+    
+    /* exponents */
+    for (ch = 0; ch < s->nb_channels; ch++) {
+        switch(exp_strategy[ch]) {
+        case EXP_REUSE:
+            continue;
+        case EXP_D15:
+            group_size = 1;
+            break;
+        case EXP_D25:
+            group_size = 2;
+            break;
+        default:
+        case EXP_D45:
+            group_size = 4;
+            break;
+        }
+        nb_groups = (s->nb_coefs[ch] + (group_size * 3) - 4) / (3 * group_size); /* number of 7-bit codes, each carrying three deltas */
+        p = encoded_exp[ch];
+
+        /* first exponent */
+        exp1 = *p++;
+        put_bits(&s->pb, 4, exp1);
+
+        /* next ones are delta encoded */
+        for(i=0;i<nb_groups;i++) {
+            /* merge three delta in one code; each delta is biased by 2 into 0..4 */
+            exp0 = exp1;
+            exp1 = p[0];
+            p += group_size;
+            delta0 = exp1 - exp0 + 2;
+
+            exp0 = exp1;
+            exp1 = p[0];
+            p += group_size;
+            delta1 = exp1 - exp0 + 2;
+
+            exp0 = exp1;
+            exp1 = p[0];
+            p += group_size;
+            delta2 = exp1 - exp0 + 2;
+
+            put_bits(&s->pb, 7, ((delta0 * 5 + delta1) * 5) + delta2);
+        }
+
+        put_bits(&s->pb, 2, 0); /* no gain range info */
+    }
+
+    /* bit allocation info: only sent in the first block of the frame */
+    baie = (block_num == 0);
+    put_bits(&s->pb, 1, baie);
+    if (baie) {
+        put_bits(&s->pb, 2, s->sdecaycod);
+        put_bits(&s->pb, 2, s->fdecaycod);
+        put_bits(&s->pb, 2, s->sgaincod);
+        put_bits(&s->pb, 2, s->dbkneecod);
+        put_bits(&s->pb, 3, s->floorcod);
+    }
+
+    /* snr offset */
+    put_bits(&s->pb, 1, baie); /* always present with bai */
+    if (baie) {
+        put_bits(&s->pb, 6, s->csnroffst);
+        for(ch=0;ch<s->nb_channels;ch++) {
+            put_bits(&s->pb, 4, s->fsnroffst[ch]);
+            put_bits(&s->pb, 3, s->fgaincod[ch]);
+        }
+    }
+    
+    put_bits(&s->pb, 1, 0); /* no delta bit allocation */
+    put_bits(&s->pb, 1, 0); /* no data to skip */
+
+    /* mantissa encoding : we use two passes to handle the grouping. A
+       one pass method may be faster, but it would necessitate to
+       modify the output stream. */
+
+    /* first pass: quantize */
+    mant1_cnt = mant2_cnt = mant4_cnt = 0;
+    qmant1_ptr = qmant2_ptr = qmant4_ptr = NULL;
+
+    for (ch = 0; ch < s->nb_channels; ch++) {
+        int b, c, e, v;
+
+        for(i=0;i<s->nb_coefs[ch];i++) {
+            c = mdct_coefs[ch][i];
+            e = encoded_exp[ch][i] - global_exp[ch];
+            b = bap[ch][i];
+            switch(b) {
+            case 0:
+                v = 0;
+                break;
+            case 1: /* 3 levels, three values packed as 9*a + 3*b + c in the first slot */
+                v = sym_quant(c, e, 3);
+                switch(mant1_cnt) {
+                case 0:
+                    qmant1_ptr = &qmant[ch][i];
+                    v = 9 * v;
+                    mant1_cnt = 1;
+                    break;
+                case 1:
+                    *qmant1_ptr += 3 * v;
+                    mant1_cnt = 2;
+                    v = 128;
+                    break;
+                default:
+                    *qmant1_ptr += v;
+                    mant1_cnt = 0;
+                    v = 128;
+                    break;
+                }
+                break;
+            case 2: /* 5 levels, three values packed as 25*a + 5*b + c */
+                v = sym_quant(c, e, 5);
+                switch(mant2_cnt) {
+                case 0:
+                    qmant2_ptr = &qmant[ch][i];
+                    v = 25 * v;
+                    mant2_cnt = 1;
+                    break;
+                case 1:
+                    *qmant2_ptr += 5 * v;
+                    mant2_cnt = 2;
+                    v = 128;
+                    break;
+                default:
+                    *qmant2_ptr += v;
+                    mant2_cnt = 0;
+                    v = 128;
+                    break;
+                }
+                break;
+            case 3:
+                v = sym_quant(c, e, 7);
+                break;
+            case 4: /* 11 levels, two values packed as 11*a + b */
+                v = sym_quant(c, e, 11);
+                switch(mant4_cnt) {
+                case 0:
+                    qmant4_ptr = &qmant[ch][i];
+                    v = 11 * v;
+                    mant4_cnt = 1;
+                    break;
+                default:
+                    *qmant4_ptr += v;
+                    mant4_cnt = 0;
+                    v = 128;
+                    break;
+                }
+                break;
+            case 5:
+                v = sym_quant(c, e, 15);
+                break;
+            case 14:
+                v = asym_quant(c, e, 14);
+                break;
+            case 15:
+                v = asym_quant(c, e, 16);
+                break;
+            default:
+                v = asym_quant(c, e, b - 1);
+                break;
+            }
+            qmant[ch][i] = v;
+        }
+    }
+
+    /* second pass : output the values */
+    for (ch = 0; ch < s->nb_channels; ch++) {
+        int b, q;
+        
+        for(i=0;i<s->nb_coefs[ch];i++) {
+            q = qmant[ch][i];
+            b = bap[ch][i];
+            switch(b) {
+            case 0:
+                break;
+            case 1:
+                if (q != 128) /* 128 = already emitted with the first value of its group */
+                    put_bits(&s->pb, 5, q);
+                break;
+            case 2:
+                if (q != 128) 
+                    put_bits(&s->pb, 7, q);
+                break;
+            case 3:
+                put_bits(&s->pb, 3, q);
+                break;
+            case 4:
+                if (q != 128)
+                    put_bits(&s->pb, 7, q);
+                break;
+            case 14:
+                put_bits(&s->pb, 14, q);
+                break;
+            case 15:
+                put_bits(&s->pb, 16, q);
+                break;
+            default:
+                put_bits(&s->pb, b - 1, q);
+                break;
+            }
+        }
+    }
+}
+
+/* compute the ac3 crc */
+
+#define CRC16_POLY ((1 << 0) | (1 << 2) | (1 << 15) | (1 << 16))
+
+/* Fill crc_table[]: entry n is the 16 bit register obtained by clocking byte n through CRC16_POLY. */
+static void ac3_crc_init(void)
+{
+    unsigned int byte, bit, rem;
+
+    for (byte = 0; byte < 256; byte++) {
+        rem = byte << 8;
+        /* feed the byte MSB first, reducing whenever bit 15 is shifted out */
+        for (bit = 0; bit < 8; bit++) {
+            rem = ((rem << 1) & 0xffff) ^
+                  ((rem & 0x8000) ? (CRC16_POLY & 0xffff) : 0);
+        }
+        crc_table[byte] = rem;
+    }
+}
+
+/* Update the 16 bit 'crc' with the n bytes at 'data' and return the new value. */
+static unsigned int ac3_crc(UINT8 *data, int n, unsigned int crc)
+{
+    /* table driven, MSB first: the top byte of crc selects the entry */
+    while (n-- > 0)
+        crc = ((crc << 8) ^ crc_table[*data++ ^ (crc >> 8)]) & 0xffff;
+    return crc;
+}
+
+/* Multiply a by b as GF(2) polynomials, modulo poly (expected to have bit 16 set). */
+static unsigned int mul_poly(unsigned int a, unsigned int b, unsigned int poly)
+{
+    unsigned int prod = 0;
+
+    /* shift-and-add: bit k of a contributes b * x^k, kept reduced below x^16 */
+    for (; a != 0; a >>= 1) {
+        if (a & 1)
+            prod ^= b;
+        b <<= 1;
+        if (b & 0x10000)
+            b ^= poly;
+    }
+    return prod;
+}
+
+/* Raise a to the n-th power modulo poly by square-and-multiply (a^0 is 1). */
+static unsigned int pow_poly(unsigned int a, unsigned int n, unsigned int poly)
+{
+    unsigned int result = 1;
+
+    for (; n != 0; n >>= 1) {
+        if (n & 1)
+            result = mul_poly(result, a, poly);
+        a = mul_poly(a, a, poly); /* square for the next exponent bit */
+    }
+    return result;
+}
+
+
+/* Return log2() of the OR of all |tab[i]|; the OR has the same highest
+   set bit as the largest magnitude, so this is log2(max(abs(tab[]))). */
+static int log2_tab(INT16 *tab, int n)
+{
+    int mag_bits = 0;
+
+    /* accumulate every bit used by any magnitude */
+    while (n-- > 0)
+        mag_bits |= abs(*tab++);
+    return log2(mag_bits);
+}
+
+/* Shift the n entries of tab in place: left when lshift > 0, right by -lshift when < 0. */
+static void lshift_tab(INT16 *tab, int n, int lshift)
+{
+    int i;
+
+    if (lshift == 0)
+        return;
+    if (lshift > 0) {
+        for (i = 0; i < n; i++)
+            tab[i] <<= lshift;
+    } else {
+        for (i = 0; i < n; i++)
+            tab[i] >>= -lshift;
+    }
+}
+
+/* fill the end of the frame with zeros, store the two crcs and return the frame size in bytes */
+static int output_frame_end(AC3EncodeContext *s)
+{
+    int frame_size, frame_size_58, n, crc1, crc2, crc_inv;
+    UINT8 *frame;
+
+    frame_size = s->frame_size; /* frame size in words */
+    /* align to 8 bits (NOTE(review): assumes s->pb has no write_data callback, else buf_ptr is rewound - confirm) */
+    flush_put_bits(&s->pb);
+    /* add zero bytes to reach the frame size, leaving the last 2 bytes for crc2 */
+    frame = s->pb.buf;
+    n = 2 * s->frame_size - (s->pb.buf_ptr - frame) - 2;
+    assert(n >= 0);
+    memset(s->pb.buf_ptr, 0, n);
+    
+    /* Now we must compute both crcs : this is not so easy for crc1
+       because it is at the beginning of the data... */
+    frame_size_58 = (frame_size >> 1) + (frame_size >> 3); /* 5/8 of the frame, in words */
+    crc1 = ac3_crc(frame + 4, (2 * frame_size_58) - 4, 0); /* bytes 4 .. end of the first 5/8 */
+    /* XXX: could precompute crc_inv. (CRC16_POLY >> 1) is the inverse of x modulo CRC16_POLY */
+    crc_inv = pow_poly((CRC16_POLY >> 1), (16 * frame_size_58) - 16, CRC16_POLY);
+    crc1 = mul_poly(crc_inv, crc1, CRC16_POLY);
+    frame[2] = crc1 >> 8; /* crc1 is stored big endian in bytes 2 and 3 */
+    frame[3] = crc1;
+    
+    crc2 = ac3_crc(frame + 2 * frame_size_58, (frame_size - frame_size_58) * 2 - 2, 0); /* remaining 3/8, minus the crc2 bytes */
+    frame[2*frame_size - 2] = crc2 >> 8; /* crc2 is stored big endian in the last 2 bytes */
+    frame[2*frame_size - 1] = crc2;
+
+    //    printf("n=%d frame_size=%d\n", n, frame_size);
+    return frame_size * 2;
+}
+/* Encode one frame from NB_BLOCKS * (N/2) interleaved samples per channel in 'data'; returns the frame size in bytes ('buf_size' is not checked). */
+int AC3_encode_frame(AVEncodeContext *avctx,
+                     unsigned char *frame, int buf_size, void *data)
+{
+    AC3EncodeContext *s = avctx->priv_data;
+    short *samples = data; /* interleaved: sample t of channel ch is at samples[t * nb_channels + ch] */
+    int i, j, k, v, ch;
+    INT16 input_samples[N]; /* previous N/2 samples followed by the N/2 new ones */
+    INT32 mdct_coef[NB_BLOCKS][AC3_MAX_CHANNELS][N/2];
+    UINT8 exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; /* raw exponents, capped at 24 */
+    UINT8 exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS]; /* filled by compute_exp_strategy() */
+    UINT8 encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; /* exponents as the decoder will see them */
+    UINT8 bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; /* handed to compute_bit_allocation(), then to output_audio_block() */
+    INT8 exp_samples[NB_BLOCKS][AC3_MAX_CHANNELS]; /* normalization shift of each block, minus 8 */
+    int frame_bits; /* sum of the encode_exp() return values */
+
+    frame_bits = 0;
+    for(ch=0;ch<s->nb_channels;ch++) {
+        /* fixed mdct to the six sub blocks & exponent computation */
+        for(i=0;i<NB_BLOCKS;i++) {
+            INT16 *sptr;
+            int sinc;
+
+            /* compute input samples: the first half is what the previous block saved in last_samples */
+            memcpy(input_samples, s->last_samples[ch], N/2 * sizeof(INT16));
+            sinc = s->nb_channels;
+            sptr = samples + (sinc * (N/2) * i) + ch;
+            for(j=0;j<N/2;j++) {
+                v = *sptr;
+                input_samples[j + N/2] = v;
+                s->last_samples[ch][j] = v; 
+                sptr += sinc;
+            }
+
+            /* apply the MDCT window (ac3_window[j] is used for samples j and N-j-1) */
+            for(j=0;j<N/2;j++) {
+                input_samples[j] = MUL16(input_samples[j], 
+                                         ac3_window[j]) >> 15;
+                input_samples[N-j-1] = MUL16(input_samples[N-j-1], 
+                                             ac3_window[j]) >> 15;
+            }
+        
+            /* Normalize the samples to use the maximum available
+               precision */
+            v = 14 - log2_tab(input_samples, N);
+            if (v < 0)
+                v = 0;
+            exp_samples[i][ch] = v - 8;
+            lshift_tab(input_samples, N, v);
+
+            /* do the MDCT */
+            mdct512(mdct_coef[i][ch], input_samples);
+            
+            /* compute "exponents". We take into account the
+               normalization there */
+            for(j=0;j<N/2;j++) {
+                int e;
+                v = abs(mdct_coef[i][ch][j]);
+                if (v == 0)
+                    e = 24;
+                else {
+                    e = 23 - log2(v) + exp_samples[i][ch];
+                    if (e >= 24) {
+                        e = 24; /* too small to be coded: the coefficient is dropped */
+                        mdct_coef[i][ch][j] = 0;
+                    }
+                }
+                exp[i][ch][j] = e;
+            }
+        }
+        
+        compute_exp_strategy(exp_strategy, exp, ch);
+
+        /* compute the exponents as the decoder will see them. The
+           EXP_REUSE case must be handled carefully : we select the
+           min of the exponents */
+        i = 0;
+        while (i < NB_BLOCKS) {
+            j = i + 1; /* blocks i+1 .. j-1 reuse the exponents of block i */
+            while (j < NB_BLOCKS && exp_strategy[j][ch] == EXP_REUSE) {
+                exponent_min(exp[i][ch], exp[j][ch], s->nb_coefs[ch]);
+                j++;
+            }
+            frame_bits += encode_exp(encoded_exp[i][ch],
+                                     exp[i][ch], s->nb_coefs[ch], 
+                                     exp_strategy[i][ch]);
+            /* copy encoded exponents for reuse case */
+            for(k=i+1;k<j;k++) {
+                memcpy(encoded_exp[k][ch], encoded_exp[i][ch], 
+                       s->nb_coefs[ch] * sizeof(UINT8));
+            }
+            i = j;
+        }
+    }
+
+    compute_bit_allocation(s, bap, encoded_exp, exp_strategy, frame_bits);
+    /* everything is known... let's output the frame */
+    output_frame_header(s, frame);
+        
+    for(i=0;i<NB_BLOCKS;i++) {
+        output_audio_block(s, exp_strategy[i], encoded_exp[i], 
+                           bap[i], mdct_coef[i], exp_samples[i], i);
+    }
+    return output_frame_end(s);
+}
+
+#if 0
+/*************************************************************************/
+/* TEST */
+
+#define FN (N/4)
+
+void fft_test(void)
+{
+    IComplex in[FN], in1[FN]; /* in is transformed in place, in1 keeps the original input */
+    int k, n, i;
+    float sum_re, sum_im, a;
+
+    /* FFT test (disabled code): print fft() of random input next to a direct DFT of the same data */
+
+    for(i=0;i<FN;i++) {
+        in[i].re = random() % 65535 - 32767;
+        in[i].im = random() % 65535 - 32767;
+        in1[i] = in[i];
+    }
+    fft(in, 7);
+
+    /* do it by hand; the reference is divided by FN before printing (presumably fft() scales by 1/FN - confirm) */
+    for(k=0;k<FN;k++) {
+        sum_re = 0;
+        sum_im = 0;
+        for(n=0;n<FN;n++) {
+            a = -2 * M_PI * (n * k) / FN;
+            sum_re += in1[n].re * cos(a) - in1[n].im * sin(a);
+            sum_im += in1[n].re * sin(a) + in1[n].im * cos(a);
+        }
+        printf("%3d: %6d,%6d %6.0f,%6.0f\n", 
+               k, in[k].re, in[k].im, sum_re / FN, sum_im / FN); 
+    }
+}
+
+void mdct_test(void)
+{
+    INT16 input[N];
+    INT32 output[N/2];
+    float input1[N];
+    float output1[N/2];
+    float s, a, err, e, emax;
+    int i, k, n;
+    /* compare mdct512() on random input with a direct floating point MDCT */
+    for(i=0;i<N;i++) {
+        input[i] = (random() % 65535 - 32767) * 9 / 10;
+        input1[i] = input[i];
+    }
+
+    mdct512(output, input);
+    
+    /* do it by hand */
+    for(k=0;k<N/2;k++) {
+        s = 0;
+        for(n=0;n<N;n++) {
+            a = (2*M_PI*(2*n+1+N/2)*(2*k+1) / (4 * N));
+            s += input1[n] * cos(a);
+        }
+        output1[k] = -2 * s / N;
+    }
+    
+    err = 0;
+    emax = 0;
+    for(i=0;i<N/2;i++) {
+        printf("%3d: %7d %7.0f\n", i, output[i], output1[i]);
+        e = fabs(output[i] - output1[i]); /* magnitude, so negative errors count in emax too */
+        if (e > emax)
+            emax = e;
+        err += e * e;
+    }
+    printf("err2=%f emax=%f\n", err / (N/2), emax); /* mean squared error and largest absolute error */
+}
+
+void test_ac3(void)
+{
+    AC3EncodeContext ctx;
+    unsigned char frame[AC3_MAX_CODED_FRAME_SIZE];
+    short samples[AC3_FRAME_SIZE];
+    int ret, i;
+    /* NOTE(review): disabled code; AC3_encode_frame() above takes 4 arguments, the call below passes 3 */
+    AC3_encode_init(&ctx, 44100, 64000, 1);
+
+    fft_test();
+    mdct_test();
+
+    for(i=0;i<AC3_FRAME_SIZE;i++)
+        samples[i] = (int)(sin(2*M_PI*i*1000.0/44100) * 10000); /* 1000 Hz sine at 44100 Hz, amplitude 10000 */
+    ret = AC3_encode_frame(&ctx, frame, samples);
+    printf("ret=%d\n", ret);
+}
+#endif
+
+AVEncoder ac3_encoder = { /* positional initializer, in AVEncoder field order */
+    "ac3",                    /* name */
+    CODEC_TYPE_AUDIO,         /* type */
+    CODEC_ID_AC3,             /* id */
+    sizeof(AC3EncodeContext), /* priv_data_size */
+    AC3_encode_init,          /* init */
+    AC3_encode_frame,         /* encode */
+    NULL,                     /* close: none for this encoder */
+};
diff --git a/libav/ac3enc.h b/libav/ac3enc.h
new file mode 100644
index 0000000000..40cc53aced
--- /dev/null
+++ b/libav/ac3enc.h
@@ -0,0 +1,32 @@
+
+#define AC3_FRAME_SIZE (6*256)
+#define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */
+#define AC3_MAX_CHANNELS 2 /* we handle at most two channels, although
+                              AC3 allows 6 channels */
+
+typedef struct AC3EncodeContext {
+    PutBitContext pb; /* bit writer used to build the frame */
+    int nb_channels; /* number of channels; also the stride of the interleaved input samples */
+    int bit_rate;
+    int sample_rate;
+    int bsid;
+    int frame_size_min; /* minimum frame size in case rounding is necessary */
+    int frame_size; /* current frame size in words */
+    int halfratecod;
+    int frmsizecod;
+    int fscod; /* frequency */
+    int acmod;
+    int bsmod;
+    short last_samples[AC3_MAX_CHANNELS][256]; /* new samples of the previous block, reused as first half of the next MDCT input */
+    int chbwcod[AC3_MAX_CHANNELS];
+    int nb_coefs[AC3_MAX_CHANNELS]; /* number of coefficients coded for each channel */
+    
+    /* bitrate allocation control */
+    int sgaincod, sdecaycod, fdecaycod, dbkneecod, floorcod; 
+    int sgain, sdecay, fdecay, dbknee, floor; /* presumably the table values selected by the *cod fields above - confirm */
+    int csnroffst;
+    int fgaincod[AC3_MAX_CHANNELS];
+    int fsnroffst[AC3_MAX_CHANNELS];
+    /* mantissa encoding */
+    int mant1_cnt, mant2_cnt, mant4_cnt; /* presumably counters for the grouped bap 1, 2 and 4 mantissas - confirm */
+} AC3EncodeContext;
diff --git a/libav/ac3tab.h b/libav/ac3tab.h
new file mode 100644
index 0000000000..2d379f0404
--- /dev/null
+++ b/libav/ac3tab.h
@@ -0,0 +1,180 @@
+/* tables taken directly from AC3 spec */
+
+/* possible bitrates */
+static const UINT16 bitratetab[19] = {
+    32, 40, 48, 56, 64, 80, 96, 112, 128, 
+    160, 192, 224, 256, 320, 384, 448, 512, 576, 640 
+};
+
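+/* AC3 MDCT window: first (rising) half only, 256 coefficients in Q15
+   fixed point (32767 ~ 1.0).  The encoder applies entry j to samples j
+   and N-1-j of each block, so the second half is the mirror image. */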
+static const INT16 ac3_window[256]= {
+    4,    7,   12,   16,   21,   28,   34,   42,
+   51,   61,   72,   84,   97,  111,  127,  145,
+  164,  184,  207,  231,  257,  285,  315,  347,
+  382,  419,  458,  500,  544,  591,  641,  694,
+  750,  810,  872,  937, 1007, 1079, 1155, 1235,
+ 1318, 1406, 1497, 1593, 1692, 1796, 1903, 2016,
+ 2132, 2253, 2379, 2509, 2644, 2783, 2927, 3076,
+ 3230, 3389, 3552, 3721, 3894, 4072, 4255, 4444,
+ 4637, 4835, 5038, 5246, 5459, 5677, 5899, 6127,
+ 6359, 6596, 6837, 7083, 7334, 7589, 7848, 8112,
+ 8380, 8652, 8927, 9207, 9491, 9778,10069,10363,
+10660,10960,11264,11570,11879,12190,12504,12820,
+13138,13458,13780,14103,14427,14753,15079,15407,
+15735,16063,16392,16720,17049,17377,17705,18032,
+18358,18683,19007,19330,19651,19970,20287,20602,
+20914,21225,21532,21837,22139,22438,22733,23025,
+23314,23599,23880,24157,24430,24699,24964,25225,
+25481,25732,25979,26221,26459,26691,26919,27142,
+27359,27572,27780,27983,28180,28373,28560,28742,
+28919,29091,29258,29420,29577,29729,29876,30018,
+30155,30288,30415,30538,30657,30771,30880,30985,
+31086,31182,31274,31363,31447,31528,31605,31678,
+31747,31814,31877,31936,31993,32046,32097,32145,
+32190,32232,32272,32310,32345,32378,32409,32438,
+32465,32490,32513,32535,32556,32574,32592,32608,
+32623,32636,32649,32661,32671,32681,32690,32698,
+32705,32712,32718,32724,32729,32733,32737,32741,
+32744,32747,32750,32752,32754,32756,32757,32759,
+32760,32761,32762,32763,32764,32764,32765,32765,
+32766,32766,32766,32766,32767,32767,32767,32767,
+32767,32767,32767,32767,32767,32767,32767,32767,
+32767,32767,32767,32767,32767,32767,32767,32767,  
+};
+
+static UINT8 masktab[253];
+
+static const UINT8 latab[260]= {
+0x0040,0x003f,0x003e,0x003d,0x003c,0x003b,0x003a,0x0039,0x0038,0x0037,
+0x0036,0x0035,0x0034,0x0034,0x0033,0x0032,0x0031,0x0030,0x002f,0x002f,
+0x002e,0x002d,0x002c,0x002c,0x002b,0x002a,0x0029,0x0029,0x0028,0x0027,
+0x0026,0x0026,0x0025,0x0024,0x0024,0x0023,0x0023,0x0022,0x0021,0x0021,
+0x0020,0x0020,0x001f,0x001e,0x001e,0x001d,0x001d,0x001c,0x001c,0x001b,
+0x001b,0x001a,0x001a,0x0019,0x0019,0x0018,0x0018,0x0017,0x0017,0x0016,
+0x0016,0x0015,0x0015,0x0015,0x0014,0x0014,0x0013,0x0013,0x0013,0x0012,
+0x0012,0x0012,0x0011,0x0011,0x0011,0x0010,0x0010,0x0010,0x000f,0x000f,
+0x000f,0x000e,0x000e,0x000e,0x000d,0x000d,0x000d,0x000d,0x000c,0x000c,
+0x000c,0x000c,0x000b,0x000b,0x000b,0x000b,0x000a,0x000a,0x000a,0x000a,
+0x000a,0x0009,0x0009,0x0009,0x0009,0x0009,0x0008,0x0008,0x0008,0x0008,
+0x0008,0x0008,0x0007,0x0007,0x0007,0x0007,0x0007,0x0007,0x0006,0x0006,
+0x0006,0x0006,0x0006,0x0006,0x0006,0x0006,0x0005,0x0005,0x0005,0x0005,
+0x0005,0x0005,0x0005,0x0005,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,
+0x0004,0x0004,0x0004,0x0004,0x0004,0x0003,0x0003,0x0003,0x0003,0x0003,
+0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0002,
+0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,
+0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0001,0x0001,
+0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,
+0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,
+0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,
+0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+};
+
+static const UINT16 hth[50][3]= {
+{ 0x04d0,0x04f0,0x0580 },
+{ 0x04d0,0x04f0,0x0580 },
+{ 0x0440,0x0460,0x04b0 },
+{ 0x0400,0x0410,0x0450 },
+{ 0x03e0,0x03e0,0x0420 },
+{ 0x03c0,0x03d0,0x03f0 },
+{ 0x03b0,0x03c0,0x03e0 },
+{ 0x03b0,0x03b0,0x03d0 },
+{ 0x03a0,0x03b0,0x03c0 },
+{ 0x03a0,0x03a0,0x03b0 },
+{ 0x03a0,0x03a0,0x03b0 },
+{ 0x03a0,0x03a0,0x03b0 },
+{ 0x03a0,0x03a0,0x03a0 },
+{ 0x0390,0x03a0,0x03a0 },
+{ 0x0390,0x0390,0x03a0 },
+{ 0x0390,0x0390,0x03a0 },
+{ 0x0380,0x0390,0x03a0 },
+{ 0x0380,0x0380,0x03a0 },
+{ 0x0370,0x0380,0x03a0 },
+{ 0x0370,0x0380,0x03a0 },
+{ 0x0360,0x0370,0x0390 },
+{ 0x0360,0x0370,0x0390 },
+{ 0x0350,0x0360,0x0390 },
+{ 0x0350,0x0360,0x0390 },
+{ 0x0340,0x0350,0x0380 },
+{ 0x0340,0x0350,0x0380 },
+{ 0x0330,0x0340,0x0380 },
+{ 0x0320,0x0340,0x0370 },
+{ 0x0310,0x0320,0x0360 },
+{ 0x0300,0x0310,0x0350 },
+{ 0x02f0,0x0300,0x0340 },
+{ 0x02f0,0x02f0,0x0330 },
+{ 0x02f0,0x02f0,0x0320 },
+{ 0x02f0,0x02f0,0x0310 },
+{ 0x0300,0x02f0,0x0300 },
+{ 0x0310,0x0300,0x02f0 },
+{ 0x0340,0x0320,0x02f0 },
+{ 0x0390,0x0350,0x02f0 },
+{ 0x03e0,0x0390,0x0300 },
+{ 0x0420,0x03e0,0x0310 },
+{ 0x0460,0x0420,0x0330 },
+{ 0x0490,0x0450,0x0350 },
+{ 0x04a0,0x04a0,0x03c0 },
+{ 0x0460,0x0490,0x0410 },
+{ 0x0440,0x0460,0x0470 },
+{ 0x0440,0x0440,0x04a0 },
+{ 0x0520,0x0480,0x0460 },
+{ 0x0800,0x0630,0x0440 },
+{ 0x0840,0x0840,0x0450 },
+{ 0x0840,0x0840,0x04e0 },
+};
+
+static const UINT8 baptab[64]= {
+    0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 
+    3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 
+    7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 
+    9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 
+    12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 
+    14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 
+    15, 15, 15, 15,
+};
+
+static const UINT8 sdecaytab[4]={ 
+    0x0f, 0x11, 0x13, 0x15,
+};
+
+static const UINT8 fdecaytab[4]={ 
+    0x3f, 0x53, 0x67, 0x7b, 
+};
+
+static const UINT16 sgaintab[4]= { 
+    0x540, 0x4d8, 0x478, 0x410,
+};
+
+static const UINT16 dbkneetab[4]= { 
+    0x000, 0x700, 0x900, 0xb00,
+};
+
+static const UINT16 floortab[8]= { 
+    0x2f0, 0x2b0, 0x270, 0x230, 0x1f0, 0x170, 0x0f0, 0xf800,
+};
+
+static const UINT16 fgaintab[8]= {
+    0x080, 0x100, 0x180, 0x200, 0x280, 0x300, 0x380, 0x400,
+};
+
+static const UINT8 bndsz[50]={
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 
+    3, 6, 6, 6, 6, 6, 6, 12, 12, 12, 12, 24, 24, 24, 24, 24 
+};
+
+static UINT8 bndtab[51]; 
+
+/* fft & mdct sin cos tables */
+static INT16 costab[64];
+static INT16 sintab[64];
+static INT16 fft_rev[512];
+static INT16 xcos1[128];
+static INT16 xsin1[128];
+
+static UINT16 crc_table[256];
diff --git a/libav/avcodec.h b/libav/avcodec.h
new file mode 100644
index 0000000000..299f81ab32
--- /dev/null
+++ b/libav/avcodec.h
@@ -0,0 +1,79 @@
+#include "common.h"
+
+enum CodecID { /* codec identifiers, stored in AVEncoder.id */
+    CODEC_ID_NONE, 
+    CODEC_ID_MPEG1VIDEO,
+    CODEC_ID_H263,
+    CODEC_ID_RV10,
+    CODEC_ID_MP2,
+    CODEC_ID_AC3, /* used by ac3_encoder */
+    CODEC_ID_MJPEG,
+};
+
+enum CodecType { /* kind of stream an encoder produces, stored in AVEncoder.type */
+    CODEC_TYPE_VIDEO,
+    CODEC_TYPE_AUDIO,
+};
+    
+typedef struct AVEncodeContext {
+    int bit_rate; /* presumably in bits per second - confirm against the callers */
+    int rate; /* frames per sec or samples per sec */
+
+    /* video only */
+    int width, height;
+    int gop_size; /* 0 = intra only */
+    
+    /* audio only */
+    int channels; /* number of audio channels */
+
+    /* the following data should not be initialized */
+    int frame_size; /* in samples, initialized when calling 'init' */
+    int frame_number; /* audio or video frame number */
+    int key_frame;    /* true if the previous compressed frame was 
+                         a key frame (intra, or seekable) */
+    struct AVEncoder *codec; /* encoder driving this context */
+    void *priv_data; /* codec private state, e.g. AC3_encode_frame() uses it as its AC3EncodeContext */
+} AVEncodeContext;
+
+typedef struct AVEncoder { /* description of one encoder, e.g. ac3_encoder */
+    char *name; /* short codec name, e.g. "ac3" */
+    int type; /* an enum CodecType value */
+    int id; /* an enum CodecID value */
+    int priv_data_size; /* sizeof the codec private context, e.g. sizeof(AC3EncodeContext) */
+    int (*init)(AVEncodeContext *);
+    int (*encode)(AVEncodeContext *, UINT8 *buf, int buf_size, void *data); /* the AC3 encoder returns the frame size in bytes */
+    int (*close)(AVEncodeContext *); /* may be NULL, as in ac3_encoder */
+    struct AVEncoder *next; /* presumably links the registered encoders - confirm */
+} AVEncoder;
+
+extern AVEncoder ac3_encoder;
+extern AVEncoder mp2_encoder;
+extern AVEncoder mpeg1video_encoder;
+extern AVEncoder h263_encoder;
+extern AVEncoder rv10_encoder;
+extern AVEncoder mjpeg_encoder;
+
+/* resample.c */
+
+typedef struct { /* per channel resampler state; the code using it is in resample.c */
+    /* fractional resampling */
+    UINT32 incr; /* fractional increment */
+    UINT32 frac; /* presumably the current fractional position - confirm in resample.c */
+    int last_sample;
+    /* integer down sample */
+    int iratio;  /* integer division ratio */
+    int icount, isum; /* presumably a sample counter and running sum for the integer stage - confirm */
+    int inv;
+} ReSampleChannelContext;
+
+typedef struct { /* state passed to audio_resample_init() and audio_resample() */
+    ReSampleChannelContext channel_ctx[2]; /* room for two channels */
+    float ratio; /* presumably the output / input rate ratio - confirm in resample.c */
+    /* channel convert */
+    int input_channels, output_channels;
+} ReSampleContext;
+
+int audio_resample_init(ReSampleContext *s, 
+                        int output_channels, int input_channels, 
+                        int output_rate, int input_rate);
+int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples);
diff --git a/libav/common.c b/libav/common.c
new file mode 100644
index 0000000000..e60b0dd85b
--- /dev/null
+++ b/libav/common.c
@@ -0,0 +1,174 @@
+/*
+ * Common bit/dsp utils
+ * Copyright (c) 2000 Gerard Lantau.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <math.h>
+#include "common.h"
+
+#define NDEBUG
+#include <assert.h>
+
+/* Start writing at 'buffer'; write_data (may be NULL) is later called with 'opaque'. */
+void init_put_bits(PutBitContext *s, UINT8 *buffer, int buffer_size,
+                   void *opaque, void (*write_data)(void *, UINT8 *, int))
+{
+    /* empty bit accumulator, nothing sent to write_data yet */
+    s->bit_buf = 0;
+    s->bit_cnt = 0;
+    s->data_out_size = 0;
+    s->buf = buffer;
+    s->buf_ptr = buffer;
+    s->buf_end = buffer + buffer_size;
+    s->opaque = opaque;
+    s->write_data = write_data;
+}
+
+/* Hand the bytes written so far to write_data, if any, and rewind buf_ptr. */
+static void flush_buffer(PutBitContext *s)
+{
+    int pending = s->buf_ptr - s->buf;
+    if (!s->write_data)
+        return; /* no sink: the bytes stay in the buffer */
+    if (pending > 0)
+        s->write_data(s->opaque, s->buf, pending);
+    s->buf_ptr = s->buf;
+    s->data_out_size += pending;
+}
+
+/* Append the n low bits of value (0 <= n <= 32) to the stream, MSB first; n == 0 is a no-op. */
+void put_bits(PutBitContext *s, int n, unsigned int value)
+{
+    unsigned int bit_buf;
+    int bit_cnt;
+
+    assert(n == 32 || value < (1U << n));
+
+    /* align_put_bits() passes n == 0 when already aligned; with an empty
+       accumulator the shift below would be by 32 bits, which is undefined */
+    if (n == 0)
+        return;
+    bit_buf = s->bit_buf;
+    bit_cnt = s->bit_cnt;
+    if (n < (32-bit_cnt)) {
+        bit_buf |= value << (32 - n - bit_cnt);
+        bit_cnt+=n;
+    } else {
+        bit_buf |= value >> (n + bit_cnt - 32);
+        /* store the completed word big endian, one byte at a time (no alignment needed) */
+        s->buf_ptr[0] = bit_buf >> 24;
+        s->buf_ptr[1] = bit_buf >> 16;
+        s->buf_ptr[2] = bit_buf >> 8;
+        s->buf_ptr[3] = bit_buf;
+        s->buf_ptr+=4;
+        if (s->buf_ptr >= s->buf_end)
+            flush_buffer(s);
+        bit_cnt=bit_cnt + n - 32;
+        bit_buf = bit_cnt ? value << (32 - bit_cnt) : 0; /* bits of value that did not fit */
+    }
+    s->bit_buf = bit_buf;
+    s->bit_cnt = bit_cnt;
+}
+
+/* return the number of bits output: bytes already passed to write_data or still in the buffer, plus the pending bits */
+long long get_bit_count(PutBitContext *s)
+{
+    return (s->buf_ptr - s->buf + s->data_out_size) * 8 + (long long)s->bit_cnt;
+}
+
+void align_put_bits(PutBitContext *s)
+{
+    put_bits(s,(8 - s->bit_cnt) & 7,0); /* 0 to 7 zero bits, up to the next multiple of 8 pending bits */
+}
+
+/* Write out the pending bits, zero padded to a whole byte, then flush the buffer. */
+void flush_put_bits(PutBitContext *s)
+{
+    /* XXX: should test end of buffer */
+    for (; s->bit_cnt > 0; s->bit_cnt -= 8) {
+        UINT8 top = s->bit_buf >> 24;
+        s->bit_buf <<= 8;
+        *s->buf_ptr++ = top;
+    }
+    flush_buffer(s);
+    s->bit_buf = 0;
+    s->bit_cnt = 0;
+}
+
+/* for jpeg: same as put_bits(), but each 0xff byte written is escaped
+   with a 0x00 byte after it; n == 0 is a no-op */
+void jput_bits(PutBitContext *s, int n, unsigned int value)
+{
+    unsigned int bit_buf, b;
+    int bit_cnt, i;
+
+    assert(n == 32 || value < (1U << n));
+
+    /* with n == 0 and an empty accumulator the shift below would be by
+       32 bits, which is undefined behaviour */
+    if (n == 0)
+        return;
+
+    bit_buf = s->bit_buf;
+    bit_cnt = s->bit_cnt;
+    if (n < (32-bit_cnt)) {
+        bit_buf |= value << (32 - n - bit_cnt);
+        bit_cnt+=n;
+    } else {
+        bit_buf |= value >> (n + bit_cnt - 32);
+        /* write the completed word MSB first, escaping 0xff bytes */
+        for(i=0;i<4;i++) {
+            b = (bit_buf >> 24);
+            *(s->buf_ptr++) = b;
+            if (b == 0xff)
+                *(s->buf_ptr++) = 0;
+            bit_buf <<= 8;
+        }
+        /* we flush the buffer sooner to handle worst case */
+        if (s->buf_ptr >= (s->buf_end - 8))
+            flush_buffer(s);
+
+        bit_cnt=bit_cnt + n - 32;
+        /* keep the bits of value that did not fit in the stored word */
+        bit_buf = bit_cnt ? value << (32 - bit_cnt) : 0;
+    }
+
+    s->bit_buf = bit_buf;
+    s->bit_cnt = bit_cnt;
+}
+
+/* Write out the pending bits, zero padded to a whole byte and with each
+   0xff byte followed by a 0x00, then flush the buffer. */
+void jflush_put_bits(PutBitContext *s)
+{
+    /* one byte per iteration, taken from the top of the accumulator */
+    for (; s->bit_cnt > 0; s->bit_cnt -= 8) {
+        unsigned int top = s->bit_buf >> 24;
+
+        s->bit_buf <<= 8;
+        *s->buf_ptr++ = top;
+        if (top == 0xff)
+            *s->buf_ptr++ = 0;
+    }
+    flush_buffer(s);
+    s->bit_buf = 0;
+    s->bit_cnt = 0;
+}
+
diff --git a/libav/common.h b/libav/common.h
new file mode 100644
index 0000000000..18473eb8e8
--- /dev/null
+++ b/libav/common.h
@@ -0,0 +1,68 @@
+#ifndef COMMON_H
+#define COMMON_H
+
+typedef unsigned char UINT8;
+typedef unsigned short UINT16;
+typedef unsigned int UINT32;
+typedef signed char INT8;
+typedef signed short INT16;
+typedef signed int INT32;
+
+/* bit I/O */
+
+struct PutBitContext;
+
+typedef void (*WriteDataFunc)(void *, UINT8 *, int);
+
+typedef struct PutBitContext { /* state of the bit writer, set up by init_put_bits() */
+    UINT8 *buf, *buf_ptr, *buf_end; /* start of the buffer, next byte to write, end of the buffer */
+    int bit_cnt; /* number of bits pending in bit_buf */
+    UINT32 bit_buf; /* pending bits, stored from the most significant bit down */
+    long long data_out_size; /* in bytes */
+    void *opaque; /* first argument given to write_data */
+    WriteDataFunc write_data; /* called with the buffered bytes on flush; may be NULL */
+} PutBitContext;
+
+void init_put_bits(PutBitContext *s, 
+                   UINT8 *buffer, int buffer_size,
+                   void *opaque,
+                   void (*write_data)(void *, UINT8 *, int));
+void put_bits(PutBitContext *s, int n, unsigned int value);
+long long get_bit_count(PutBitContext *s);
+void align_put_bits(PutBitContext *s);
+void flush_put_bits(PutBitContext *s);
+
+/* jpeg specific put_bits */
+void jput_bits(PutBitContext *s, int n, unsigned int value);
+void jflush_put_bits(PutBitContext *s);
+
+/* misc math functions */
+
+/* Integer base-2 logarithm: index of the highest bit set in v (0 when
+   v is 0 or 1), found by halving the search range at each step. */
+extern inline int log2(unsigned int v)
+{
+    int n = 0;
+
+    if (v >> 16) {
+        n = 16;
+        v >>= 16;
+    }
+    if (v >> 8) {
+        n |= 8;
+        v >>= 8;
+    }
+    if (v >> 4) {
+        n |= 4;
+        v >>= 4;
+    }
+    if (v >> 2) {
+        n |= 2;
+        v >>= 2;
+    }
+    if (v >> 1)
+        n |= 1;
+    return n;
+}
+
+#endif
diff --git a/libav/h263data.h b/libav/h263data.h
new file mode 100644
index 0000000000..1cf6f4d802
--- /dev/null
+++ b/libav/h263data.h
@@ -0,0 +1,151 @@
+/* DCT coefficients. Four tables, two for last = 0, two for last = 1.
+   the sign bit must be added afterwards. */
+
+/* first part of coeffs for last = 0. Indexed by [run][level-1] */
+
+static const UINT8 coeff_tab0[2][12][2] =
+{
+  /* run = 0 */
+  {
+    {0x02, 2}, {0x0f, 4}, {0x15, 6}, {0x17, 7},
+    {0x1f, 8}, {0x25, 9}, {0x24, 9}, {0x21,10},
+    {0x20,10}, {0x07,11}, {0x06,11}, {0x20,11}
+  },
+  /* run = 1 */
+  {
+    {0x06, 3}, {0x14, 6}, {0x1e, 8}, {0x0f,10},
+    {0x21,11}, {0x50,12}, {0x00, 0}, {0x00, 0},
+    {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  }
+};
+
+/* rest of coeffs for last = 0. indexing by [run-2][level-1] */
+
+static const UINT8 coeff_tab1[25][4][2] =
+{
+  /* run = 2 */
+  {
+    {0x0e, 4}, {0x1d, 8}, {0x0e,10}, {0x51,12}
+  },
+  /* run = 3 */
+  {
+    {0x0d, 5}, {0x23, 9}, {0x0d,10}, {0x00, 0}
+  },
+  /* run = 4-26 */
+  {
+    {0x0c, 5}, {0x22, 9}, {0x52,12}, {0x00, 0}
+  },
+  {
+    {0x0b, 5}, {0x0c,10}, {0x53,12}, {0x00, 0}
+  },
+  {
+    {0x13, 6}, {0x0b,10}, {0x54,12}, {0x00, 0}
+  },
+  {
+    {0x12, 6}, {0x0a,10}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x11, 6}, {0x09,10}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x10, 6}, {0x08,10}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x16, 7}, {0x55,12}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x15, 7}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x14, 7}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x1c, 8}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x1b, 8}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x21, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x20, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x1f, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x1e, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x1d, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x1c, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x1b, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x1a, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x22,11}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x23,11}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x56,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  },
+  {
+    {0x57,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}
+  }
+};
+
+/* first coeffs of last = 1. indexing by [run][level-1] */
+
+static const UINT8 coeff_tab2[2][3][2] =
+{
+  /* run = 0 */
+  {
+    {0x07, 4}, {0x19, 9}, {0x05,11}
+  },
+  /* run = 1 */
+  {
+    {0x0f, 6}, {0x04,11}, {0x00, 0}
+  }
+};
+
+/* rest of coeffs for last = 1. indexing by [run-2] */
+
+static const UINT8 coeff_tab3[40][2] =
+{
+  {0x0e, 6}, {0x0d, 6}, {0x0c, 6},
+  {0x13, 7}, {0x12, 7}, {0x11, 7}, {0x10, 7},
+  {0x1a, 8}, {0x19, 8}, {0x18, 8}, {0x17, 8},
+  {0x16, 8}, {0x15, 8}, {0x14, 8}, {0x13, 8},
+  {0x18, 9}, {0x17, 9}, {0x16, 9}, {0x15, 9},    
+  {0x14, 9}, {0x13, 9}, {0x12, 9}, {0x11, 9},    
+  {0x07,10}, {0x06,10}, {0x05,10}, {0x04,10},    
+  {0x24,11}, {0x25,11}, {0x26,11}, {0x27,11},    
+  {0x58,12}, {0x59,12}, {0x5a,12}, {0x5b,12},    
+  {0x5c,12}, {0x5d,12}, {0x5e,12}, {0x5f,12},
+  {0x00, 0}               
+};
+
+/* intra MCBPC, mb_type = 3 */
+static UINT8 intra_MCBPC_code[4] = { 1, 1, 2, 3 };
+static UINT8 intra_MCBPC_bits[4] = { 1, 3, 3, 3 };
+
+/* inter MCBPC, mb_type = 0 then 3 */
+static UINT8 inter_MCBPC_code[8] = { 1, 3, 2, 5, 3, 4, 3, 3 };
+static UINT8 inter_MCBPC_bits[8] = { 1, 4, 4, 6, 5, 8, 8, 7 };
+
+static UINT8 cbpy_tab[16][2] =
+{
+  {3,4}, {5,5}, {4,5}, {9,4}, {3,5}, {7,4}, {2,6}, {11,4},
+  {2,5}, {3,6}, {5,4}, {10,4}, {4,4}, {8,4}, {6,4}, {3,2}
+};
+
+
diff --git a/libav/h263enc.c b/libav/h263enc.c
new file mode 100644
index 0000000000..59db1ee512
--- /dev/null
+++ b/libav/h263enc.c
@@ -0,0 +1,229 @@
+/*
+ * H263 backend for ffmpeg encoder
+ * Copyright (c) 2000 Gerard Lantau.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <netinet/in.h>
+#include "common.h"
+#include "mpegvideo.h"
+#include "h263data.h"
+
+void h263_picture_header(MpegEncContext *s, int picture_number)
+{
+    /* frame sizes this writer accepts; row k is sent as format k + 1 */
+    static const int sizes[5][2] = {
+        { 128, 96 }, { 176, 144 }, { 352, 288 }, { 704, 576 }, { 1408, 1152 }
+    };
+    int format, k;
+
+    align_put_bits(&s->pb);
+    put_bits(&s->pb, 22, 0x20); /* start of the picture header */
+    /* 8 bit time stamp derived from s->picture_number (the picture_number
+       argument is not used) */
+    put_bits(&s->pb, 8, ((s->picture_number * 30) / s->frame_rate) & 0xff);
+
+    put_bits(&s->pb, 1, 1); /* marker */
+    put_bits(&s->pb, 1, 0); /* h263 id */
+    put_bits(&s->pb, 1, 0); /* split screen off */
+    put_bits(&s->pb, 1, 0); /* camera  off */
+    put_bits(&s->pb, 1, 0); /* freeze picture release off */
+
+    /* find the format number; an unlisted size is fatal */
+    format = 0;
+    for (k = 0; k < 5; k++) {
+        if (s->width == sizes[k][0] && s->height == sizes[k][1]) {
+            format = k + 1;
+            break;
+        }
+    }
+    if (format == 0)
+        abort();
+    put_bits(&s->pb, 3, format);
+
+    /* 1 for P pictures, 0 otherwise */
+    put_bits(&s->pb, 1, (s->pict_type == P_TYPE ? 1 : 0));
+
+    put_bits(&s->pb, 1, 0); /* unrestricted motion vector: off */
+    put_bits(&s->pb, 1, 0); /* SAC: off */
+    put_bits(&s->pb, 1, 0); /* advanced prediction mode: off */
+    put_bits(&s->pb, 1, 0); /* not PB frame */
+
+    put_bits(&s->pb, 5, s->qscale);
+
+    put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
+    put_bits(&s->pb, 1, 0); /* no PEI */
+}
+
+static void h263_encode_block(MpegEncContext *s, DCTELEM *block, 
+                              int n);
+
+void h263_encode_mb(MpegEncContext *s,
+                    DCTELEM block[6][64],
+                    int motion_x, int motion_y)
+{
+    /* Emit one macroblock: the header (skip/coded flag, MCBPC, CBPY and,
+       for inter MBs, a zero motion vector) followed by the six blocks.
+       Non-zero motion_x / motion_y only prevent the skip code; the
+       vector written is always zero. */
+    /* lowest block_last_index that marks a block as coded: inter blocks
+       count from 0, intra blocks from 1 */
+    const int first_coded = s->mb_intra ? 1 : 0;
+    int cbp = 0, cbpc, cbpy, n;
+
+    /* compute cbp: block 0 sets bit 5, ..., block 5 sets bit 0 */
+    for (n = 0; n < 6; n++) {
+        if (s->block_last_index[n] >= first_coded)
+            cbp |= 0x20 >> n;
+    }
+    cbpc = cbp & 3;
+    cbpy = cbp >> 2;
+
+    if (s->mb_intra) {
+        if (s->pict_type != I_TYPE) {
+            /* intra MB outside an I picture: coded flag, then the upper
+               half of the inter MCBPC table */
+            put_bits(&s->pb, 1, 0); /* mb coded */
+            put_bits(&s->pb,
+                     inter_MCBPC_bits[cbpc + 4],
+                     inter_MCBPC_code[cbpc + 4]);
+        } else {
+            put_bits(&s->pb,
+                     intra_MCBPC_bits[cbpc],
+                     intra_MCBPC_code[cbpc]);
+        }
+        put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+    } else {
+        if (cbp == 0 && motion_x == 0 && motion_y == 0) {
+            /* skip macroblock */
+            put_bits(&s->pb, 1, 1);
+            return;
+        }
+
+        put_bits(&s->pb, 1, 0); /* mb coded */
+        put_bits(&s->pb,
+                 inter_MCBPC_bits[cbpc],
+                 inter_MCBPC_code[cbpc]);
+
+        /* inter MBs look cbpy up with its four bits inverted */
+        cbpy ^= 0xf;
+        put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+
+        /* motion vectors: zero */
+        put_bits(&s->pb, 1, 1);
+        put_bits(&s->pb, 1, 1);
+    }
+
+    /* encode each block */
+    for (n = 0; n < 6; n++)
+        h263_encode_block(s, block[n], n);
+}
+
+/* Write one block: for intra MBs the 8 bit DC value, then one (last, run,
+   level) event per non-zero coefficient up to s->block_last_index[n], read
+   in zigzag_direct order. The intra DC in block[0] may be clamped in place. */
+static void h263_encode_block(MpegEncContext *s, DCTELEM *block, int n)
+{
+    int level, run, last, i, j, last_index, last_non_zero, sign, alevel;
+    int code, len;
+
+    if (s->mb_intra) {
+        /* DC coef: H.263 does not allow the codes 0x00 and 0x80, and 0xff
+           stands for 128, so only 1..254 can be sent. Clamp to that range
+           and store the result back so the block matches what was coded. */
+        level = block[0];
+        if (level > 254)
+            level = 254;
+        else if (level < 1)
+            level = 1;
+        block[0] = level;
+        put_bits(&s->pb, 8, (level == 128 ? 0xff : level));
+        i = 1;
+    } else {
+        i = 0;
+    }
+    /* AC coefs */
+    last_index = s->block_last_index[n];
+    last_non_zero = i - 1;
+    for(;i<=last_index;i++) {
+        j = zigzag_direct[i];
+        level = block[j];
+        if (level) {
+            run = i - last_non_zero - 1;
+            last = (i == last_index);
+            sign = (level < 0);
+            alevel = sign ? -level : level;
+            len = 0; /* stays 0 when no table holds this event */
+            code = 0; /* only to disable warning */
+            if (last == 0) {
+                if (run < 2 && alevel < 13 ) {
+                    len = coeff_tab0[run][alevel-1][1];
+                    code = coeff_tab0[run][alevel-1][0];
+                } else if (run >= 2 && run < 27 && alevel < 5) {
+                    len = coeff_tab1[run-2][alevel-1][1];
+                    code = coeff_tab1[run-2][alevel-1][0];
+                }
+            } else {
+                if (run < 2 && alevel < 4) {
+                    len = coeff_tab2[run][alevel-1][1];
+                    code = coeff_tab2[run][alevel-1][0];
+                } else if (run >= 2 && run < 42 && alevel == 1) {
+                    len = coeff_tab3[run-2][1];
+                    code = coeff_tab3[run-2][0];
+                }
+            }
+            if (len != 0) {
+                code = (code << 1) | sign;
+                put_bits(&s->pb, len + 1, code);
+            } else {
+                    /* escape: last, run, then the level truncated to 8 bits */
+                    put_bits(&s->pb, 7, 3);
+                    put_bits(&s->pb, 1, last);
+                    put_bits(&s->pb, 6, run);
+                    put_bits(&s->pb, 8, level & 0xff);
+            }
+            last_non_zero = i;
+        }
+    }
+}
+
+/* Write a RV 1.0 compatible frame header: marker bit, P-picture flag,
+   PB-frame flag (always 0), 5 bit qscale, then the macroblock position
+   and count fields. */
+void rv10_encode_picture_header(MpegEncContext *s, int picture_number)
+{
+    const int is_p_picture = (s->pict_type == P_TYPE);
+    const int mb_count = s->mb_width * s->mb_height;
+
+    align_put_bits(&s->pb);
+
+    put_bits(&s->pb, 1, 1);            /* marker */
+    put_bits(&s->pb, 1, is_p_picture);
+    put_bits(&s->pb, 1, 0);            /* not PB frame */
+    put_bits(&s->pb, 5, s->qscale);
+
+    /* I pictures get no extra fields here: the specific MPEG like DC
+       coding is not used */
+
+    /* if multiple packets per frame are sent, the position at which
+       to display the macro blocks is coded here */
+    put_bits(&s->pb, 6, 0);            /* mb_x */
+    put_bits(&s->pb, 6, 0);            /* mb_y */
+    put_bits(&s->pb, 12, mb_count);
+    put_bits(&s->pb, 3, 0);            /* ignored */
+}
+
diff --git a/libav/jfdctfst.c b/libav/jfdctfst.c
new file mode 100644
index 0000000000..620a03078c
--- /dev/null
+++ b/libav/jfdctfst.c
@@ -0,0 +1,224 @@
+/*
+ * jfdctfst.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "common.h"
+#include "mpegvideo.h"
+
+#define DCTSIZE 8 /* block edge length; the code below only copes with 8 */
+#define GLOBAL(x) x /* expands to its argument, so GLOBAL(void) is just void */
+#define RIGHT_SHIFT(x, n) ((x) >> (n)) /* plain >>, also applied to negative values */
+#define SHIFT_TEMPS /* empty: this RIGHT_SHIFT needs no temporary */
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jfdctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * Again to save a few shifts, the intermediate results between pass 1 and
+ * pass 2 are not upscaled, but are represented only to integral precision.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#define CONST_BITS  8
+
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8
+#define FIX_0_382683433  ((INT32)   98)		/* FIX(0.382683433) */
+#define FIX_0_541196100  ((INT32)  139)		/* FIX(0.541196100) */
+#define FIX_0_707106781  ((INT32)  181)		/* FIX(0.707106781) */
+#define FIX_1_306562965  ((INT32)  334)		/* FIX(1.306562965) */
+#else
+#define FIX_0_382683433  FIX(0.382683433)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_707106781  FIX(0.707106781)
+#define FIX_1_306562965  FIX(1.306562965)
+#endif
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/* Multiply a DCTELEM variable by an INT32 constant, and immediately
+ * descale to yield a DCTELEM result.
+ */
+
+#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
+
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+/* Works in place on data: DCTSIZE rows of DCTSIZE values; rows first, then columns. */
+GLOBAL(void)
+jpeg_fdct_ifast (DCTELEM * data)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  DCTELEM tmp10, tmp11, tmp12, tmp13;
+  DCTELEM z1, z2, z3, z4, z5, z11, z13;
+  DCTELEM *dataptr;
+  int ctr; /* counts down the rows in pass 1, the columns in pass 2 */
+  SHIFT_TEMPS /* expands to nothing in this file */
+
+  /* Pass 1: process rows. */
+
+  dataptr = data; /* start of the first row */
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+    
+    /* Even part */
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+    
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4; MULTIPLY shifts the product right by CONST_BITS */
+    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[6] = tmp13 - z1;
+    
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+
+  dataptr = data; /* top of the first column; elements are DCTSIZE apart */
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+    
+    /* Even part */
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+    
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+    
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE*3] = z13 - z2;
+    dataptr[DCTSIZE*1] = z11 + z4;
+    dataptr[DCTSIZE*7] = z11 - z4;
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
diff --git a/libav/jrevdct.c b/libav/jrevdct.c
new file mode 100644
index 0000000000..26715b0b18
--- /dev/null
+++ b/libav/jrevdct.c
@@ -0,0 +1,1584 @@
+/*
+ * jrevdct.c
+ *
+ * Copyright (C) 1991, 1992, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the basic inverse-DCT transformation subroutine.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ *
+ * I've made lots of modifications to attempt to take advantage of the
+ * sparse nature of the DCT matrices we're getting.  Although the logic
+ * is cumbersome, it's straightforward and the resulting code is much
+ * faster.
+ *
+ * A better way to do this would be to pass in the DCT block as a sparse
+ * matrix, perhaps with the difference cases encoded.
+ */
+
+typedef int INT32;
+
+/* Definition of the constant integer scale factor. */
+#define CONST_BITS 13
+
+/* Misc DCT definitions */
+#define DCTSIZE		8	/* The basic DCT block is 8x8 samples */
+#define DCTSIZE2	64	/* DCTSIZE squared; # of elements in a block */
+
+#define GLOBAL			/* a function referenced thru EXTERNs */
+
+typedef int DCTELEM;
+typedef DCTELEM DCTBLOCK[DCTSIZE2];
+
+void j_rev_dct (DCTELEM *data);
+
+
+#define GLOBAL			/* a function referenced thru EXTERNs */
+#define ORIG_DCT	1
+
+/* We assume that right shift corresponds to signed division by 2 with
+ * rounding towards minus infinity.  This is correct for typical "arithmetic
+ * shift" instructions that shift in copies of the sign bit.  But some
+ * C compilers implement >> with an unsigned shift.  For these machines you
+ * must define RIGHT_SHIFT_IS_UNSIGNED.
+ * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity.
+ * It is only applied with constant shift counts.  SHIFT_TEMPS must be
+ * included in the variables of any routine using RIGHT_SHIFT.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define SHIFT_TEMPS	INT32 shift_temp;
+#define RIGHT_SHIFT(x,shft)  \
+	((shift_temp = (x)) < 0 ? \
+	 (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
+	 (shift_temp >> (shft)))
+#else
+#define SHIFT_TEMPS
+#define RIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
+
+/*
+ * This routine is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true IDCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D IDCT,
+ * because the y0 and y4 inputs need not be divided by sqrt(N).
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (To scale up 12-bit sample data further, an
+ * intermediate INT32 array would be needed.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#ifdef EIGHT_BIT_SAMPLES
+#define PASS1_BITS  2
+#else
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+#define ONE	((INT32) 1)
+
+#define CONST_SCALE (ONE << CONST_BITS)
+
+/* Convert a positive real constant to an integer scaled by CONST_SCALE.
+ * IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
+ * you will pay a significant penalty in run time.  In that case, figure
+ * the correct integer constant values and insert them by hand.
+ */
+
+#define FIX(x)	((INT32) ((x) * CONST_SCALE + 0.5))
+
+/* Descale and correctly round an INT32 value that's scaled by N bits.
+ * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
+ * the fudge factor is correct for either sign of X.
+ */
+
+#define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
+#define SCALE(x,n)	((INT32)(x) << (n))	/* x scaled up by n bits; n is parenthesized so any expression works */
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
+ * this provides a useful speedup on many machines.
+ * There is no way to specify a 16x16->32 multiply in portable C, but
+ * some C compilers will do the right thing if you provide the correct
+ * combination of casts.
+ * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#ifdef EIGHT_BIT_SAMPLES
+#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
+#define MULTIPLY(var,const)  (((INT16) (var)) * ((INT16) (const)))
+#endif
+#ifdef SHORTxLCONST_32		/* known to work with Microsoft C 6.0 */
+#define MULTIPLY(var,const)  (((INT16) (var)) * ((INT32) (const)))
+#endif
+#endif
+
+#if 0
+/* force a multiplication for x86 (where a multiply is fast). We
+   force the non constant operand to be in a register because
+   otherwise it may be a 16 bit memory reference, which is not allowed
+   by imull */
+#define MULTIPLY(a,b) \
+({\
+    int res;\
+    asm("imull %2,%1,%0" : "=r" (res) : "r" ((int)(a)), "i" (b));\
+    res;\
+})
+#endif
+
+#ifndef MULTIPLY		/* default definition */
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+#ifndef ORIG_DCT
+
+#undef SSMUL
+#define SSMUL(var1,var2)  ((INT16)(var1) * (INT32)(INT16)(var2))
+
+/* Precomputed idct value arrays. */
+
+STATIC DCTELEM PreIDCT[64][64];
+
+/* Pre compute singleton coefficient IDCT values: row i of PreIDCT is a block that is zero except for 2048 at index i, passed through j_rev_dct(). */
+void init_pre_idct() {
+    int i;
+    /* NOTE(review): this sits under "#ifndef ORIG_DCT" and ORIG_DCT is defined earlier in this file */
+    for (i = 0; i < 64; i++) {
+	memset ((char *) PreIDCT[i], 0, 64 * sizeof(DCTELEM)); /* clear the 64 entries of row i */
+	PreIDCT[i][i] = 2048; /* the single non-zero coefficient */
+	j_rev_dct (PreIDCT[i]); /* run the IDCT on row i */
+    }
+}
+
+/*
+ * Perform the inverse DCT on a block, treating data[pos] as its only
+ * non-zero coefficient; the 64 results overwrite data.
+ */
+void j_rev_dct_sparse (data, pos)
+	DCTBLOCK data;
+	int pos;
+{
+    register DCTELEM *dataptr;
+    short int val;
+    DCTELEM *ndataptr;
+    int coeff, rr;
+
+    /* If DC Coefficient. */
+
+    if (pos == 0) {
+	register INT32 v;
+	int k;
+
+	v = *data;
+	/* A block holding only a DC term decodes to a constant: the DC
+	 * value divided by 8, rounded. */
+	if (v < 0)
+	    val = (short)((v - 3) >> 3);
+	else
+	    val = (short)((v + 4) >> 3);
+
+	/* Store it one DCTELEM at a time.  Packing two 16 bit copies into
+	 * an INT32 and storing 32 words is only equivalent when DCTELEM is
+	 * 16 bits wide (this file typedefs it as int), and the packed word
+	 * must mask the low half or a negative value sign-extends over the
+	 * upper copy; the plain loop has neither problem.
+	 */
+	for (k = 0; k < DCTSIZE2; k++)
+	    data[k] = val;
+
+	return;
+    }
+
+    /* Some other coefficient. */
+    dataptr = (DCTELEM *)data;
+    coeff = dataptr[pos];
+    ndataptr = PreIDCT[pos];
+
+    for (rr = 0; rr < 4; rr++) {
+	dataptr[0]  = (DCTELEM)(SSMUL (ndataptr[0] , coeff) >> (CONST_BITS-2));
+	dataptr[1]  = (DCTELEM)(SSMUL (ndataptr[1] , coeff) >> (CONST_BITS-2));
+	dataptr[2]  = (DCTELEM)(SSMUL (ndataptr[2] , coeff) >> (CONST_BITS-2));
+	dataptr[3]  = (DCTELEM)(SSMUL (ndataptr[3] , coeff) >> (CONST_BITS-2));
+	dataptr[4]  = (DCTELEM)(SSMUL (ndataptr[4] , coeff) >> (CONST_BITS-2));
+	dataptr[5]  = (DCTELEM)(SSMUL (ndataptr[5] , coeff) >> (CONST_BITS-2));
+	dataptr[6]  = (DCTELEM)(SSMUL (ndataptr[6] , coeff) >> (CONST_BITS-2));
+	dataptr[7]  = (DCTELEM)(SSMUL (ndataptr[7] , coeff) >> (CONST_BITS-2));
+	dataptr[8]  = (DCTELEM)(SSMUL (ndataptr[8] , coeff) >> (CONST_BITS-2));
+	dataptr[9]  = (DCTELEM)(SSMUL (ndataptr[9] , coeff) >> (CONST_BITS-2));
+	dataptr[10] = (DCTELEM)(SSMUL (ndataptr[10], coeff) >> (CONST_BITS-2));
+	dataptr[11] = (DCTELEM)(SSMUL (ndataptr[11], coeff) >> (CONST_BITS-2));
+	dataptr[12] = (DCTELEM)(SSMUL (ndataptr[12], coeff) >> (CONST_BITS-2));
+	dataptr[13] = (DCTELEM)(SSMUL (ndataptr[13], coeff) >> (CONST_BITS-2));
+	dataptr[14] = (DCTELEM)(SSMUL (ndataptr[14], coeff) >> (CONST_BITS-2));
+	dataptr[15] = (DCTELEM)(SSMUL (ndataptr[15], coeff) >> (CONST_BITS-2));
+	dataptr += 16;
+	ndataptr += 16;
+    }
+}
+
+
+void j_rev_dct (data)
+	DCTBLOCK data;
+{
+    INT32 tmp0, tmp1, tmp2, tmp3;
+    INT32 tmp10, tmp11, tmp12, tmp13;
+    INT32 z1, z2, z3, z4, z5;
+    int d0, d1, d2, d3, d4, d5, d6, d7;
+    register DCTELEM *dataptr;
+    int rowctr;
+    SHIFT_TEMPS;
+
+    /* Pass 1: process rows. */
+    /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+    /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+    dataptr = data;
+
+    for (rowctr = DCTSIZE - 1; rowctr >= 0; rowctr--) {
+	/* Due to quantization, we will usually find that many of the input
+	 * coefficients are zero, especially the AC terms.  We can exploit this
+	 * by short-circuiting the IDCT calculation for any row in which all
+	 * the AC terms are zero.  In that case each output is equal to the
+	 * DC coefficient (with scale factor as needed).
+	 * With typical images and quantization tables, half or more of the
+	 * row DCT calculations can be simplified this way.
+	 */
+
+	register INT32 *idataptr = (INT32*)dataptr;
+	d0 = dataptr[0];
+	d1 = dataptr[1];
+	if ((d1 == 0) && (idataptr[1] | idataptr[2] | idataptr[3]) == 0) {
+	    /* AC terms all zero */
+	    if (d0) {
+		/* Compute a 32 bit value to assign. */
+		DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
+		register INT32 v = (dcval & 0xffff) |
+				   (((INT32)dcval << 16) & 0xffff0000L);
+
+		idataptr[0] = v;
+		idataptr[1] = v;
+		idataptr[2] = v;
+		idataptr[3] = v;
+	    }
+
+	    dataptr += DCTSIZE;	/* advance pointer to next row */
+	    continue;
+	}
+	d2 = dataptr[2];
+	d3 = dataptr[3];
+	d4 = dataptr[4];
+	d5 = dataptr[5];
+	d6 = dataptr[6];
+	d7 = dataptr[7];
+
+	/* Even part: reverse the even part of the forward DCT. */
+	/* The rotator is sqrt(2)*c(-6). */
+	if (d6) {
+	    if (d4) {
+		if (d2) {
+		    if (d0) {
+			/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+			z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
+			tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
+			tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
+
+			tmp0 = SCALE (d0 + d4, CONST_BITS);
+			tmp1 = SCALE (d0 - d4, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp1 + tmp2;
+			tmp12 = tmp1 - tmp2;
+		    } else {
+			/* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
+			z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
+			tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
+			tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
+
+			tmp0 = SCALE (d4, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp2 - tmp0;
+			tmp12 = -(tmp0 + tmp2);
+		    }
+		} else {
+		    if (d0) {
+			/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+			tmp2 = MULTIPLY(d6, - FIX(1.306562965));
+			tmp3 = MULTIPLY(d6, FIX(0.541196100));
+
+			tmp0 = SCALE (d0 + d4, CONST_BITS);
+			tmp1 = SCALE (d0 - d4, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp1 + tmp2;
+			tmp12 = tmp1 - tmp2;
+		    } else {
+			/* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
+			tmp2 = MULTIPLY(d6, -FIX(1.306562965));
+			tmp3 = MULTIPLY(d6, FIX(0.541196100));
+
+			tmp0 = SCALE (d4, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp2 - tmp0;
+			tmp12 = -(tmp0 + tmp2);
+		    }
+		}
+	    } else {
+		if (d2) {
+		    if (d0) {
+			/* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
+			z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
+			tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
+			tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
+
+			tmp0 = SCALE (d0, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp0 + tmp2;
+			tmp12 = tmp0 - tmp2;
+		    } else {
+			/* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
+			z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
+			tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
+			tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
+
+			tmp10 = tmp3;
+			tmp13 = -tmp3;
+			tmp11 = tmp2;
+			tmp12 = -tmp2;
+		    }
+		} else {
+		    if (d0) {
+			/* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
+			tmp2 = MULTIPLY(d6, - FIX(1.306562965));
+			tmp3 = MULTIPLY(d6, FIX(0.541196100));
+
+			tmp0 = SCALE (d0, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp0 + tmp2;
+			tmp12 = tmp0 - tmp2;
+		    } else {
+			/* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
+			tmp2 = MULTIPLY(d6, - FIX(1.306562965));
+			tmp3 = MULTIPLY(d6, FIX(0.541196100));
+
+			tmp10 = tmp3;
+			tmp13 = -tmp3;
+			tmp11 = tmp2;
+			tmp12 = -tmp2;
+		    }
+		}
+	    }
+	} else {
+	    if (d4) {
+		if (d2) {
+		    if (d0) {
+			/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+			tmp2 = MULTIPLY(d2, FIX(0.541196100));
+			tmp3 = MULTIPLY(d2, FIX(1.306562965));
+
+			tmp0 = SCALE (d0 + d4, CONST_BITS);
+			tmp1 = SCALE (d0 - d4, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp1 + tmp2;
+			tmp12 = tmp1 - tmp2;
+		    } else {
+			/* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
+			tmp2 = MULTIPLY(d2, FIX(0.541196100));
+			tmp3 = MULTIPLY(d2, FIX(1.306562965));
+
+			tmp0 = SCALE (d4, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp2 - tmp0;
+			tmp12 = -(tmp0 + tmp2);
+		    }
+		} else {
+		    if (d0) {
+			/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+			tmp10 = tmp13 = SCALE (d0 + d4, CONST_BITS);
+			tmp11 = tmp12 = SCALE (d0 - d4, CONST_BITS);
+		    } else {
+			/* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
+			tmp10 = tmp13 = SCALE (d4, CONST_BITS);
+			tmp11 = tmp12 = -tmp10;
+		    }
+		}
+	    } else {
+		if (d2) {
+		    if (d0) {
+			/* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
+			tmp2 = MULTIPLY(d2, FIX(0.541196100));
+			tmp3 = MULTIPLY(d2, FIX(1.306562965));
+
+			tmp0 = SCALE (d0, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp0 + tmp2;
+			tmp12 = tmp0 - tmp2;
+		    } else {
+			/* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
+			tmp2 = MULTIPLY(d2, FIX(0.541196100));
+			tmp3 = MULTIPLY(d2, FIX(1.306562965));
+
+			tmp10 = tmp3;
+			tmp13 = -tmp3;
+			tmp11 = tmp2;
+			tmp12 = -tmp2;
+		    }
+		} else {
+		    if (d0) {
+			/* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
+			tmp10 = tmp13 = tmp11 = tmp12 = SCALE (d0, CONST_BITS);
+		    } else {
+			/* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
+			tmp10 = tmp13 = tmp11 = tmp12 = 0;
+		    }
+		}
+	    }
+	}
+
+
+	/* Odd part per figure 8; the matrix is unitary and hence its
+	 * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+	 */
+
+	if (d7) {
+	    if (d5) {
+		if (d3) {
+		    if (d1) {
+			/* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
+			z1 = d7 + d1;
+			z2 = d5 + d3;
+			z3 = d7 + d3;
+			z4 = d5 + d1;
+			z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
+
+			tmp0 = MULTIPLY(d7, FIX(0.298631336));
+			tmp1 = MULTIPLY(d5, FIX(2.053119869));
+			tmp2 = MULTIPLY(d3, FIX(3.072711026));
+			tmp3 = MULTIPLY(d1, FIX(1.501321110));
+			z1 = MULTIPLY(z1, - FIX(0.899976223));
+			z2 = MULTIPLY(z2, - FIX(2.562915447));
+			z3 = MULTIPLY(z3, - FIX(1.961570560));
+			z4 = MULTIPLY(z4, - FIX(0.390180644));
+
+			z3 += z5;
+			z4 += z5;
+
+			tmp0 += z1 + z3;
+			tmp1 += z2 + z4;
+			tmp2 += z2 + z3;
+			tmp3 += z1 + z4;
+		    } else {
+			/* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
+			z1 = d7;
+			z2 = d5 + d3;
+			z3 = d7 + d3;
+			z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
+
+			tmp0 = MULTIPLY(d7, FIX(0.298631336));
+			tmp1 = MULTIPLY(d5, FIX(2.053119869));
+			tmp2 = MULTIPLY(d3, FIX(3.072711026));
+			z1 = MULTIPLY(d7, - FIX(0.899976223));
+			z2 = MULTIPLY(z2, - FIX(2.562915447));
+			z3 = MULTIPLY(z3, - FIX(1.961570560));
+			z4 = MULTIPLY(d5, - FIX(0.390180644));
+
+			z3 += z5;
+			z4 += z5;
+
+			tmp0 += z1 + z3;
+			tmp1 += z2 + z4;
+			tmp2 += z2 + z3;
+			tmp3 = z1 + z4;
+		    }
+		} else {
+		    if (d1) {
+			/* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
+			z1 = d7 + d1;
+			z2 = d5;
+			z3 = d7;
+			z4 = d5 + d1;
+			z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
+
+			tmp0 = MULTIPLY(d7, FIX(0.298631336));
+			tmp1 = MULTIPLY(d5, FIX(2.053119869));
+			tmp3 = MULTIPLY(d1, FIX(1.501321110));
+			z1 = MULTIPLY(z1, - FIX(0.899976223));
+			z2 = MULTIPLY(d5, - FIX(2.562915447));
+			z3 = MULTIPLY(d7, - FIX(1.961570560));
+			z4 = MULTIPLY(z4, - FIX(0.390180644));
+
+			z3 += z5;
+			z4 += z5;
+
+			tmp0 += z1 + z3;
+			tmp1 += z2 + z4;
+			tmp2 = z2 + z3;
+			tmp3 += z1 + z4;
+		    } else {
+			/* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
+			tmp0 = MULTIPLY(d7, - FIX(0.601344887));
+			z1 = MULTIPLY(d7, - FIX(0.899976223));
+			z3 = MULTIPLY(d7, - FIX(1.961570560));
+			tmp1 = MULTIPLY(d5, - FIX(0.509795578));
+			z2 = MULTIPLY(d5, - FIX(2.562915447));
+			z4 = MULTIPLY(d5, - FIX(0.390180644));
+			z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
+
+			z3 += z5;
+			z4 += z5;
+
+			tmp0 += z3;
+			tmp1 += z4;
+			tmp2 = z2 + z3;
+			tmp3 = z1 + z4;
+		    }
+		}
+	    } else {
+		if (d3) {
+		    if (d1) {
+			/* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
+			z1 = d7 + d1;
+			z3 = d7 + d3;
+			z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
+
+			tmp0 = MULTIPLY(d7, FIX(0.298631336));
+			tmp2 = MULTIPLY(d3, FIX(3.072711026));
+			tmp3 = MULTIPLY(d1, FIX(1.501321110));
+			z1 = MULTIPLY(z1, - FIX(0.899976223));
+			z2 = MULTIPLY(d3, - FIX(2.562915447));
+			z3 = MULTIPLY(z3, - FIX(1.961570560));
+			z4 = MULTIPLY(d1, - FIX(0.390180644));
+
+			z3 += z5;
+			z4 += z5;
+
+			tmp0 += z1 + z3;
+			tmp1 = z2 + z4;
+			tmp2 += z2 + z3;
+			tmp3 += z1 + z4;
+		    } else {
+			/* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
+			z3 = d7 + d3;
+
+			tmp0 = MULTIPLY(d7, - FIX(0.601344887));
+			z1 = MULTIPLY(d7, - FIX(0.899976223));
+			tmp2 = MULTIPLY(d3, FIX(0.509795579));
+			z2 = MULTIPLY(d3, - FIX(2.562915447));
+			z5 = MULTIPLY(z3, FIX(1.175875602));
+			z3 = MULTIPLY(z3, - FIX(0.785694958));
+
+			tmp0 += z3;
+			tmp1 = z2 + z5;
+			tmp2 += z3;
+			tmp3 = z1 + z5;
+		    }
+		} else {
+		    if (d1) {
+			/* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
+			z1 = d7 + d1;
+			z5 = MULTIPLY(z1, FIX(1.175875602));
+
+			z1 = MULTIPLY(z1, FIX(0.275899379));
+			z3 = MULTIPLY(d7, - FIX(1.961570560));
+			tmp0 = MULTIPLY(d7, - FIX(1.662939224));
+			z4 = MULTIPLY(d1, - FIX(0.390180644));
+			tmp3 = MULTIPLY(d1, FIX(1.111140466));
+
+			tmp0 += z1;
+			tmp1 = z4 + z5;
+			tmp2 = z3 + z5;
+			tmp3 += z1;
+		    } else {
+			/* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
+			tmp0 = MULTIPLY(d7, - FIX(1.387039845));
+			tmp1 = MULTIPLY(d7, FIX(1.175875602));
+			tmp2 = MULTIPLY(d7, - FIX(0.785694958));
+			tmp3 = MULTIPLY(d7, FIX(0.275899379));
+		    }
+		}
+	    }
+	} else {
+	    if (d5) {
+		if (d3) {
+		    if (d1) {
+			/* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
+			z2 = d5 + d3;
+			z4 = d5 + d1;
+			z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
+
+			tmp1 = MULTIPLY(d5, FIX(2.053119869));
+			tmp2 = MULTIPLY(d3, FIX(3.072711026));
+			tmp3 = MULTIPLY(d1, FIX(1.501321110));
+			z1 = MULTIPLY(d1, - FIX(0.899976223));
+			z2 = MULTIPLY(z2, - FIX(2.562915447));
+			z3 = MULTIPLY(d3, - FIX(1.961570560));
+			z4 = MULTIPLY(z4, - FIX(0.390180644));
+
+			z3 += z5;
+			z4 += z5;
+
+			tmp0 = z1 + z3;
+			tmp1 += z2 + z4;
+			tmp2 += z2 + z3;
+			tmp3 += z1 + z4;
+		    } else {
+			/* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
+			z2 = d5 + d3;
+
+			z5 = MULTIPLY(z2, FIX(1.175875602));
+			tmp1 = MULTIPLY(d5, FIX(1.662939225));
+			z4 = MULTIPLY(d5, - FIX(0.390180644));
+			z2 = MULTIPLY(z2, - FIX(1.387039845));
+			tmp2 = MULTIPLY(d3, FIX(1.111140466));
+			z3 = MULTIPLY(d3, - FIX(1.961570560));
+
+			tmp0 = z3 + z5;
+			tmp1 += z2;
+			tmp2 += z2;
+			tmp3 = z4 + z5;
+		    }
+		} else {
+		    if (d1) {
+			/* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
+			z4 = d5 + d1;
+
+			z5 = MULTIPLY(z4, FIX(1.175875602));
+			z1 = MULTIPLY(d1, - FIX(0.899976223));
+			tmp3 = MULTIPLY(d1, FIX(0.601344887));
+			tmp1 = MULTIPLY(d5, - FIX(0.509795578));
+			z2 = MULTIPLY(d5, - FIX(2.562915447));
+			z4 = MULTIPLY(z4, FIX(0.785694958));
+
+			tmp0 = z1 + z5;
+			tmp1 += z4;
+			tmp2 = z2 + z5;
+			tmp3 += z4;
+		    } else {
+			/* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
+			tmp0 = MULTIPLY(d5, FIX(1.175875602));
+			tmp1 = MULTIPLY(d5, FIX(0.275899380));
+			tmp2 = MULTIPLY(d5, - FIX(1.387039845));
+			tmp3 = MULTIPLY(d5, FIX(0.785694958));
+		    }
+		}
+	    } else {
+		if (d3) {
+		    if (d1) {
+			/* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
+			z5 = d1 + d3;
+			tmp3 = MULTIPLY(d1, FIX(0.211164243));
+			tmp2 = MULTIPLY(d3, - FIX(1.451774981));
+			z1 = MULTIPLY(d1, FIX(1.061594337));
+			z2 = MULTIPLY(d3, - FIX(2.172734803));
+			z4 = MULTIPLY(z5, FIX(0.785694958));
+			z5 = MULTIPLY(z5, FIX(1.175875602));
+
+			tmp0 = z1 - z4;
+			tmp1 = z2 + z4;
+			tmp2 += z5;
+			tmp3 += z5;
+		    } else {
+			/* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
+			tmp0 = MULTIPLY(d3, - FIX(0.785694958));
+			tmp1 = MULTIPLY(d3, - FIX(1.387039845));
+			tmp2 = MULTIPLY(d3, - FIX(0.275899379));
+			tmp3 = MULTIPLY(d3, FIX(1.175875602));
+		    }
+		} else {
+		    if (d1) {
+			/* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
+			tmp0 = MULTIPLY(d1, FIX(0.275899379));
+			tmp1 = MULTIPLY(d1, FIX(0.785694958));
+			tmp2 = MULTIPLY(d1, FIX(1.175875602));
+			tmp3 = MULTIPLY(d1, FIX(1.387039845));
+		    } else {
+			/* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
+			tmp0 = tmp1 = tmp2 = tmp3 = 0;
+		    }
+		}
+	    }
+	}
+
+	/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+	dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+	dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+	dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+	dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+	dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+	dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+	dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+	dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+
+	dataptr += DCTSIZE;		/* advance pointer to next row */
+    }
+
+    /* Pass 2: process columns. */
+    /* Note that we must descale the results by a factor of 8 == 2**3, */
+    /* and also undo the PASS1_BITS scaling. */
+
+    dataptr = data;
+    for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+	/* Columns of zeroes can be exploited in the same way as we did with rows.
+	 * However, the row calculation has created many nonzero AC terms, so the
+	 * simplification applies less often (typically 5% to 10% of the time).
+	 * On machines with very fast multiplication, it's possible that the
+	 * test takes more time than it's worth.  In that case this section
+	 * may be commented out.
+	 */
+
+	d0 = dataptr[DCTSIZE*0];
+	d1 = dataptr[DCTSIZE*1];
+	d2 = dataptr[DCTSIZE*2];
+	d3 = dataptr[DCTSIZE*3];
+	d4 = dataptr[DCTSIZE*4];
+	d5 = dataptr[DCTSIZE*5];
+	d6 = dataptr[DCTSIZE*6];
+	d7 = dataptr[DCTSIZE*7];
+
+	/* Even part: reverse the even part of the forward DCT. */
+	/* The rotator is sqrt(2)*c(-6). */
+	if (d6) {
+	    if (d4) {
+		if (d2) {
+		    if (d0) {
+			/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+			z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
+			tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
+			tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
+
+			tmp0 = SCALE (d0 + d4, CONST_BITS);
+			tmp1 = SCALE (d0 - d4, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp1 + tmp2;
+			tmp12 = tmp1 - tmp2;
+		    } else {
+			/* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
+			z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
+			tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
+			tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
+
+			tmp0 = SCALE (d4, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp2 - tmp0;
+			tmp12 = -(tmp0 + tmp2);
+		    }
+		} else {
+		    if (d0) {
+			/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+			tmp2 = MULTIPLY(d6, - FIX(1.306562965));
+			tmp3 = MULTIPLY(d6, FIX(0.541196100));
+
+			tmp0 = SCALE (d0 + d4, CONST_BITS);
+			tmp1 = SCALE (d0 - d4, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp1 + tmp2;
+			tmp12 = tmp1 - tmp2;
+		    } else {
+			/* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
+			tmp2 = MULTIPLY(d6, -FIX(1.306562965));
+			tmp3 = MULTIPLY(d6, FIX(0.541196100));
+
+			tmp0 = SCALE (d4, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp2 - tmp0;
+			tmp12 = -(tmp0 + tmp2);
+		    }
+		}
+	    } else {
+		if (d2) {
+		    if (d0) {
+			/* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
+			z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
+			tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
+			tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
+
+			tmp0 = SCALE (d0, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp0 + tmp2;
+			tmp12 = tmp0 - tmp2;
+		    } else {
+			/* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
+			z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
+			tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
+			tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
+
+			tmp10 = tmp3;
+			tmp13 = -tmp3;
+			tmp11 = tmp2;
+			tmp12 = -tmp2;
+		    }
+		} else {
+		    if (d0) {
+			/* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
+			tmp2 = MULTIPLY(d6, - FIX(1.306562965));
+			tmp3 = MULTIPLY(d6, FIX(0.541196100));
+
+			tmp0 = SCALE (d0, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp0 + tmp2;
+			tmp12 = tmp0 - tmp2;
+		    } else {
+			/* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
+			tmp2 = MULTIPLY(d6, - FIX(1.306562965));
+			tmp3 = MULTIPLY(d6, FIX(0.541196100));
+
+			tmp10 = tmp3;
+			tmp13 = -tmp3;
+			tmp11 = tmp2;
+			tmp12 = -tmp2;
+		    }
+		}
+	    }
+	} else {
+	    if (d4) {
+		if (d2) {
+		    if (d0) {
+			/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+			tmp2 = MULTIPLY(d2, FIX(0.541196100));
+			tmp3 = MULTIPLY(d2, FIX(1.306562965));
+
+			tmp0 = SCALE (d0 + d4, CONST_BITS);
+			tmp1 = SCALE (d0 - d4, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp1 + tmp2;
+			tmp12 = tmp1 - tmp2;
+		    } else {
+			/* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
+			tmp2 = MULTIPLY(d2, FIX(0.541196100));
+			tmp3 = MULTIPLY(d2, FIX(1.306562965));
+
+			tmp0 = SCALE (d4, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp2 - tmp0;
+			tmp12 = -(tmp0 + tmp2);
+		    }
+		} else {
+		    if (d0) {
+			/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+			tmp10 = tmp13 = SCALE (d0 + d4, CONST_BITS);
+			tmp11 = tmp12 = SCALE (d0 - d4, CONST_BITS);
+		    } else {
+			/* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
+			tmp10 = tmp13 = SCALE (d4, CONST_BITS);
+			tmp11 = tmp12 = -tmp10;
+		    }
+		}
+	    } else {
+		if (d2) {
+		    if (d0) {
+			/* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
+			tmp2 = MULTIPLY(d2, FIX(0.541196100));
+			tmp3 = MULTIPLY(d2, FIX(1.306562965));
+
+			tmp0 = SCALE (d0, CONST_BITS);
+
+			tmp10 = tmp0 + tmp3;
+			tmp13 = tmp0 - tmp3;
+			tmp11 = tmp0 + tmp2;
+			tmp12 = tmp0 - tmp2;
+		    } else {
+			/* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
+			tmp2 = MULTIPLY(d2, FIX(0.541196100));
+			tmp3 = MULTIPLY(d2, FIX(1.306562965));
+
+			tmp10 = tmp3;
+			tmp13 = -tmp3;
+			tmp11 = tmp2;
+			tmp12 = -tmp2;
+		    }
+		} else {
+		    if (d0) {
+			/* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
+			tmp10 = tmp13 = tmp11 = tmp12 = SCALE (d0, CONST_BITS);
+		    } else {
+			/* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
+			tmp10 = tmp13 = tmp11 = tmp12 = 0;
+		    }
+		}
+	    }
+	}
+
+	/* Odd part per figure 8; the matrix is unitary and hence its
+	 * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+	 */
+	if (d7) {
+	    if (d5) {
+		if (d3) {
+		    if (d1) {
+			/* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
+			z1 = d7 + d1;
+			z2 = d5 + d3;
+			z3 = d7 + d3;
+			z4 = d5 + d1;
+			z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
+
+			tmp0 = MULTIPLY(d7, FIX(0.298631336));
+			tmp1 = MULTIPLY(d5, FIX(2.053119869));
+			tmp2 = MULTIPLY(d3, FIX(3.072711026));
+			tmp3 = MULTIPLY(d1, FIX(1.501321110));
+			z1 = MULTIPLY(z1, - FIX(0.899976223));
+			z2 = MULTIPLY(z2, - FIX(2.562915447));
+			z3 = MULTIPLY(z3, - FIX(1.961570560));
+			z4 = MULTIPLY(z4, - FIX(0.390180644));
+
+			z3 += z5;
+			z4 += z5;
+
+			tmp0 += z1 + z3;
+			tmp1 += z2 + z4;
+			tmp2 += z2 + z3;
+			tmp3 += z1 + z4;
+		    } else {
+			/* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
+			z1 = d7;
+			z2 = d5 + d3;
+			z3 = d7 + d3;
+			z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
+
+			tmp0 = MULTIPLY(d7, FIX(0.298631336));
+			tmp1 = MULTIPLY(d5, FIX(2.053119869));
+			tmp2 = MULTIPLY(d3, FIX(3.072711026));
+			z1 = MULTIPLY(d7, - FIX(0.899976223));
+			z2 = MULTIPLY(z2, - FIX(2.562915447));
+			z3 = MULTIPLY(z3, - FIX(1.961570560));
+			z4 = MULTIPLY(d5, - FIX(0.390180644));
+
+			z3 += z5;
+			z4 += z5;
+
+			tmp0 += z1 + z3;
+			tmp1 += z2 + z4;
+			tmp2 += z2 + z3;
+			tmp3 = z1 + z4;
+		    }
+		} else {
+		    if (d1) {
+			/* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
+			z1 = d7 + d1;
+			z2 = d5;
+			z3 = d7;
+			z4 = d5 + d1;
+			z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
+
+			tmp0 = MULTIPLY(d7, FIX(0.298631336));
+			tmp1 = MULTIPLY(d5, FIX(2.053119869));
+			tmp3 = MULTIPLY(d1, FIX(1.501321110));
+			z1 = MULTIPLY(z1, - FIX(0.899976223));
+			z2 = MULTIPLY(d5, - FIX(2.562915447));
+			z3 = MULTIPLY(d7, - FIX(1.961570560));
+			z4 = MULTIPLY(z4, - FIX(0.390180644));
+
+			z3 += z5;
+			z4 += z5;
+
+			tmp0 += z1 + z3;
+			tmp1 += z2 + z4;
+			tmp2 = z2 + z3;
+			tmp3 += z1 + z4;
+		    } else {
+			/* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
+			tmp0 = MULTIPLY(d7, - FIX(0.601344887));
+			z1 = MULTIPLY(d7, - FIX(0.899976223));
+			z3 = MULTIPLY(d7, - FIX(1.961570560));
+			tmp1 = MULTIPLY(d5, - FIX(0.509795578));
+			z2 = MULTIPLY(d5, - FIX(2.562915447));
+			z4 = MULTIPLY(d5, - FIX(0.390180644));
+			z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
+
+			z3 += z5;
+			z4 += z5;
+
+			tmp0 += z3;
+			tmp1 += z4;
+			tmp2 = z2 + z3;
+			tmp3 = z1 + z4;
+		    }
+		}
+	    } else {
+		if (d3) {
+		    if (d1) {
+			/* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
+			z1 = d7 + d1;
+			z3 = d7 + d3;
+			z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
+
+			tmp0 = MULTIPLY(d7, FIX(0.298631336));
+			tmp2 = MULTIPLY(d3, FIX(3.072711026));
+			tmp3 = MULTIPLY(d1, FIX(1.501321110));
+			z1 = MULTIPLY(z1, - FIX(0.899976223));
+			z2 = MULTIPLY(d3, - FIX(2.562915447));
+			z3 = MULTIPLY(z3, - FIX(1.961570560));
+			z4 = MULTIPLY(d1, - FIX(0.390180644));
+
+			z3 += z5;
+			z4 += z5;
+
+			tmp0 += z1 + z3;
+			tmp1 = z2 + z4;
+			tmp2 += z2 + z3;
+			tmp3 += z1 + z4;
+		    } else {
+			/* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
+			z3 = d7 + d3;
+
+			tmp0 = MULTIPLY(d7, - FIX(0.601344887));
+			z1 = MULTIPLY(d7, - FIX(0.899976223));
+			tmp2 = MULTIPLY(d3, FIX(0.509795579));
+			z2 = MULTIPLY(d3, - FIX(2.562915447));
+			z5 = MULTIPLY(z3, FIX(1.175875602));
+			z3 = MULTIPLY(z3, - FIX(0.785694958));
+
+			tmp0 += z3;
+			tmp1 = z2 + z5;
+			tmp2 += z3;
+			tmp3 = z1 + z5;
+		    }
+		} else {
+		    if (d1) {
+			/* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
+			z1 = d7 + d1;
+			z5 = MULTIPLY(z1, FIX(1.175875602));
+
+			z1 = MULTIPLY(z1, FIX(0.275899379));
+			z3 = MULTIPLY(d7, - FIX(1.961570560));
+			tmp0 = MULTIPLY(d7, - FIX(1.662939224));
+			z4 = MULTIPLY(d1, - FIX(0.390180644));
+			tmp3 = MULTIPLY(d1, FIX(1.111140466));
+
+			tmp0 += z1;
+			tmp1 = z4 + z5;
+			tmp2 = z3 + z5;
+			tmp3 += z1;
+		    } else {
+			/* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
+			tmp0 = MULTIPLY(d7, - FIX(1.387039845));
+			tmp1 = MULTIPLY(d7, FIX(1.175875602));
+			tmp2 = MULTIPLY(d7, - FIX(0.785694958));
+			tmp3 = MULTIPLY(d7, FIX(0.275899379));
+		    }
+		}
+	    }
+	} else {
+	    if (d5) {
+		if (d3) {
+		    if (d1) {
+			/* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
+			z2 = d5 + d3;
+			z4 = d5 + d1;
+			z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
+
+			tmp1 = MULTIPLY(d5, FIX(2.053119869));
+			tmp2 = MULTIPLY(d3, FIX(3.072711026));
+			tmp3 = MULTIPLY(d1, FIX(1.501321110));
+			z1 = MULTIPLY(d1, - FIX(0.899976223));
+			z2 = MULTIPLY(z2, - FIX(2.562915447));
+			z3 = MULTIPLY(d3, - FIX(1.961570560));
+			z4 = MULTIPLY(z4, - FIX(0.390180644));
+
+			z3 += z5;
+			z4 += z5;
+
+			tmp0 = z1 + z3;
+			tmp1 += z2 + z4;
+			tmp2 += z2 + z3;
+			tmp3 += z1 + z4;
+		    } else {
+			/* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
+			z2 = d5 + d3;
+
+			z5 = MULTIPLY(z2, FIX(1.175875602));
+			tmp1 = MULTIPLY(d5, FIX(1.662939225));
+			z4 = MULTIPLY(d5, - FIX(0.390180644));
+			z2 = MULTIPLY(z2, - FIX(1.387039845));
+			tmp2 = MULTIPLY(d3, FIX(1.111140466));
+			z3 = MULTIPLY(d3, - FIX(1.961570560));
+
+			tmp0 = z3 + z5;
+			tmp1 += z2;
+			tmp2 += z2;
+			tmp3 = z4 + z5;
+		    }
+		} else {
+		    if (d1) {
+			/* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
+			z4 = d5 + d1;
+
+			z5 = MULTIPLY(z4, FIX(1.175875602));
+			z1 = MULTIPLY(d1, - FIX(0.899976223));
+			tmp3 = MULTIPLY(d1, FIX(0.601344887));
+			tmp1 = MULTIPLY(d5, - FIX(0.509795578));
+			z2 = MULTIPLY(d5, - FIX(2.562915447));
+			z4 = MULTIPLY(z4, FIX(0.785694958));
+
+			tmp0 = z1 + z5;
+			tmp1 += z4;
+			tmp2 = z2 + z5;
+			tmp3 += z4;
+		    } else {
+			/* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
+			tmp0 = MULTIPLY(d5, FIX(1.175875602));
+			tmp1 = MULTIPLY(d5, FIX(0.275899380));
+			tmp2 = MULTIPLY(d5, - FIX(1.387039845));
+			tmp3 = MULTIPLY(d5, FIX(0.785694958));
+		    }
+		}
+	    } else {
+		if (d3) {
+		    if (d1) {
+			/* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
+			z5 = d1 + d3;
+			tmp3 = MULTIPLY(d1, FIX(0.211164243));
+			tmp2 = MULTIPLY(d3, - FIX(1.451774981));
+			z1 = MULTIPLY(d1, FIX(1.061594337));
+			z2 = MULTIPLY(d3, - FIX(2.172734803));
+			z4 = MULTIPLY(z5, FIX(0.785694958));
+			z5 = MULTIPLY(z5, FIX(1.175875602));
+
+			tmp0 = z1 - z4;
+			tmp1 = z2 + z4;
+			tmp2 += z5;
+			tmp3 += z5;
+		    } else {
+			/* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
+			tmp0 = MULTIPLY(d3, - FIX(0.785694958));
+			tmp1 = MULTIPLY(d3, - FIX(1.387039845));
+			tmp2 = MULTIPLY(d3, - FIX(0.275899379));
+			tmp3 = MULTIPLY(d3, FIX(1.175875602));
+		    }
+		} else {
+		    if (d1) {
+			/* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
+			tmp0 = MULTIPLY(d1, FIX(0.275899379));
+			tmp1 = MULTIPLY(d1, FIX(0.785694958));
+			tmp2 = MULTIPLY(d1, FIX(1.175875602));
+			tmp3 = MULTIPLY(d1, FIX(1.387039845));
+		    } else {
+			/* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
+			tmp0 = tmp1 = tmp2 = tmp3 = 0;
+		    }
+		}
+	    }
+	}
+
+	/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+	dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
+	    CONST_BITS+PASS1_BITS+3);
+	dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
+	    CONST_BITS+PASS1_BITS+3);
+	dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
+	    CONST_BITS+PASS1_BITS+3);
+	dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
+	    CONST_BITS+PASS1_BITS+3);
+	dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
+	    CONST_BITS+PASS1_BITS+3);
+	dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
+	    CONST_BITS+PASS1_BITS+3);
+	dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
+	    CONST_BITS+PASS1_BITS+3);
+	dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
+	    CONST_BITS+PASS1_BITS+3);
+
+	dataptr++;			/* advance pointer to next column */
+    }
+}
+
+#else
+
+/*---- debugging/tracing macros ----*/
+
+/* MSVC only: restore the compiler's optimisation settings for the code below. */
+#if _MSC_VER
+#pragma optimize("",on)
+#if _MSC_VER > 700
+/*#pragma optimize("l",off)*/
+#endif
+#endif
+
+/*
+ * By default every statistics/tracing hook expands to nothing, so the calls
+ * placed in the IDCT routines below generate no code.
+ */
+#define idct_single_pos0()
+#define idct_zero_col_stat()
+#define idct_zero_row_stat()
+#define idct_nonzero_col_stat()
+#define idct_nonzero_row_stat()
+#define DUMP_COEFS(p)
+#define TRACE(args)
+
+/* Non-zero: j_rev_dct accesses d0..d7 through macros aliasing dp[] rather
+ * than through local int copies. */
+#define FAST_DCTPTRS	1
+
+#if 0	/* to count cases */
+/*
+ * The no-op macros have to be removed BEFORE the counting functions are
+ * defined.  Otherwise "idct_single_pos0 (void)" is parsed as an invocation
+ * of the zero-parameter macro with one argument and the definitions below
+ * do not compile.
+ */
+#undef idct_single_pos0
+#undef idct_zero_col_stat
+#undef idct_zero_row_stat
+#undef idct_nonzero_col_stat
+#undef idct_nonzero_row_stat
+/* Each hook only bumps a private static counter (inspect with a debugger). */
+void idct_single_pos0 (void) { static int count; count++; }
+void idct_zero_col_stat (void) { static int count; count++; }
+void idct_zero_row_stat (void) { static int count; count++; }
+void idct_nonzero_col_stat (void) { static int count; count++; }
+void idct_nonzero_row_stat (void) { static int count; count++; }
+#endif
+
+/*
+ * Pre-IDCT initialisation hook.  This variant of the IDCT has nothing to
+ * set up, so the body is intentionally empty.
+ */
+void init_pre_idct (void)
+{
+}
+
+/*
+ * Inverse DCT entry point for blocks known to be sparse.
+ *
+ * data: 8x8 coefficient block (64 DCTELEMs), transformed in place.
+ * pos:  0 means only the DC coefficient data[0] is set; any other value
+ *       hands the block to the general j_rev_dct().
+ *
+ * In the DC-only case every output sample is data[0] / 8 rounded to the
+ * nearest integer (halves rounded up), so the block is simply filled with
+ * that value.
+ * NOTE(review): the 63 AC entries are not touched when the rounded value is
+ * zero, so they are presumably guaranteed to be zero on entry -- confirm
+ * against the callers.
+ */
+void j_rev_dct_sparse (DCTBLOCK data, int pos)
+{
+    DCTELEM *row, *end;
+    DCTELEM dcval;
+
+    if (pos != 0) {
+	/* Some other coeff */
+	j_rev_dct (data);
+	return;
+    }
+
+    /* If just DC Coefficient. */
+    idct_single_pos0();
+
+    dcval = data[0];
+
+    /*
+     * Round to nearest.  The negative branch divides instead of shifting:
+     * an arithmetic ">> 3" rounds toward minus infinity, which turned
+     * -1..-4 into -1 instead of 0 (and e.g. -9 into -2 instead of -1).
+     * Division truncates toward zero (guaranteed since C99), which is what
+     * the "- 3" bias requires.
+     * NOTE(review): this presumably matches what j_rev_dct() yields for the
+     * same block via DESCALE -- its definition is not visible here.
+     */
+    if (dcval < 0)
+	dcval = (short)((dcval - 3) / 8);
+    else
+	dcval = (short)((dcval + 4) >> 3);
+
+    if (dcval == 0) {
+	/* Small inputs round to zero: clear the raw DC value as well,
+	 * otherwise sample 0 would keep the unscaled coefficient. */
+	data[0] = 0;
+	return;
+    }
+
+    /* Fill all 8 rows of 8 samples with the rounded DC value. */
+    for (row = data, end = data + 64; row < end; row += 8) {
+	row[3] = row[2] = row[1] = row[0] = dcval;
+	row[7] = row[6] = row[5] = row[4] = dcval;
+    }
+}
+
+#ifndef OPTIMIZE_ASM
+/*
+ * In-place inverse DCT of one 8x8 block (plain C version, compiled only
+ * when OPTIMIZE_ASM is not defined).
+ *
+ * data: DCTSIZE x DCTSIZE coefficients stored row by row; overwritten with
+ *       the result.
+ *
+ * The transform is done as two passes of the same 1-D IDCT: pass 1 over each
+ * row, leaving the values scaled up by 2**PASS1_BITS, then pass 2 down each
+ * column, which removes that scaling together with the factor of 8.
+ * Arithmetic is fixed point; MULTIPLY, FIX, SCALE, DESCALE and SHIFT_TEMPS
+ * are defined outside this function.
+ */
+void j_rev_dct (DCTBLOCK data)
+{
+    INT32 tmp0, tmp1, tmp2, tmp3;
+    INT32 tmp10, tmp11, tmp12, tmp13;
+    INT32 z1, z2, z3, z4, z5;
+    register DCTELEM *dp;
+    int rowctr;
+    SHIFT_TEMPS;
+
+    /* Pass 1: process rows. */
+    /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+    /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+    DUMP_COEFS(data);
+
+    dp = data;
+    for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--, dp += DCTSIZE) {
+	/* Due to quantization, we will usually find that many of the input
+	 * coefficients are zero, especially the AC terms.  We can exploit this
+	 * by short-circuiting the IDCT calculation for any row in which all
+	 * the AC terms are zero.  In that case each output is equal to the
+	 * DC coefficient (with scale factor as needed).
+	 * With typical images and quantization tables, half or more of the
+	 * row DCT calculations can be simplified this way.
+	 */
+
+	/* With FAST_DCTPTRS the names d0..d7 alias the live row elements
+	 * instead of holding copies.  That is safe here because every store
+	 * to dp[] below happens after the last read of d0..d7 for the row.
+	 */
+#if FAST_DCTPTRS
+#define d0	dp[0]
+#define d1	dp[1]
+#define d2	dp[2]
+#define d3	dp[3]
+#define d4	dp[4]
+#define d5	dp[5]
+#define d6	dp[6]
+#define d7	dp[7]
+#else
+	int d0 = dp[0];
+	int d1 = dp[1];
+	int d2 = dp[2];
+	int d3 = dp[3];
+	int d4 = dp[4];
+	int d5 = dp[5];
+	int d6 = dp[6];
+	int d7 = dp[7];
+#endif
+
+#ifndef NO_ZERO_ROW_TEST
+	if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) {
+	    /* AC terms all zero */
+	    DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
+
+	    /* A zero DC needs no stores: the row is already all zero. */
+	    if (d0) {
+		dp[0] = dcval;
+		dp[1] = dcval;
+		dp[2] = dcval;
+		dp[3] = dcval;
+		dp[4] = dcval;
+		dp[5] = dcval;
+		dp[6] = dcval;
+		dp[7] = dcval;
+	    }
+	    idct_zero_row_stat();
+	    /* The loop increment still advances dp to the next row. */
+	    continue;
+	}
+#endif
+
+	idct_nonzero_row_stat();
+
+	/* Even part: reverse the even part of the forward DCT. */
+	/* The rotator is sqrt(2)*c(-6). */
+
+	z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
+	tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
+	tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
+
+	tmp0 = SCALE (d0 + d4, CONST_BITS);
+	tmp1 = SCALE (d0 - d4, CONST_BITS);
+
+	tmp10 = tmp0 + tmp3;
+	tmp13 = tmp0 - tmp3;
+	tmp11 = tmp1 + tmp2;
+	tmp12 = tmp1 - tmp2;
+
+	/* Odd part per figure 8; the matrix is unitary and hence its
+	 * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+	 */
+
+	/* tmp0..tmp3 are reused from here on: the even-part results live in
+	 * tmp10..tmp13. */
+	z1 = d7 + d1;
+	z2 = d5 + d3;
+	z3 = d7 + d3;
+	z4 = d5 + d1;
+	z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); /* sqrt(2) * c3 */
+
+	tmp0 = MULTIPLY(d7, FIX(0.298631336)); /* sqrt(2) * (-c1+c3+c5-c7) */
+	tmp1 = MULTIPLY(d5, FIX(2.053119869)); /* sqrt(2) * ( c1+c3-c5+c7) */
+	tmp2 = MULTIPLY(d3, FIX(3.072711026)); /* sqrt(2) * ( c1+c3+c5-c7) */
+	tmp3 = MULTIPLY(d1, FIX(1.501321110)); /* sqrt(2) * ( c1+c3-c5-c7) */
+	z1 = MULTIPLY(z1, - FIX(0.899976223)); /* sqrt(2) * (c7-c3) */
+	z2 = MULTIPLY(z2, - FIX(2.562915447)); /* sqrt(2) * (-c1-c3) */
+	z3 = MULTIPLY(z3, - FIX(1.961570560)); /* sqrt(2) * (-c3-c5) */
+	z4 = MULTIPLY(z4, - FIX(0.390180644)); /* sqrt(2) * (c5-c3) */
+
+	z3 += z5;
+	z4 += z5;
+
+	tmp0 += z1 + z3;
+	tmp1 += z2 + z4;
+	tmp2 += z2 + z3;
+	tmp3 += z1 + z4;
+
+	/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+	/* Outputs k and 7-k are the sum and difference of one even/odd pair. */
+
+	dp[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+	dp[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+	dp[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+	dp[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+	dp[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+	dp[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+	dp[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+	dp[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+    }
+    /* Drop the row aliases so they can be redefined for column access. */
+#if FAST_DCTPTRS
+#undef d0
+#undef d1
+#undef d2
+#undef d3
+#undef d4
+#undef d5
+#undef d6
+#undef d7
+#endif
+
+    /* Pass 2: process columns. */
+    /* Note that we must descale the results by a factor of 8 == 2**3, */
+    /* and also undo the PASS1_BITS scaling. */
+
+    /* rowctr counts columns in this loop; dp steps one element to the right
+     * per iteration and the column is reached with a stride of DCTSIZE. */
+    dp = data;
+    for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--, dp++) {
+	/* Columns of zeroes can be exploited in the same way as we did with rows.
+	 * However, the row calculation has created many nonzero AC terms, so the
+	 * simplification applies less often (typically 5% to 10% of the time).
+	 * On machines with very fast multiplication, it's possible that the
+	 * test takes more time than it's worth.  In that case this section
+	 * may be commented out.
+	 */
+
+#if FAST_DCTPTRS
+#define d0	dp[DCTSIZE*0]
+#define d1	dp[DCTSIZE*1]
+#define d2	dp[DCTSIZE*2]
+#define d3	dp[DCTSIZE*3]
+#define d4	dp[DCTSIZE*4]
+#define d5	dp[DCTSIZE*5]
+#define d6	dp[DCTSIZE*6]
+#define d7	dp[DCTSIZE*7]
+#else
+	int d0 = dp[DCTSIZE*0];
+	int d1 = dp[DCTSIZE*1];
+	int d2 = dp[DCTSIZE*2];
+	int d3 = dp[DCTSIZE*3];
+	int d4 = dp[DCTSIZE*4];
+	int d5 = dp[DCTSIZE*5];
+	int d6 = dp[DCTSIZE*6];
+	int d7 = dp[DCTSIZE*7];
+#endif
+
+#ifndef NO_ZERO_COLUMN_TEST
+	if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) {
+	    /* AC terms all zero */
+	    DCTELEM dcval = (DCTELEM) DESCALE((INT32) d0, PASS1_BITS+3);
+
+	    /* As in pass 1, an all-zero column is left untouched. */
+	    if (d0) {
+		dp[DCTSIZE*0] = dcval;
+		dp[DCTSIZE*1] = dcval;
+		dp[DCTSIZE*2] = dcval;
+		dp[DCTSIZE*3] = dcval;
+		dp[DCTSIZE*4] = dcval;
+		dp[DCTSIZE*5] = dcval;
+		dp[DCTSIZE*6] = dcval;
+		dp[DCTSIZE*7] = dcval;
+	    }
+	    idct_zero_col_stat();
+	    continue;
+	}
+#endif
+
+	idct_nonzero_col_stat();
+
+	/* Even part: reverse the even part of the forward DCT. */
+	/* The rotator is sqrt(2)*c(-6). */
+
+	z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
+	tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
+	tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
+
+	tmp0 = SCALE (d0 + d4, CONST_BITS);
+	tmp1 = SCALE (d0 - d4, CONST_BITS);
+
+	tmp10 = tmp0 + tmp3;
+	tmp13 = tmp0 - tmp3;
+	tmp11 = tmp1 + tmp2;
+	tmp12 = tmp1 - tmp2;
+
+	/* Odd part per figure 8; the matrix is unitary and hence its
+	 * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+	 */
+
+	z1 = d7 + d1;
+	z2 = d5 + d3;
+	z3 = d7 + d3;
+	z4 = d5 + d1;
+	z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); /* sqrt(2) * c3 */
+
+	tmp0 = MULTIPLY(d7, FIX(0.298631336)); /* sqrt(2) * (-c1+c3+c5-c7) */
+	tmp1 = MULTIPLY(d5, FIX(2.053119869)); /* sqrt(2) * ( c1+c3-c5+c7) */
+	tmp2 = MULTIPLY(d3, FIX(3.072711026)); /* sqrt(2) * ( c1+c3+c5-c7) */
+	tmp3 = MULTIPLY(d1, FIX(1.501321110)); /* sqrt(2) * ( c1+c3-c5-c7) */
+	z1 = MULTIPLY(z1, - FIX(0.899976223)); /* sqrt(2) * (c7-c3) */
+	z2 = MULTIPLY(z2, - FIX(2.562915447)); /* sqrt(2) * (-c1-c3) */
+	z3 = MULTIPLY(z3, - FIX(1.961570560)); /* sqrt(2) * (-c3-c5) */
+	z4 = MULTIPLY(z4, - FIX(0.390180644)); /* sqrt(2) * (c5-c3) */
+
+	z3 += z5;
+	z4 += z5;
+
+	tmp0 += z1 + z3;
+	tmp1 += z2 + z4;
+	tmp2 += z2 + z3;
+	tmp3 += z1 + z4;
+
+	/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+	/* The larger shift removes CONST_BITS, PASS1_BITS and the factor 8. */
+
+	dp[DCTSIZE*0] = (DCTELEM)DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3);
+	dp[DCTSIZE*7] = (DCTELEM)DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3);
+	dp[DCTSIZE*1] = (DCTELEM)DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3);
+	dp[DCTSIZE*6] = (DCTELEM)DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3);
+	dp[DCTSIZE*2] = (DCTELEM)DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3);
+	dp[DCTSIZE*5] = (DCTELEM)DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3);
+	dp[DCTSIZE*3] = (DCTELEM)DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3);
+	dp[DCTSIZE*4] = (DCTELEM)DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3);
+    }
+    /* Do not leak the d0..d7 aliases past this function. */
+#if FAST_DCTPTRS
+#undef d0
+#undef d1
+#undef d2
+#undef d3
+#undef d4
+#undef d5
+#undef d6
+#undef d7
+#endif
+}
+#endif	/* optimize.asm */
+
+#endif
diff --git a/libav/mjpegenc.c b/libav/mjpegenc.c
new file mode 100644
index 0000000000..027287528c
--- /dev/null
+++ b/libav/mjpegenc.c
@@ -0,0 +1,416 @@
+/*
+ * MJPEG encoder
+ * Copyright (c) 2000 Gerard Lantau.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include "avcodec.h"
+#include "mpegvideo.h"
+/* Huffman code tables used by the MJPEG encoder, filled in by mjpeg_init().
+ * For each table, huff_size_*[sym] is the code length in bits and
+ * huff_code_*[sym] is the code word for symbol sym.
+ */
+typedef struct MJpegContext {
+    UINT8 huff_size_dc_luminance[12]; /* DC tables: indexed by the bit count of the DC difference */
+    UINT16 huff_code_dc_luminance[12];
+    UINT8 huff_size_dc_chrominance[12];
+    UINT16 huff_code_dc_chrominance[12];
+
+    UINT8 huff_size_ac_luminance[256]; /* AC tables: indexed by (run << 4) | nbits */
+    UINT16 huff_code_ac_luminance[256];
+    UINT8 huff_size_ac_chrominance[256];
+    UINT16 huff_code_ac_chrominance[256];
+} MJpegContext;
+/* JPEG marker codes; put_marker() writes each one after a 0xff byte. */
+#define SOF0 0xc0 /* start of frame (baseline DCT) */
+#define SOI 0xd8 /* start of image */
+#define EOI 0xd9 /* end of image */
+#define DQT 0xdb /* define quantization table(s) */
+#define DHT 0xc4 /* define Huffman table(s) */
+#define SOS 0xda /* start of scan */
+
+#if 0
+/* These are the sample quantization tables given in JPEG spec section K.1.
+ * The spec says that the values given produce "good" quality, and
+ * when divided by 2, "very good" quality.
+ *
+ * They are compiled out (#if 0): jpeg_table_header() writes
+ * s->init_intra_matrix as the only quantization table instead.
+ */
+static const unsigned char std_luminance_quant_tbl[64] = {
+    16,  11,  10,  16,  24,  40,  51,  61,
+    12,  12,  14,  19,  26,  58,  60,  55,
+    14,  13,  16,  24,  40,  57,  69,  56,
+    14,  17,  22,  29,  51,  87,  80,  62,
+    18,  22,  37,  56,  68, 109, 103,  77,
+    24,  35,  55,  64,  81, 104, 113,  92,
+    49,  64,  78,  87, 103, 121, 120, 101,
+    72,  92,  95,  98, 112, 100, 103,  99
+};
+static const unsigned char std_chrominance_quant_tbl[64] = {
+    17,  18,  24,  47,  99,  99,  99,  99,
+    18,  21,  26,  66,  99,  99,  99,  99,
+    24,  26,  56,  99,  99,  99,  99,  99,
+    47,  66,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99
+};
+#endif
+
+/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
+/* IMPORTANT: these are only valid for 8-bit data precision!
+ *
+ * Each table is a pair: bits_*[i] (i = 1..16, entry 0 unused) is the number
+ * of codes of length i bits, and val_* lists the symbols in order of
+ * increasing code length.  build_huffman_codes() and put_huffman_table()
+ * both consume the tables in this form.
+ */
+static const UINT8 bits_dc_luminance[17] =
+{ /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
+static const UINT8 val_dc_luminance[] =
+{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+
+static const UINT8 bits_dc_chrominance[17] =
+{ /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
+static const UINT8 val_dc_chrominance[] =
+{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+
+static const UINT8 bits_ac_luminance[17] =
+{ /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
+static const UINT8 val_ac_luminance[] =
+{ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+  0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+  0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+  0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+  0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+  0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+  0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+  0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+  0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+  0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+  0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+  0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+  0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+  0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+  0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+  0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+  0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+  0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+  0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+  0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+  0xf9, 0xfa 
+};
+
+static const UINT8 bits_ac_chrominance[17] =
+{ /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
+
+static const UINT8 val_ac_chrominance[] =
+{ 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+  0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+  0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+  0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+  0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+  0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+  0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+  0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+  0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+  0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+  0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+  0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+  0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+  0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+  0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+  0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+  0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+  0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+  0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+  0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+  0xf9, 0xfa 
+};
+
+
+/* Expand a (bits, values) Huffman table description into per-symbol
+ * lookup tables.  bits_table[len] (len = 1..16) is the number of codes of
+ * length len and val_table lists the symbols in order of increasing code
+ * length.  Codes are handed out in increasing numeric order and the
+ * running code is doubled when moving on to the next length.  On return
+ * huff_size[sym] / huff_code[sym] hold the length and code word of every
+ * listed symbol; entries of symbols absent from val_table are not written.
+ */
+static void build_huffman_codes(UINT8 *huff_size, UINT16 *huff_code,
+                                const UINT8 *bits_table, const UINT8 *val_table)
+{
+    const UINT8 *next_sym = val_table;
+    int len, remaining, sym;
+    int code = 0;
+
+    for (len = 1; len <= 16; len++) {
+        for (remaining = bits_table[len]; remaining > 0; remaining--) {
+            sym = *next_sym++;
+            huff_size[sym] = len;
+            huff_code[sym] = code++;
+        }
+        /* moving to the next code length appends a zero bit */
+        code <<= 1;
+    }
+}
+
+/* Allocate the MJPEG encoder context and build its four Huffman lookup
+ * tables (DC/AC x luminance/chrominance) from the standard tables.
+ * Stores the context in s->mjpeg_ctx and returns 0, or returns -1 if the
+ * allocation fails.  The context comes from malloc(), so table entries for
+ * symbols that the standard tables do not list stay uninitialized.
+ */
+int mjpeg_init(MpegEncContext *s)
+{
+    MJpegContext *ctx = malloc(sizeof(MJpegContext));
+
+    if (ctx == NULL)
+        return -1;
+
+    /* DC tables */
+    build_huffman_codes(ctx->huff_size_dc_luminance,
+                        ctx->huff_code_dc_luminance,
+                        bits_dc_luminance, val_dc_luminance);
+    build_huffman_codes(ctx->huff_size_dc_chrominance,
+                        ctx->huff_code_dc_chrominance,
+                        bits_dc_chrominance, val_dc_chrominance);
+
+    /* AC tables */
+    build_huffman_codes(ctx->huff_size_ac_luminance,
+                        ctx->huff_code_ac_luminance,
+                        bits_ac_luminance, val_ac_luminance);
+    build_huffman_codes(ctx->huff_size_ac_chrominance,
+                        ctx->huff_code_ac_chrominance,
+                        bits_ac_chrominance, val_ac_chrominance);
+
+    s->mjpeg_ctx = ctx;
+    return 0;
+}
+
+/* Release the MJPEG context allocated by mjpeg_init().  The pointer is
+ * cleared afterwards so that a repeated close is harmless (free(NULL) is a
+ * no-op) and a stale use sees NULL instead of freed memory.
+ */
+void mjpeg_close(MpegEncContext *s)
+{
+    free(s->mjpeg_ctx);
+    s->mjpeg_ctx = NULL;
+}
+/* Write a marker: a 0xff byte followed by the marker code byte. */
+static inline void put_marker(PutBitContext *p, int code)
+{
+    put_bits(p, 8, 0xff);
+    put_bits(p, 8, code);
+}
+
+/* Write one Huffman table inside a DHT segment: a byte holding
+ * table_class (0 = DC coef, 1 = AC coefs) and table_id, the 16 code
+ * counts bits_table[1..16], then as many bytes of value_table as those
+ * counts add up to.  Returns the number of bytes written (17 + number of
+ * values) so the caller can total the segment length.
+ */
+static int put_huffman_table(MpegEncContext *s, int table_class, int table_id,
+                             const UINT8 *bits_table, const UINT8 *value_table)
+{
+    PutBitContext *pb = &s->pb;
+    int len, k;
+    int total = 0;
+
+    put_bits(pb, 4, table_class);
+    put_bits(pb, 4, table_id);
+
+    /* code counts per length; their sum is the number of values */
+    for (len = 1; len <= 16; len++) {
+        put_bits(pb, 8, bits_table[len]);
+        total += bits_table[len];
+    }
+
+    for (k = 0; k < total; k++)
+        put_bits(pb, 8, value_table[k]);
+
+    return total + 17;
+}
+/* Write the table segments of the picture header: one DQT segment holding
+ * quantization table 0 (taken from s->init_intra_matrix) and one DHT
+ * segment holding the four standard Huffman tables.
+ */
+static void jpeg_table_header(MpegEncContext *s)
+{
+    PutBitContext *p = &s->pb;
+    int i, size;
+    UINT8 *ptr;
+
+    /* quantization matrices */
+    put_marker(p, DQT);
+    put_bits(p, 16, 2 + 1 * (1 + 64)); /* length: 2 + one table of (1 id byte + 64 entries) */
+    put_bits(p, 4, 0); /* 8 bit precision */
+    put_bits(p, 4, 0); /* table 0 */
+    for(i=0;i<64;i++) { /* NOTE(review): written in array index order; the
+                           JPEG DQT segment expects zigzag order -- confirm
+                           how init_intra_matrix is stored */
+        put_bits(p, 8, s->init_intra_matrix[i]);
+    }
+#if 0
+    put_bits(p, 4, 0); /* 8 bit precision */
+    put_bits(p, 4, 1); /* table 1 */
+    for(i=0;i<64;i++) {
+        put_bits(p, 8, m->chrominance_matrix[i]);
+    }
+#endif
+
+    /* huffman table */
+    put_marker(p, DHT);
+    flush_put_bits(p); /* presumably makes buf_ptr address the next output byte -- confirm */
+    ptr = p->buf_ptr; /* remember where the length field goes */
+    put_bits(p, 16, 0); /* patched later */
+    size = 2; /* the length field counts itself */
+    size += put_huffman_table(s, 0, 0, bits_dc_luminance, val_dc_luminance);
+    size += put_huffman_table(s, 0, 1, bits_dc_chrominance, val_dc_chrominance);
+    
+    size += put_huffman_table(s, 1, 0, bits_ac_luminance, val_ac_luminance);
+    size += put_huffman_table(s, 1, 1, bits_ac_chrominance, val_ac_chrominance);
+    ptr[0] = size >> 8; /* store the segment length, high byte first */
+    ptr[1] = size;
+}
+
+/* Write everything that precedes the entropy coded data of one picture:
+ * SOI, the quantization and Huffman tables, the SOF0 frame header and the
+ * SOS scan header.  Three components are declared with numbers 1, 2, 3
+ * (Y, Cb, Cr): Y uses 2x2 sampling factors and Huffman tables 0, Cb and Cr
+ * use 1x1 sampling factors and Huffman tables 1.  All three select
+ * quantization matrix 0.
+ */
+void mjpeg_picture_header(MpegEncContext *s)
+{
+    PutBitContext *pb = &s->pb;
+    int comp, factor, table;
+
+    put_marker(pb, SOI);
+
+    jpeg_table_header(s);
+
+    /* frame header */
+    put_marker(pb, SOF0);
+    put_bits(pb, 16, 17); /* length */
+    put_bits(pb, 8, 8); /* 8 bits/component */
+    put_bits(pb, 16, s->height);
+    put_bits(pb, 16, s->width);
+    put_bits(pb, 8, 3); /* 3 components */
+
+    for (comp = 1; comp <= 3; comp++) {
+        factor = (comp == 1) ? 2 : 1; /* only Y is sampled 2x2 */
+        put_bits(pb, 8, comp); /* component number */
+        put_bits(pb, 4, factor); /* H factor */
+        put_bits(pb, 4, factor); /* V factor */
+        put_bits(pb, 8, 0); /* select matrix */
+    }
+
+    /* scan header */
+    put_marker(pb, SOS);
+    put_bits(pb, 16, 12); /* length */
+    put_bits(pb, 8, 3); /* 3 components */
+
+    for (comp = 1; comp <= 3; comp++) {
+        table = (comp == 1) ? 0 : 1; /* Y: tables 0, Cb/Cr: tables 1 */
+        put_bits(pb, 8, comp); /* index */
+        put_bits(pb, 4, table); /* DC huffman table index */
+        put_bits(pb, 4, table); /* AC huffman table index */
+    }
+
+    put_bits(pb, 8, 0); /* Ss (not used) */
+    put_bits(pb, 8, 63); /* Se (not used) */
+    put_bits(pb, 8, 0); /* (not used) */
+}
+/* Finish the picture: flush pending bits with jflush_put_bits(), then write the EOI marker. */
+void mjpeg_picture_trailer(MpegEncContext *s)
+{
+    jflush_put_bits(&s->pb);
+    put_marker(&s->pb, EOI);
+}
+
+/* Write one DC difference.  The Huffman symbol is the number of bits
+ * needed for |val| (0 when val is 0); it is looked up in
+ * huff_size/huff_code and followed by that many low bits of val, or of
+ * val - 1 when val is negative.
+ */
+static inline void encode_dc(MpegEncContext *s, int val, 
+                             UINT8 *huff_size, UINT16 *huff_code)
+{
+    int magnitude, low_bits, category;
+
+    if (val == 0) {
+        jput_bits(&s->pb, huff_size[0], huff_code[0]);
+        return;
+    }
+
+    if (val < 0) {
+        magnitude = -val;
+        low_bits = val - 1;
+    } else {
+        magnitude = val;
+        low_bits = val;
+    }
+
+    /* count the bits of the magnitude (XXX: optimize) */
+    for (category = 0; magnitude != 0; magnitude >>= 1)
+        category++;
+
+    jput_bits(&s->pb, huff_size[category], huff_code[category]);
+    jput_bits(&s->pb, category, low_bits & ((1 << category) - 1));
+}
+/* Huffman-encode one block of coefficients.  n is the block index within
+ * the macroblock: blocks 0..3 use the luminance tables and DC predictor 0,
+ * block 4 and 5 use the chrominance tables and DC predictors 1 and 2.
+ * The DC coefficient is coded as a difference from s->last_dc[component];
+ * AC coefficients are coded as (zero run, bit count) symbols in
+ * zigzag_direct order up to s->block_last_index[n].
+ */
+static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
+{
+    int mant, nbits, code, i, j;
+    int component, dc, run, last_index, val;
+    MJpegContext *m = s->mjpeg_ctx;
+    UINT8 *huff_size_ac;
+    UINT16 *huff_code_ac;
+    
+    /* DC coef */
+    component = (n <= 3 ? 0 : n - 4 + 1);
+    dc = block[0]; /* overflow is impossible */
+    val = dc - s->last_dc[component];
+    if (n < 4) {
+        encode_dc(s, val, m->huff_size_dc_luminance, m->huff_code_dc_luminance);
+        huff_size_ac = m->huff_size_ac_luminance;
+        huff_code_ac = m->huff_code_ac_luminance;
+    } else {
+        encode_dc(s, val, m->huff_size_dc_chrominance, m->huff_code_dc_chrominance);
+        huff_size_ac = m->huff_size_ac_chrominance;
+        huff_code_ac = m->huff_code_ac_chrominance;
+    }
+    s->last_dc[component] = dc; /* predictor for the next block of this component */
+    
+    /* AC coefs */
+    
+    run = 0; /* number of zero coefficients since the last coded one */
+    last_index = s->block_last_index[n];
+    for(i=1;i<=last_index;i++) {
+        j = zigzag_direct[i];
+        val = block[j];
+        if (val == 0) {
+            run++;
+        } else {
+            while (run >= 16) { /* symbol 0xf0 stands for a run of 16 zeros */
+                jput_bits(&s->pb, huff_size_ac[0xf0], huff_code_ac[0xf0]);
+                run -= 16;
+            }
+            mant = val;
+            if (val < 0) {
+                val = -val;
+                mant--; /* negative values send the low bits of val - 1 */
+            }
+            
+            /* compute the log (XXX: optimize) */
+            nbits = 0;
+            while (val != 0) {
+                val = val >> 1;
+                nbits++;
+            }
+            code = (run << 4) | nbits; /* symbol: run in the high nibble, bit count in the low one */
+
+            jput_bits(&s->pb, huff_size_ac[code], huff_code_ac[code]);
+        
+            jput_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
+            run = 0;
+        }
+    }
+
+    /* output EOB only if not already 64 values */
+    if (last_index < 63 || run != 0)
+        jput_bits(&s->pb, huff_size_ac[0], huff_code_ac[0]);
+}
+
+/* Encode the six blocks of one macroblock, in index order 0..5. */
+void mjpeg_encode_mb(MpegEncContext *s, 
+                     DCTELEM block[6][64])
+{
+    int n = 0;
+
+    while (n < 6) {
+        encode_block(s, block[n], n);
+        n++;
+    }
+}
diff --git a/libav/mpegaudio.c b/libav/mpegaudio.c
new file mode 100644
index 0000000000..50ffc3c200
--- /dev/null
+++ b/libav/mpegaudio.c
@@ -0,0 +1,754 @@
+/*
+ * The simplest mpeg audio layer 2 encoder
+ * Copyright (c) 2000 Gerard Lantau.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <math.h>
+#include "avcodec.h"
+#include "mpegaudio.h"
+
+#define NDEBUG
+#include <assert.h>
+
+/* define it to use floats in quantization (I don't like floats !) */
+//#define USE_FLOATS
+/* Values of the 2-bit mode field of the frame header; encode_frame() always writes MPA_MONO. */
+#define MPA_STEREO  0
+#define MPA_JSTEREO 1
+#define MPA_DUAL    2
+#define MPA_MONO    3
+
+#include "mpegaudiotab.h"
+/* Initialize the layer 2 encoder from avctx (rate, bit_rate, channels).
+ * Returns 0 on success, or -1 when the channel count is not 1 or when the
+ * sample rate or bit rate is not found in freq_tab / bitrate_tab.
+ * Besides the per-instance fields of MpegAudioContext, this also fills
+ * the file-level lookup tables filter_bank, scale_factor_table,
+ * scale_factor_shift/mult (or scale_factor_inv_table), scale_diff_table
+ * and total_quant_bits.
+ */
+int MPA_encode_init(AVEncodeContext *avctx)
+{
+    MpegAudioContext *s = avctx->priv_data;
+    int freq = avctx->rate;
+    int bitrate = avctx->bit_rate;
+    int channels = avctx->channels;
+    int i, v, table;
+    float a;
+
+    if (channels != 1) /* only mono is supported */
+        return -1;
+
+    bitrate = bitrate / 1000; /* compared below against bitrate_tab entries */
+    s->freq = freq;
+    s->bit_rate = bitrate * 1000;
+    avctx->frame_size = MPA_FRAME_SIZE;
+    avctx->key_frame = 1; /* always key frame */
+
+    /* encoding freq */
+    s->lsf = 0;
+    for(i=0;i<3;i++) {
+        if (freq_tab[i] == freq) 
+            break;
+        if ((freq_tab[i] / 2) == freq) { /* half of a table rate selects the lsf variant */
+            s->lsf = 1;
+            break;
+        }
+    }
+    if (i == 3)
+        return -1;
+    s->freq_index = i;
+
+    /* encoding bitrate & frequency */
+    for(i=0;i<15;i++) {
+        if (bitrate_tab[1-s->lsf][i] == bitrate) 
+            break;
+    }
+    if (i == 15)
+        return -1;
+    s->bitrate_index = i;
+
+    /* compute total header size & pad bit */
+    
+    a = (float)(bitrate * 1000 * MPA_FRAME_SIZE) / (freq * 8.0); /* frame length in bytes, possibly fractional */
+    s->frame_size = ((int)a) * 8; /* whole bytes only, expressed in bits */
+
+    /* frame fractional size to compute padding */
+    s->frame_frac = 0;
+    s->frame_frac_incr = (int)((a - floor(a)) * 65536.0); /* fractional byte per frame, scaled by 65536 */
+    
+    /* select the right allocation table */
+    if (!s->lsf) {
+        if ((freq == 48000 && bitrate >= 56) ||
+            (bitrate >= 56 && bitrate <= 80)) 
+            table = 0;
+        else if (freq != 48000 && bitrate >= 96) 
+            table = 1;
+        else if (freq != 32000 && bitrate <= 48) 
+            table = 2;
+        else 
+            table = 3;
+    } else {
+        table = 4;
+    }
+    /* number of used subbands */
+    s->sblimit = sblimit_table[table];
+    s->alloc_table = alloc_tables[table];
+
+#ifdef DEBUG
+    printf("%d kb/s, %d Hz, frame_size=%d bits, table=%d, padincr=%x\n", 
+           bitrate, freq, s->frame_size, table, s->frame_frac_incr);
+#endif
+
+    s->samples_offset = 0;
+
+    for(i=0;i<512;i++) { /* integer version of the analysis window */
+        float a = enwindow[i] * 32768.0 * 16.0; /* note: shadows the outer 'a' */
+        filter_bank[i] = (int)(a);
+    }
+    for(i=0;i<64;i++) { /* scale factor i is 2^((3 - i) / 3), stored scaled by 2^20 */
+        v = (int)(pow(2.0, (3 - i) / 3.0) * (1 << 20));
+        if (v <= 0)
+            v = 1;
+        scale_factor_table[i] = v;
+#ifdef USE_FLOATS
+        scale_factor_inv_table[i] = pow(2.0, -(3 - i) / 3.0) / (float)(1 << 20);
+#else
+#define P 15 /* stays defined for the rest of the file; encode_frame() uses it too */
+        scale_factor_shift[i] = 21 - P - (i / 3);
+        scale_factor_mult[i] = (1 << P) * pow(2.0, (i % 3) / 3.0);
+#endif
+    }
+    for(i=0;i<128;i++) { /* maps a scale factor difference + 64 to one of 5 classes */
+        v = i - 64;
+        if (v <= -3)
+            v = 0;
+        else if (v < 0)
+            v = 1;
+        else if (v == 0)
+            v = 2;
+        else if (v < 3)
+            v = 3;
+        else 
+            v = 4;
+        scale_diff_table[i] = v;
+    }
+
+    for(i=0;i<17;i++) { /* bits spent per subband and frame for each quantizer */
+        v = quant_bits[i];
+        if (v < 0) 
+            v = -v; /* negative: three samples grouped into one code of -v bits */
+        else
+            v = v * 3; /* otherwise three separate codes of v bits */
+        total_quant_bits[i] = 12 * v; /* encode_frame() writes 12 such triplets per subband */
+    }
+
+    return 0;
+}
+
+/* 32 point IDCT in integer fixed-point arithmetic (despite the historical
+ * "floating point" wording, only int operations and the MUL/FIX macros
+ * are used).  The 32 values in tab are transformed in place, so tab is
+ * clobbered; the results are then copied to out in the order given by
+ * bitinv32 and shifted left by left_shift.  The sblimit parameter is not
+ * used by this function.
+ */
+static void idct32(int *out, int *tab, int sblimit, int left_shift)
+{
+    int i, j;
+    int *t, *t1, xr;
+    const int *xp = costab32; /* walks through the cosine table as the stages proceed */
+
+    for(j=31;j>=3;j-=2) tab[j] += tab[j - 2];
+    
+    t = tab + 30;
+    t1 = tab + 2;
+    do {
+        t[0] += t[-4];
+        t[1] += t[1 - 4];
+        t -= 4;
+    } while (t != t1);
+
+    t = tab + 28;
+    t1 = tab + 4;
+    do {
+        t[0] += t[-8];
+        t[1] += t[1-8];
+        t[2] += t[2-8];
+        t[3] += t[3-8];
+        t -= 8;
+    } while (t != t1);
+    
+    t = tab;
+    t1 = tab + 32;
+    do { /* sign changes, applied to both halves of 16 values */
+        t[ 3] = -t[ 3];    
+        t[ 6] = -t[ 6];    
+        
+        t[11] = -t[11];    
+        t[12] = -t[12];    
+        t[13] = -t[13];    
+        t[15] = -t[15]; 
+        t += 16;
+    } while (t != t1);
+
+    
+    t = tab;
+    t1 = tab + 8;
+    do { /* combines t[0], t[8], t[16], t[24] for each of the first 8 positions */
+        int x1, x2, x3, x4;
+        
+        x3 = MUL(t[16], FIX(SQRT2*0.5));
+        x4 = t[0] - x3;
+        x3 = t[0] + x3;
+        
+        x2 = MUL(-(t[24] + t[8]), FIX(SQRT2*0.5));
+        x1 = MUL((t[8] - x2), xp[0]);
+        x2 = MUL((t[8] + x2), xp[1]);
+
+        t[ 0] = x3 + x1;
+        t[ 8] = x4 - x2;
+        t[16] = x4 + x2;
+        t[24] = x3 - x1;
+        t++;
+    } while (t != t1);
+
+    xp += 2;
+    t = tab;
+    t1 = tab + 4;
+    do {
+        xr = MUL(t[28],xp[0]);
+        t[28] = (t[0] - xr);
+        t[0] = (t[0] + xr);
+
+        xr = MUL(t[4],xp[1]);
+        t[ 4] = (t[24] - xr);
+        t[24] = (t[24] + xr);
+        
+        xr = MUL(t[20],xp[2]);
+        t[20] = (t[8] - xr);
+        t[ 8] = (t[8] + xr);
+            
+        xr = MUL(t[12],xp[3]);
+        t[12] = (t[16] - xr);
+        t[16] = (t[16] + xr);
+        t++;
+    } while (t != t1);
+    xp += 4;
+
+    for (i = 0; i < 4; i++) {
+        xr = MUL(tab[30-i*4],xp[0]);
+        tab[30-i*4] = (tab[i*4] - xr);
+        tab[   i*4] = (tab[i*4] + xr);
+        
+        xr = MUL(tab[ 2+i*4],xp[1]);
+        tab[ 2+i*4] = (tab[28-i*4] - xr);
+        tab[28-i*4] = (tab[28-i*4] + xr);
+        
+        xr = MUL(tab[31-i*4],xp[0]);
+        tab[31-i*4] = (tab[1+i*4] - xr);
+        tab[ 1+i*4] = (tab[1+i*4] + xr);
+        
+        xr = MUL(tab[ 3+i*4],xp[1]);
+        tab[ 3+i*4] = (tab[29-i*4] - xr);
+        tab[29-i*4] = (tab[29-i*4] + xr);
+        
+        xp += 2;
+    }
+
+    t = tab + 30;
+    t1 = tab + 1;
+    do { /* pairs even entry t (30, 28, ..., 0) with odd entry t1 (1, 3, ..., 31) */
+        xr = MUL(t1[0], *xp);
+        t1[0] = (t[0] - xr);
+        t[0] = (t[0] + xr);
+        t -= 2;
+        t1 += 2;
+        xp++;
+    } while (t >= tab); /* NOTE(review): t ends at tab - 2, a pointer below the array start */
+
+    for(i=0;i<32;i++) {
+        out[i] = tab[bitinv32[i]] << left_shift;
+    }
+}
+/* Analysis filter bank.  Consumes 36 groups of 32 input samples (1152 in
+ * total) and writes 32 subband values per group into s->sb_samples.
+ * Input history is kept in s->samples_buf, which is filled backwards from
+ * s->samples_offset; the offset is saved back on exit for the next call.
+ */
+static void filter(MpegAudioContext *s, short *samples)
+{
+    short *p, *q;
+    int sum, offset, i, j, norm, n;
+    short tmp[64];
+    int tmp1[32];
+    int *out;
+
+    //    print_pow1(samples, 1152);
+
+    offset = s->samples_offset;
+    out = &s->sb_samples[0][0][0];
+    for(j=0;j<36;j++) {
+        /* 32 samples at once */
+        for(i=0;i<32;i++) 
+            s->samples_buf[offset + (31 - i)] = samples[i]; /* stored in reverse order */
+
+        /* filter */
+        p = s->samples_buf + offset;
+        q = filter_bank;
+        /* maxsum = 23169 */
+        for(i=0;i<64;i++) { /* 8 taps spaced 64 apart: covers the 512 newest samples */
+            sum = p[0*64] * q[0*64];
+            sum += p[1*64] * q[1*64];
+            sum += p[2*64] * q[2*64];
+            sum += p[3*64] * q[3*64];
+            sum += p[4*64] * q[4*64];
+            sum += p[5*64] * q[5*64];
+            sum += p[6*64] * q[6*64];
+            sum += p[7*64] * q[7*64];
+            tmp[i] = sum >> 14;
+            p++;
+            q++;
+        }
+        tmp1[0] = tmp[16]; /* fold the 64 windowed sums into 32 IDCT inputs */
+        for( i=1; i<=16; i++ ) tmp1[i] = tmp[i+16]+tmp[16-i];
+        for( i=17; i<=31; i++ ) tmp1[i] = tmp[i+16]-tmp[80-i];
+
+        /* integer IDCT 32 with normalization. XXX: There may be some
+           overflow left */
+        norm = 0;
+        for(i=0;i<32;i++) {
+            norm |= abs(tmp1[i]); /* OR keeps the highest set bit of any input */
+        }
+        n = log2(norm) - 12; /* presumably the project's integer log2 (MSB position) -- confirm */
+        if (n > 0) {
+            for(i=0;i<32;i++) 
+                tmp1[i] >>= n; /* scale down; idct32() shifts the outputs back up by n */
+        } else {
+            n = 0;
+        }
+
+        idct32(out, tmp1, s->sblimit, n);
+
+        /* advance of 32 samples */
+        samples += 32;
+        offset -= 32;
+        out += 32;
+        /* handle the wrap around */
+        if (offset < 0) { /* move the 480 newest samples to the end of the buffer */
+            memmove(s->samples_buf + SAMPLES_BUF_SIZE - (512 - 32), 
+                    s->samples_buf, (512 - 32) * 2);
+            offset = SAMPLES_BUF_SIZE - 512;
+        }
+    }
+    s->samples_offset = offset;
+
+    //    print_pow(s->sb_samples, 1152);
+}
+
+/* Compute the scale factors of one frame.
+ *
+ * For each of the first sblimit subbands and each of the 3 parts of the
+ * frame (12 samples each), the largest absolute sample value selects a
+ * scale factor index, stored in scale_factors[subband][part].  The three
+ * indexes of a subband are then compared and possibly made equal so that
+ * fewer of them need to be transmitted; the chosen transmission pattern
+ * is stored in scale_code[subband] (0: all three sent, 1 and 3: two sent,
+ * 2: one sent, see encode_frame()).
+ *
+ * Silent or nearly silent parts (vmax <= 1) get index 62.  Index 63 must
+ * never be produced, and the table search below must not be entered with
+ * vmax == 1: the last entries of scale_factor_table are all 1, so the
+ * search would run past the 64 entries set up by MPA_encode_init().
+ */
+static void compute_scale_factors(unsigned char scale_code[SBLIMIT],
+                                  unsigned char scale_factors[SBLIMIT][3], 
+                                  int sb_samples[3][12][SBLIMIT],
+                                  int sblimit)
+{
+    int *p, vmax, v, n, i, j, k, code;
+    int index, d1, d2;
+    unsigned char *sf = &scale_factors[0][0];
+    
+    for(j=0;j<sblimit;j++) {
+        for(i=0;i<3;i++) {
+            /* find the max absolute value */
+            p = &sb_samples[i][0][j];
+            vmax = abs(*p);
+            for(k=1;k<12;k++) {
+                p += SBLIMIT;
+                v = abs(*p);
+                if (v > vmax)
+                    vmax = v;
+            }
+            /* compute the scale factor index using log 2 computations */
+            if (vmax > 1) {
+                n = log2(vmax);
+                /* n is the position of the MSB of vmax. now 
+                   use at most 2 compares to find the index */
+                index = (21 - n) * 3 - 3;
+                if (index >= 0) {
+                    /* vmax >= 2 stops this before the trailing entries
+                       equal to 1, so index stays below 62 */
+                    while (vmax <= scale_factor_table[index+1])
+                        index++;
+                } else {
+                    index = 0; /* very unlikely case of overflow */
+                }
+            } else {
+                index = 62; /* value 63 is not allowed */
+            }
+            
+#if 0
+            printf("%2d:%d in=%x %x %d\n", 
+                   j, i, vmax, scale_factor_table[index], index);
+#endif
+            /* store the scale factor */
+            assert(index >=0 && index <= 62);
+            sf[i] = index;
+        }
+
+        /* compute the transmission factor : look if the scale factors
+           are close enough to each other */
+        d1 = scale_diff_table[sf[0] - sf[1] + 64];
+        d2 = scale_diff_table[sf[1] - sf[2] + 64];
+        
+        /* handle the 25 cases */
+        switch(d1 * 5 + d2) {
+        case 0*5+0:
+        case 0*5+4:
+        case 3*5+4:
+        case 4*5+0:
+        case 4*5+4:
+            code = 0;
+            break;
+        case 0*5+1:
+        case 0*5+2:
+        case 4*5+1:
+        case 4*5+2:
+            code = 3;
+            sf[2] = sf[1];
+            break;
+        case 0*5+3:
+        case 4*5+3:
+            code = 3;
+            sf[1] = sf[2];
+            break;
+        case 1*5+0:
+        case 1*5+4:
+        case 2*5+4:
+            code = 1;
+            sf[1] = sf[0];
+            break;
+        case 1*5+1:
+        case 1*5+2:
+        case 2*5+0:
+        case 2*5+1:
+        case 2*5+2:
+            code = 2;
+            sf[1] = sf[2] = sf[0];
+            break;
+        case 2*5+3:
+        case 3*5+3:
+            code = 2;
+            sf[0] = sf[1] = sf[2];
+            break;
+        case 3*5+0:
+        case 3*5+1:
+        case 3*5+2:
+            code = 2;
+            sf[0] = sf[2] = sf[1];
+            break;
+        case 1*5+3:
+            code = 2;
+            if (sf[0] > sf[2])
+              sf[0] = sf[2];
+            sf[1] = sf[2] = sf[0];
+            break;
+        default:
+            abort();
+        }
+        
+#if 0
+        printf("%d: %2d %2d %2d %d %d -> %d\n", j, 
+               sf[0], sf[1], sf[2], d1, d2, code);
+#endif
+        scale_code[j] = code;
+        sf += 3;
+    }
+}
+
+/* The most important function: the psychoacoustic model.  This encoder
+   has essentially none: the signal to mask ratio of every used subband
+   is the constant fixed_smr[] entry multiplied by 10.  That is the
+   simplest thing one can do, and also the worst. */
+static void psycho_acoustic_model(MpegAudioContext *s, short smr[SBLIMIT])
+{
+    int sb = s->sblimit;
+
+    /* the entries are independent, so the fill order does not matter */
+    while (--sb >= 0)
+        smr[sb] = (int)(fixed_smr[sb] * 10);
+}
+
+/* Per-subband states used by compute_bit_allocation(). */
+#define SB_NOTALLOCATED  0 /* no bits given to the subband yet */
+#define SB_ALLOCATED     1 /* has bits and may still receive more */
+#define SB_NOMORE        2 /* excluded from further allocation */
+
+/* Try to maximize the smr while using a number of bits inferior to
+   the frame size. I tried to make the code simpler, faster and
+   smaller than other encoders :-)
+
+   smr1 holds the signal to mask ratio of each used subband.  On return
+   bit_alloc[i] is the allocation index chosen for subband i (0 = nothing
+   coded) and *padding is the number of bits left unused in the frame.
+   s->frame_frac and s->do_padding are updated to decide whether this
+   frame carries one extra byte. */
+static void compute_bit_allocation(MpegAudioContext *s, 
+                                   short smr1[SBLIMIT],
+                                   unsigned char bit_alloc[SBLIMIT],
+                                   int *padding)
+{
+    int i, b, max_smr, max_sb, current_frame_size, max_frame_size;
+    int incr;
+    short smr[SBLIMIT];
+    unsigned char subband_status[SBLIMIT];
+    const unsigned char *alloc;
+
+    memcpy(smr, smr1, sizeof(short) * s->sblimit);
+    memset(subband_status, SB_NOTALLOCATED, s->sblimit);
+    memset(bit_alloc, 0, s->sblimit);
+    
+    /* compute frame size and padding */
+    max_frame_size = s->frame_size;
+    s->frame_frac += s->frame_frac_incr;
+    if (s->frame_frac >= 65536) { /* a whole byte of fraction has accumulated */
+        s->frame_frac -= 65536;
+        s->do_padding = 1;
+        max_frame_size += 8;
+    } else {
+        s->do_padding = 0;
+    }
+
+    /* compute the header + bit alloc size */
+    current_frame_size = 32; /* the header written by encode_frame() is 32 bits */
+    alloc = s->alloc_table;
+    for(i=0;i<s->sblimit;i++) {
+        incr = alloc[0]; /* first entry of a subband: width of its allocation field */
+        current_frame_size += incr;
+        alloc += 1 << incr; /* each subband owns 1 << width table entries */
+    }
+    for(;;) {
+        /* look for the subband with the largest signal to mask ratio */
+        max_sb = -1;
+        max_smr = 0x80000000; /* meant as the smallest int; NOTE(review): relies on the unsigned-to-int conversion */
+        for(i=0;i<s->sblimit;i++) {
+            if (smr[i] > max_smr && subband_status[i] != SB_NOMORE) {
+                max_smr = smr[i];
+                max_sb = i;
+            }
+        }
+#if 0
+        printf("current=%d max=%d max_sb=%d alloc=%d\n", 
+               current_frame_size, max_frame_size, max_sb,
+               bit_alloc[max_sb]);
+#endif        
+        if (max_sb < 0) /* every subband is SB_NOMORE: done */
+            break;
+        
+        /* find alloc table entry (XXX: not optimal, should use
+           pointer table) */
+        alloc = s->alloc_table;
+        for(i=0;i<max_sb;i++) {
+            alloc += 1 << alloc[0];
+        }
+
+        if (subband_status[max_sb] == SB_NOTALLOCATED) {
+            /* nothing was coded for this band: add the necessary bits */
+            incr = 2 + nb_scale_factors[s->scale_code[max_sb]] * 6; /* 2-bit scale code + 6 bits per scale factor */
+            incr += total_quant_bits[alloc[1]];
+        } else {
+            /* increments bit allocation */
+            b = bit_alloc[max_sb];
+            incr = total_quant_bits[alloc[b + 1]] - 
+                total_quant_bits[alloc[b]];
+        }
+
+        if (current_frame_size + incr <= max_frame_size) {
+            /* can increase size */
+            b = ++bit_alloc[max_sb];
+            current_frame_size += incr;
+            /* decrease smr by the resolution we added */
+            smr[max_sb] = smr1[max_sb] - quant_snr[alloc[b]];
+            /* max allocation size reached ? */
+            if (b == ((1 << alloc[0]) - 1))
+                subband_status[max_sb] = SB_NOMORE;
+            else
+                subband_status[max_sb] = SB_ALLOCATED;
+        } else {
+            /* cannot increase the size of this subband */
+            subband_status[max_sb] = SB_NOMORE;
+        }
+    }
+    *padding = max_frame_size - current_frame_size;
+    assert(*padding >= 0);
+
+#if 0
+    for(i=0;i<s->sblimit;i++) {
+        printf("%d ", bit_alloc[i]);
+    }
+    printf("\n");
+#endif
+}
+
+/*
+ * Output the mpeg audio layer 2 frame. Note how the code is small
+ * compared to other encoders :-)
+ *
+ * Written in order to s->pb: the 32-bit header, the bit allocation of
+ * each used subband, the scale codes and scale factors of the subbands
+ * that received bits, the quantized subband samples, and finally
+ * 'padding' zero bits.  bit_alloc and padding come from
+ * compute_bit_allocation().
+ */
+static void encode_frame(MpegAudioContext *s,
+                         unsigned char bit_alloc[SBLIMIT],
+                         int padding)
+{
+    int i, j, k, l, bit_alloc_bits, b;
+    unsigned char *sf;
+    int q[3];
+    PutBitContext *p = &s->pb;
+
+    /* header */
+
+    put_bits(p, 12, 0xfff);
+    put_bits(p, 1, 1 - s->lsf); /* 1 = mpeg1 ID, 0 = mpeg2 lsf ID */
+    put_bits(p, 2, 4-2);  /* layer 2 */
+    put_bits(p, 1, 1); /* no error protection */
+    put_bits(p, 4, s->bitrate_index);
+    put_bits(p, 2, s->freq_index);
+    put_bits(p, 1, s->do_padding); /* use padding */
+    put_bits(p, 1, 0);             /* private_bit */
+    put_bits(p, 2, MPA_MONO);
+    put_bits(p, 2, 0); /* mode_ext */
+    put_bits(p, 1, 0); /* no copyright */
+    put_bits(p, 1, 1); /* original */
+    put_bits(p, 2, 0); /* no emphasis */
+
+    /* bit allocation */
+    j = 0; /* index of the current subband's first entry in alloc_table */
+    for(i=0;i<s->sblimit;i++) {
+        bit_alloc_bits = s->alloc_table[j];
+        put_bits(p, bit_alloc_bits, bit_alloc[i]);
+        j += 1 << bit_alloc_bits;
+    }
+    
+    /* scale codes */
+    for(i=0;i<s->sblimit;i++) {
+        if (bit_alloc[i]) 
+            put_bits(p, 2, s->scale_code[i]);
+    }
+
+    /* scale factors */
+    sf = &s->scale_factors[0][0];
+    for(i=0;i<s->sblimit;i++) {
+        if (bit_alloc[i]) {
+            switch(s->scale_code[i]) {
+            case 0: /* three different scale factors */
+                put_bits(p, 6, sf[0]);
+                put_bits(p, 6, sf[1]);
+                put_bits(p, 6, sf[2]);
+                break;
+            case 3: /* sf[1] == sf[2] was forced by compute_scale_factors() */
+            case 1: /* sf[0] == sf[1] was forced by compute_scale_factors() */
+                put_bits(p, 6, sf[0]);
+                put_bits(p, 6, sf[2]);
+                break;
+            case 2: /* all three are equal */
+                put_bits(p, 6, sf[0]);
+                break;
+            }
+        }
+        sf += 3;
+    }
+    
+    /* quantization & write sub band samples */
+
+    for(k=0;k<3;k++) { /* k: part of the frame, selects the scale factor */
+        for(l=0;l<12;l+=3) { /* l: first of three consecutive samples */
+            j = 0;
+            for(i=0;i<s->sblimit;i++) {
+                bit_alloc_bits = s->alloc_table[j];
+                b = bit_alloc[i];
+                if (b) {
+                    int qindex, steps, m, sample, bits;
+                    /* we encode 3 sub band samples of the same sub band at a time */
+                    qindex = s->alloc_table[j+b];
+                    steps = quant_steps[qindex];
+                    for(m=0;m<3;m++) {
+                        sample = s->sb_samples[k][l + m][i];
+                        /* divide by scale factor */
+#ifdef USE_FLOATS
+                        {
+                            float a;
+                            a = (float)sample * scale_factor_inv_table[s->scale_factors[i][k]];
+                            q[m] = (int)((a + 1.0) * steps * 0.5);
+                        }
+#else
+                        {
+                            int q1, e, shift, mult;
+                            e = s->scale_factors[i][k];
+                            shift = scale_factor_shift[e];
+                            mult = scale_factor_mult[e];
+
+                            /* normalize to P bits */
+                            if (shift < 0)
+                                q1 = sample << (-shift);
+                            else
+                                q1 = sample >> shift;
+                            q1 = (q1 * mult) >> P;
+                            q[m] = ((q1 + (1 << P)) * steps) >> (P + 1); /* integer form of (a + 1.0) * steps * 0.5 */
+                        }
+#endif
+                        if (q[m] >= steps)
+                            q[m] = steps - 1;
+                        assert(q[m] >= 0 && q[m] < steps);
+                    }
+                    bits = quant_bits[qindex];
+                    if (bits < 0) {
+                        /* group the 3 values to save bits */
+                        put_bits(p, -bits, 
+                                 q[0] + steps * (q[1] + steps * q[2]));
+#if 0
+                        printf("%d: gr1 %d\n", 
+                               i, q[0] + steps * (q[1] + steps * q[2]));
+#endif
+                    } else {
+#if 0
+                        printf("%d: gr3 %d %d %d\n", 
+                               i, q[0], q[1], q[2]);
+#endif                               
+                        put_bits(p, bits, q[0]);
+                        put_bits(p, bits, q[1]);
+                        put_bits(p, bits, q[2]);
+                    }
+                }
+                /* next subband in alloc table */
+                j += 1 << bit_alloc_bits; 
+            }
+        }
+    }
+
+    /* padding */
+    for(i=0;i<padding;i++)
+        put_bits(p, 1, 0);
+
+    /* flush */
+    flush_put_bits(p);
+}
+/* Encode one frame of MPA_FRAME_SIZE samples taken from data (used as an
+ * array of short) into frame, and return the number of bytes written
+ * (s->pb.buf_ptr - s->pb.buf).
+ * NOTE(review): buf_size is ignored; the bit writer is always set up for
+ * MPA_MAX_CODED_FRAME_SIZE (1200) bytes, while MPA_encode_init() accepts
+ * settings whose frames are larger (384 kbit/s at 32 kHz gives 1728
+ * bytes) -- confirm the limits callers rely on.
+ */
+int MPA_encode_frame(AVEncodeContext *avctx,
+                     unsigned char *frame, int buf_size, void *data)
+{
+    MpegAudioContext *s = avctx->priv_data;
+    short *samples = data;
+    short smr[SBLIMIT];
+    unsigned char bit_alloc[SBLIMIT];
+    int padding;
+
+    filter(s, samples); /* fills s->sb_samples */
+    compute_scale_factors(s->scale_code, s->scale_factors, 
+                          s->sb_samples, s->sblimit);
+    psycho_acoustic_model(s, smr);
+    compute_bit_allocation(s, smr, bit_alloc, &padding);
+
+    init_put_bits(&s->pb, frame, MPA_MAX_CODED_FRAME_SIZE, NULL, NULL);
+
+    encode_frame(s, bit_alloc, padding);
+    
+    s->nb_samples += MPA_FRAME_SIZE;
+    return s->pb.buf_ptr - s->pb.buf;
+}
+
+/* Encoder description for MPEG audio layer 2 ("mp2"). */
+AVEncoder mp2_encoder = {
+    "mp2",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_MP2,
+    sizeof(MpegAudioContext), /* presumably the size of avctx->priv_data -- confirm against AVEncoder */
+    MPA_encode_init,
+    MPA_encode_frame,
+    NULL, /* NOTE(review): meaning of this slot is declared in avcodec.h, not visible here */
+};
diff --git a/libav/mpegaudio.h b/libav/mpegaudio.h
new file mode 100644
index 0000000000..0734d3466b
--- /dev/null
+++ b/libav/mpegaudio.h
@@ -0,0 +1,31 @@
+
+/* max compressed frame size, used to size the output bit writer */
+#define MPA_MAX_CODED_FRAME_SIZE 1200
+
+#define MPA_FRAME_SIZE 1152 /* samples per frame: 3 * 12 * SBLIMIT */
+
+#define SAMPLES_BUF_SIZE 4096 /* length of MpegAudioContext.samples_buf */
+#define SBLIMIT 32 /* number of subbands */
+#define DCT_BITS 14 /* number of fractional bits of the DCT fixed point */
+#define MUL(a,b) (((a) * (b)) >> DCT_BITS) /* fixed-point product */
+#define FIX(a)   ((int)((a) * (1 << DCT_BITS))) /* real constant to fixed point */
+
+typedef struct MpegAudioContext { /* state of one MPEG audio encoder instance */
+    PutBitContext pb; /* bit writer for the output frame */
+    int freq, bit_rate; /* NOTE(review): units not shown here, presumably Hz and bit/s - confirm */
+    int lsf;           /* 1 if mpeg2 low bitrate selected */
+    int bitrate_index; /* bit rate index; presumably a column of bitrate_tab - confirm */
+    int freq_index; /* presumably an index into freq_tab - confirm */
+    int frame_size; /* frame size, in bits, without padding */
+    long long nb_samples; /* total number of samples encoded */
+    /* padding computation */
+    int frame_frac, frame_frac_incr, do_padding;
+    short samples_buf[SAMPLES_BUF_SIZE]; /* buffer for filter */
+    int samples_offset;       /* offset in samples_buf */
+    int sb_samples[3][12][SBLIMIT]; /* 3 * 12 * SBLIMIT = MPA_FRAME_SIZE subband samples */
+    unsigned char scale_factors[SBLIMIT][3]; /* scale factors */
+    unsigned char scale_code[SBLIMIT];       /* code to group 3 scale factors */
+    int sblimit; /* number of used subbands */
+    const unsigned char *alloc_table; /* presumably one of alloc_tables[] - confirm */
+} MpegAudioContext;
+
diff --git a/libav/mpegaudiotab.h b/libav/mpegaudiotab.h
new file mode 100644
index 0000000000..05bdb9eea1
--- /dev/null
+++ b/libav/mpegaudiotab.h
@@ -0,0 +1,310 @@
+/*
+ * mpeg audio layer 2 tables. Most of them come from the mpeg audio
+ * specification.
+ * 
+ * Copyright (c) 2000 Gerard Lantau.
+ *
+ * The licence of this code is contained in file LICENCE found in the
+ * same archive 
+ */
+
+static const unsigned short bitrate_tab[2][15] = { /* bit rates, one row per MPEG version; presumably kbit/s - confirm */
+    {0,8,16,24,32,40,48,56,64,80,96,112,128,144,160}, /* mpeg2 lsf */
+    {0,32,48,56,64,80,96,112,128,160,192,224,256,320,384}, /* mpeg1 */
+};
+
+static const unsigned short freq_tab[3] = { 44100, 48000, 32000 }; /* sample rates; presumably in Hz - confirm */
+
+#define SQRT2 1.41421356237309514547
+
+static const int costab32[30] = {
+    FIX(0.54119610014619701222),
+    FIX(1.3065629648763763537),
+    
+    FIX(0.50979557910415917998),
+    FIX(2.5629154477415054814),
+    FIX(0.89997622313641556513),
+    FIX(0.60134488693504528634),
+    
+    FIX(0.5024192861881556782),
+    FIX(5.1011486186891552563),
+    FIX(0.78815462345125020249),
+    FIX(0.64682178335999007679),
+    FIX(0.56694403481635768927),
+    FIX(1.0606776859903470633),
+    FIX(1.7224470982383341955),
+    FIX(0.52249861493968885462),
+    
+    FIX(10.19000812354803287),
+    FIX(0.674808341455005678),
+    FIX(1.1694399334328846596),
+    FIX(0.53104259108978413284),
+    FIX(2.0577810099534108446),
+    FIX(0.58293496820613388554),
+    FIX(0.83934964541552681272),
+    FIX(0.50547095989754364798),
+    FIX(3.4076084184687189804),
+    FIX(0.62250412303566482475),
+    FIX(0.97256823786196078263),
+    FIX(0.51544730992262455249),
+    FIX(1.4841646163141661852),
+    FIX(0.5531038960344445421),
+    FIX(0.74453627100229857749),
+    FIX(0.5006029982351962726),
+};
+
+static const int bitinv32[32] = { /* bitinv32[i] is i with its 5 bits reversed */
+    0,  16,  8, 24,  4,  20,  12,  28,
+    2,  18, 10, 26,  6,  22,  14,  30,
+    1,  17,  9, 25,  5,  21,  13,  29,
+    3,  19, 11, 27,  7,  23,  15,  31
+};
+
+
+static short filter_bank[512];
+
+static const double enwindow[512] = {0.000000000,
+                        -0.000000477, -0.000000477, -0.000000477, -0.000000477, -0.000000477, -0.000000477, -0.000000954, -0.000000954,
+                        -0.000000954, -0.000000954, -0.000001431, -0.000001431, -0.000001907, -0.000001907, -0.000002384, -0.000002384,
+                        -0.000002861, -0.000003338, -0.000003338, -0.000003815, -0.000004292, -0.000004768, -0.000005245, -0.000006199,
+                        -0.000006676, -0.000007629, -0.000008106, -0.000009060, -0.000010014, -0.000011444, -0.000012398, -0.000013828,
+                        -0.000014782, -0.000016689, -0.000018120, -0.000019550, -0.000021458, -0.000023365, -0.000025272, -0.000027657,
+                        -0.000030041, -0.000032425, -0.000034809, -0.000037670, -0.000040531, -0.000043392, -0.000046253, -0.000049591,
+                        -0.000052929, -0.000055790, -0.000059605, -0.000062943, -0.000066280, -0.000070095, -0.000073433, -0.000076771,
+                        -0.000080585, -0.000083923, -0.000087261, -0.000090599, -0.000093460, -0.000096321, -0.000099182, 0.000101566,
+                        0.000103951, 0.000105858, 0.000107288, 0.000108242, 0.000108719, 0.000108719, 0.000108242, 0.000106812,
+                        0.000105381, 0.000102520, 0.000099182, 0.000095367, 0.000090122, 0.000084400, 0.000077724, 0.000069618,
+                        0.000060558, 0.000050545, 0.000039577, 0.000027180, 0.000013828, -0.000000954, -0.000017166, -0.000034332,
+                        -0.000052929, -0.000072956, -0.000093937, -0.000116348, -0.000140190, -0.000165462, -0.000191212, -0.000218868,
+                        -0.000247478, -0.000277042, -0.000307560, -0.000339031, -0.000371456, -0.000404358, -0.000438213, -0.000472546,
+                        -0.000507355, -0.000542164, -0.000576973, -0.000611782, -0.000646591, -0.000680923, -0.000714302, -0.000747204,
+                        -0.000779152, -0.000809669, -0.000838757, -0.000866413, -0.000891685, -0.000915051, -0.000935555, -0.000954151,
+                        -0.000968933, -0.000980854, -0.000989437, -0.000994205, -0.000995159, -0.000991821, -0.000983715, 0.000971317,
+                        0.000953674, 0.000930786, 0.000902653, 0.000868797, 0.000829220, 0.000783920, 0.000731945, 0.000674248,
+                        0.000610352, 0.000539303, 0.000462532, 0.000378609, 0.000288486, 0.000191689, 0.000088215, -0.000021458,
+                        -0.000137329, -0.000259876, -0.000388145, -0.000522137, -0.000661850, -0.000806808, -0.000956535, -0.001111031,
+                        -0.001269817, -0.001432419, -0.001597881, -0.001766682, -0.001937389, -0.002110004, -0.002283096, -0.002457142,
+                        -0.002630711, -0.002803326, -0.002974033, -0.003141880, -0.003306866, -0.003467083, -0.003622532, -0.003771782,
+                        -0.003914356, -0.004048824, -0.004174709, -0.004290581, -0.004395962, -0.004489899, -0.004570484, -0.004638195,
+                        -0.004691124, -0.004728317, -0.004748821, -0.004752159, -0.004737377, -0.004703045, -0.004649162, -0.004573822,
+                        -0.004477024, -0.004357815, -0.004215240, -0.004049301, -0.003858566, -0.003643036, -0.003401756, 0.003134727,
+                        0.002841473, 0.002521515, 0.002174854, 0.001800537, 0.001399517, 0.000971317, 0.000515938, 0.000033379,
+                        -0.000475883, -0.001011848, -0.001573563, -0.002161503, -0.002774239, -0.003411293, -0.004072189, -0.004756451,
+                        -0.005462170, -0.006189346, -0.006937027, -0.007703304, -0.008487225, -0.009287834, -0.010103703, -0.010933399,
+                        -0.011775017, -0.012627602, -0.013489246, -0.014358521, -0.015233517, -0.016112804, -0.016994476, -0.017876148,
+                        -0.018756866, -0.019634247, -0.020506859, -0.021372318, -0.022228718, -0.023074150, -0.023907185, -0.024725437,
+                        -0.025527000, -0.026310921, -0.027073860, -0.027815342, -0.028532982, -0.029224873, -0.029890060, -0.030526638,
+                        -0.031132698, -0.031706810, -0.032248020, -0.032754898, -0.033225536, -0.033659935, -0.034055710, -0.034412861,
+                        -0.034730434, -0.035007000, -0.035242081, -0.035435200, -0.035586357, -0.035694122, -0.035758972, 0.035780907,
+                        0.035758972, 0.035694122, 0.035586357, 0.035435200, 0.035242081, 0.035007000, 0.034730434, 0.034412861,
+                        0.034055710, 0.033659935, 0.033225536, 0.032754898, 0.032248020, 0.031706810, 0.031132698, 0.030526638,
+                        0.029890060, 0.029224873, 0.028532982, 0.027815342, 0.027073860, 0.026310921, 0.025527000, 0.024725437,
+                        0.023907185, 0.023074150, 0.022228718, 0.021372318, 0.020506859, 0.019634247, 0.018756866, 0.017876148,
+                        0.016994476, 0.016112804, 0.015233517, 0.014358521, 0.013489246, 0.012627602, 0.011775017, 0.010933399,
+                        0.010103703, 0.009287834, 0.008487225, 0.007703304, 0.006937027, 0.006189346, 0.005462170, 0.004756451,
+                        0.004072189, 0.003411293, 0.002774239, 0.002161503, 0.001573563, 0.001011848, 0.000475883, -0.000033379,
+                        -0.000515938, -0.000971317, -0.001399517, -0.001800537, -0.002174854, -0.002521515, -0.002841473, 0.003134727,
+                        0.003401756, 0.003643036, 0.003858566, 0.004049301, 0.004215240, 0.004357815, 0.004477024, 0.004573822,
+                        0.004649162, 0.004703045, 0.004737377, 0.004752159, 0.004748821, 0.004728317, 0.004691124, 0.004638195,
+                        0.004570484, 0.004489899, 0.004395962, 0.004290581, 0.004174709, 0.004048824, 0.003914356, 0.003771782,
+                        0.003622532, 0.003467083, 0.003306866, 0.003141880, 0.002974033, 0.002803326, 0.002630711, 0.002457142,
+                        0.002283096, 0.002110004, 0.001937389, 0.001766682, 0.001597881, 0.001432419, 0.001269817, 0.001111031,
+                        0.000956535, 0.000806808, 0.000661850, 0.000522137, 0.000388145, 0.000259876, 0.000137329, 0.000021458,
+                        -0.000088215, -0.000191689, -0.000288486, -0.000378609, -0.000462532, -0.000539303, -0.000610352, -0.000674248,
+                        -0.000731945, -0.000783920, -0.000829220, -0.000868797, -0.000902653, -0.000930786, -0.000953674, 0.000971317,
+                        0.000983715, 0.000991821, 0.000995159, 0.000994205, 0.000989437, 0.000980854, 0.000968933, 0.000954151,
+                        0.000935555, 0.000915051, 0.000891685, 0.000866413, 0.000838757, 0.000809669, 0.000779152, 0.000747204,
+                        0.000714302, 0.000680923, 0.000646591, 0.000611782, 0.000576973, 0.000542164, 0.000507355, 0.000472546,
+                        0.000438213, 0.000404358, 0.000371456, 0.000339031, 0.000307560, 0.000277042, 0.000247478, 0.000218868,
+                        0.000191212, 0.000165462, 0.000140190, 0.000116348, 0.000093937, 0.000072956, 0.000052929, 0.000034332,
+                        0.000017166, 0.000000954, -0.000013828, -0.000027180, -0.000039577, -0.000050545, -0.000060558, -0.000069618,
+                        -0.000077724, -0.000084400, -0.000090122, -0.000095367, -0.000099182, -0.000102520, -0.000105381, -0.000106812,
+                        -0.000108242, -0.000108719, -0.000108719, -0.000108242, -0.000107288, -0.000105858, -0.000103951, 0.000101566,
+                        0.000099182, 0.000096321, 0.000093460, 0.000090599, 0.000087261, 0.000083923, 0.000080585, 0.000076771,
+                        0.000073433, 0.000070095, 0.000066280, 0.000062943, 0.000059605, 0.000055790, 0.000052929, 0.000049591,
+                        0.000046253, 0.000043392, 0.000040531, 0.000037670, 0.000034809, 0.000032425, 0.000030041, 0.000027657,
+                        0.000025272, 0.000023365, 0.000021458, 0.000019550, 0.000018120, 0.000016689, 0.000014782, 0.000013828,
+                        0.000012398, 0.000011444, 0.000010014, 0.000009060, 0.000008106, 0.000007629, 0.000006676, 0.000006199,
+                        0.000005245, 0.000004768, 0.000004292, 0.000003815, 0.000003338, 0.000003338, 0.000002861, 0.000002384,
+                        0.000002384, 0.000001907, 0.000001907, 0.000001431, 0.000001431, 0.000000954, 0.000000954, 0.000000954,
+                        0.000000954, 0.000000477, 0.000000477, 0.000000477, 0.000000477, 0.000000477, 0.000000477
+                       };
+
+static int scale_factor_table[64];
+#ifdef USE_FLOATS
+static float scale_factor_inv_table[64];
+#else
+static INT8 scale_factor_shift[64];
+static unsigned short scale_factor_mult[64];
+#endif
+static unsigned char scale_diff_table[128];
+
+static const int sblimit_table[5] = { 27 , 30 , 8, 12 , 30 }; /* number of rows in alloc_table_0..4 */
+
+static const int quant_steps[17] = { /* steps of each quantizer; parallel to quant_bits[] and quant_snr[] */
+    3,     5,    7,    9,    15,
+    31,    63,  127,  255,   511,
+    1023,  2047, 4095, 8191, 16383,
+    32767, 65535
+};
+
+/* bits per sample; a negative value -n means 3 samples are grouped in n bits */
+static const int quant_bits[17] = {
+    -5,  -7,  3, -10, 4, 
+     5,  6,  7,  8,  9,
+    10, 11, 12, 13, 14,
+    15, 16 
+};
+
+/* signal to noise ratio of each quantization step (could be
+   computed from quant_steps[]). The values are dB multiplied by 10 
+*/
+static unsigned short quant_snr[17] = { 
+     70, 110, 160, 208,
+    253, 316, 378, 439,
+    499, 559, 620, 680, 
+    740, 800, 861, 920, 
+    980
+};
+
+
+/* total number of bits per allocation group */
+static unsigned short total_quant_bits[17]; /* no initializer; presumably filled in at encoder init - confirm */
+
+/* encoding tables which give the quantization index. Each row starts with
+   n and has 1 << n entries in total, which keeps the storage compact */
+static const unsigned char alloc_table_0[] = {
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 2,  0,  1, 16, 
+ 2,  0,  1, 16, 
+ 2,  0,  1, 16, 
+ 2,  0,  1, 16, 
+};
+
+static const unsigned char alloc_table_1[] = {
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 3,  0,  1,  2,  3,  4,  5, 16, 
+ 2,  0,  1, 16, 
+ 2,  0,  1, 16, 
+ 2,  0,  1, 16, 
+ 2,  0,  1, 16, 
+ 2,  0,  1, 16, 
+ 2,  0,  1, 16, 
+ 2,  0,  1, 16, 
+};
+
+static const unsigned char alloc_table_2[] = {
+ 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 
+ 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+};
+
+static const unsigned char alloc_table_3[] = {
+ 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 
+ 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+};
+
+static const unsigned char alloc_table_4[] = {
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 3,  0,  1,  3,  4,  5,  6,  7, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+ 2,  0,  1,  3, 
+};
+
+const unsigned char *alloc_tables[5] = /* same order as sblimit_table[] */
+{ alloc_table_0, alloc_table_1, alloc_table_2, alloc_table_3, alloc_table_4, };
+
+/* fixed psycho acoustic model. Values of SNR taken from the 'toolame'
+   project */
+const float fixed_smr[SBLIMIT] =  {
+    30, 17, 16, 10, 3, 12, 8, 2.5,
+    5, 5, 6, 6, 5, 6, 10, 6,
+    -4, -10, -21, -30, -42, -55, -68, -75,
+    -75, -75, -75, -75, -91, -107, -110, -108
+};
+
+const unsigned char nb_scale_factors[4] = { 3, 2, 1, 2 };
diff --git a/libav/mpegencodevlc.h b/libav/mpegencodevlc.h
new file mode 100644
index 0000000000..3952fd0472
--- /dev/null
+++ b/libav/mpegencodevlc.h
@@ -0,0 +1,311 @@
+/*
+ * RV 1.0 compatible encoder.
+ * Copyright (c) 2000 Gerard Lantau.
+ *
+ * The licence of this code is contained in file LICENCE found in the
+ * same archive 
+ */
+
+const unsigned char vlc_dc_table[256] = { /* vlc_dc_table[v] = number of bits needed to represent v (0 for v = 0) */
+    0, 1, 2, 2,
+    3, 3, 3, 3,
+    4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+};
+
+const unsigned char vlc_dc_lum_code[9] = { /* luminance DC codes; presumably indexed by the vlc_dc_table[] value - confirm */
+    0x4, 0x0, 0x1, 0x5, 0x6, 0xe, 0x1e, 0x3e, 0x7e,
+};
+const unsigned char vlc_dc_lum_bits[9] = { /* length in bits of the matching vlc_dc_lum_code[] entry */
+    3, 2, 2, 3, 3, 4, 5, 6, 7,
+};
+
+const unsigned char vlc_dc_chroma_code[9] = { /* chrominance DC codes; presumably indexed by the vlc_dc_table[] value - confirm */
+    0x0, 0x1, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe,
+};
+const unsigned char vlc_dc_chroma_bits[9] = { /* length in bits of the matching vlc_dc_chroma_code[] entry */
+    2, 2, 2, 3, 4, 5, 6, 7, 8,
+};
+
+/*
+ * Copyright (c) 1995 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose, without fee, and without written agreement is
+ * hereby granted, provided that the above copyright notice and the following
+ * two paragraphs appear in all copies of this software.
+ *
+ * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
+ * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
+ * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#define HUFF_MAXRUN	32 /* number of per-run tables (huff_table0..31) */
+#define HUFF_MAXLEVEL	41 /* size of the largest table (huff_table0) */
+
+static const int huff_maxlevel[HUFF_MAXRUN] = { 41, 19, 6, 5, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }; /* huff_maxlevel[run] = entry count of huff_table<run> and huff_bits<run> */
+
+static const UINT8 huff_table0[41] = { 0x0, 0x6, 0x8, 0xa, 0xc, 0x4c, 0x42, 0x14, 0x3a, 0x30, 0x26, 0x20, 0x34, 0x32, 0x30, 0x2e, 0x3e, 0x3c, 0x3a, 0x38, 0x36, 0x34, 0x32, 0x30, 0x2e, 0x2c, 0x2a, 0x28, 0x26, 0x24, 0x22, 0x20, 0x30, 0x2e, 0x2c, 0x2a, 0x28, 0x26, 0x24, 0x22, 0x20 };
+static const UINT8 huff_bits0[41] = { 0, 3, 5, 6, 8, 9, 9, 11, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16 };
+
+static const UINT8 huff_table1[19] = { 0x0, 0x6, 0xc, 0x4a, 0x18, 0x36, 0x2c, 0x2a, 0x3e, 0x3c, 0x3a, 0x38, 0x36, 0x34, 0x32, 0x26, 0x24, 0x22, 0x20 };
+static const UINT8 huff_bits1[19] = { 0, 4, 7, 9, 11, 13, 14, 14, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17 };
+
+static const UINT8 huff_table2[6] = { 0x0, 0xa, 0x8, 0x16, 0x28, 0x28 };
+static const UINT8 huff_bits2[6] = { 0, 5, 8, 11, 13, 14 };
+
+static const UINT8 huff_table3[5] = { 0x0, 0xe, 0x48, 0x38, 0x26 };
+static const UINT8 huff_bits3[5] = { 0, 6, 9, 13, 14 };
+
+static const UINT8 huff_table4[4] = { 0x0, 0xc, 0x1e, 0x24 };
+static const UINT8 huff_bits4[4] = { 0, 6, 11, 13 };
+
+static const UINT8 huff_table5[4] = { 0x0, 0xe, 0x12, 0x24 };
+static const UINT8 huff_bits5[4] = { 0, 7, 11, 14 };
+
+static const UINT8 huff_table6[4] = { 0x0, 0xa, 0x3c, 0x28 };
+static const UINT8 huff_bits6[4] = { 0, 7, 13, 17 };
+
+static const UINT8 huff_table7[3] = { 0x0, 0x8, 0x2a };
+static const UINT8 huff_bits7[3] = { 0, 7, 13 };
+
+static const UINT8 huff_table8[3] = { 0x0, 0xe, 0x22 };
+static const UINT8 huff_bits8[3] = { 0, 8, 13 };
+
+static const UINT8 huff_table9[3] = { 0x0, 0xa, 0x22 };
+static const UINT8 huff_bits9[3] = { 0, 8, 14 };
+
+static const UINT8 huff_table10[3] = { 0x0, 0x4e, 0x20 };
+static const UINT8 huff_bits10[3] = { 0, 9, 14 };
+
+static const UINT8 huff_table11[3] = { 0x0, 0x46, 0x34 };
+static const UINT8 huff_bits11[3] = { 0, 9, 17 };
+
+static const UINT8 huff_table12[3] = { 0x0, 0x44, 0x32 };
+static const UINT8 huff_bits12[3] = { 0, 9, 17 };
+
+static const UINT8 huff_table13[3] = { 0x0, 0x40, 0x30 };
+static const UINT8 huff_bits13[3] = { 0, 9, 17 };
+
+static const UINT8 huff_table14[3] = { 0x0, 0x1c, 0x2e };
+static const UINT8 huff_bits14[3] = { 0, 11, 17 };
+
+static const UINT8 huff_table15[3] = { 0x0, 0x1a, 0x2c };
+static const UINT8 huff_bits15[3] = { 0, 11, 17 };
+
+static const UINT8 huff_table16[3] = { 0x0, 0x10, 0x2a };
+static const UINT8 huff_bits16[3] = { 0, 11, 17 };
+
+static const UINT8 huff_table17[2] = { 0x0, 0x3e };
+static const UINT8 huff_bits17[2] = { 0, 13 };
+
+static const UINT8 huff_table18[2] = { 0x0, 0x34 };
+static const UINT8 huff_bits18[2] = { 0, 13 };
+
+static const UINT8 huff_table19[2] = { 0x0, 0x32 };
+static const UINT8 huff_bits19[2] = { 0, 13 };
+
+static const UINT8 huff_table20[2] = { 0x0, 0x2e };
+static const UINT8 huff_bits20[2] = { 0, 13 };
+
+static const UINT8 huff_table21[2] = { 0x0, 0x2c };
+static const UINT8 huff_bits21[2] = { 0, 13 };
+
+static const UINT8 huff_table22[2] = { 0x0, 0x3e };
+static const UINT8 huff_bits22[2] = { 0, 14 };
+
+static const UINT8 huff_table23[2] = { 0x0, 0x3c };
+static const UINT8 huff_bits23[2] = { 0, 14 };
+
+static const UINT8 huff_table24[2] = { 0x0, 0x3a };
+static const UINT8 huff_bits24[2] = { 0, 14 };
+
+static const UINT8 huff_table25[2] = { 0x0, 0x38 };
+static const UINT8 huff_bits25[2] = { 0, 14 };
+
+static const UINT8 huff_table26[2] = { 0x0, 0x36 };
+static const UINT8 huff_bits26[2] = { 0, 14 };
+
+static const UINT8 huff_table27[2] = { 0x0, 0x3e };
+static const UINT8 huff_bits27[2] = { 0, 17 };
+
+static const UINT8 huff_table28[2] = { 0x0, 0x3c };
+static const UINT8 huff_bits28[2] = { 0, 17 };
+
+static const UINT8 huff_table29[2] = { 0x0, 0x3a };
+static const UINT8 huff_bits29[2] = { 0, 17 };
+
+static const UINT8 huff_table30[2] = { 0x0, 0x38 };
+static const UINT8 huff_bits30[2] = { 0, 17 };
+
+static const UINT8 huff_table31[2] = { 0x0, 0x36 };
+static const UINT8 huff_bits31[2] = { 0, 17 };
+
+static const UINT8 *huff_table[32] = { huff_table0, huff_table1, huff_table2, huff_table3, huff_table4, huff_table5, huff_table6, huff_table7, huff_table8, huff_table9, huff_table10, huff_table11, huff_table12, huff_table13, huff_table14, huff_table15, huff_table16, huff_table17, huff_table18, huff_table19, huff_table20, huff_table21, huff_table22, huff_table23, huff_table24, huff_table25, huff_table26, huff_table27, huff_table28, huff_table29, huff_table30, huff_table31 };
+
+static const UINT8 *huff_bits[32] = { huff_bits0, huff_bits1, huff_bits2, huff_bits3, huff_bits4, huff_bits5, huff_bits6, huff_bits7, huff_bits8, huff_bits9, huff_bits10, huff_bits11, huff_bits12, huff_bits13, huff_bits14, huff_bits15, huff_bits16, huff_bits17, huff_bits18, huff_bits19, huff_bits20, huff_bits21, huff_bits22, huff_bits23, huff_bits24, huff_bits25, huff_bits26, huff_bits27, huff_bits28, huff_bits29, huff_bits30, huff_bits31 };
+
+static const UINT8 mbAddrIncrTable[][2] = {
+    {0x0, 0},
+    {0x1, 1},
+    {0x3, 3},
+    {0x2, 3},
+    {0x3, 4},
+    {0x2, 4},
+    {0x3, 5},
+    {0x2, 5},
+    {0x7, 7},
+    {0x6, 7},
+    {0xb, 8},
+    {0xa, 8},
+    {0x9, 8},
+    {0x8, 8},
+    {0x7, 8},
+    {0x6, 8},
+    {0x17, 10},
+    {0x16, 10},
+    {0x15, 10},
+    {0x14, 10},
+    {0x13, 10},
+    {0x12, 10},
+    {0x23, 11},
+    {0x22, 11},
+    {0x21, 11},
+    {0x20, 11},
+    {0x1f, 11},
+    {0x1e, 11},
+    {0x1d, 11},
+    {0x1c, 11},
+    {0x1b, 11},
+    {0x1a, 11},
+    {0x19, 11},
+    {0x18, 11}};
+
+static const UINT8 mbPatTable[][2] = {
+    {0x0, 0},
+    {0xb, 5},
+    {0x9, 5},
+    {0xd, 6},
+    {0xd, 4},
+    {0x17, 7},
+    {0x13, 7},
+    {0x1f, 8},
+    {0xc, 4},
+    {0x16, 7},
+    {0x12, 7},
+    {0x1e, 8},
+    {0x13, 5},
+    {0x1b, 8},
+    {0x17, 8},
+    {0x13, 8},
+    {0xb, 4},
+    {0x15, 7},
+    {0x11, 7},
+    {0x1d, 8},
+    {0x11, 5},
+    {0x19, 8},
+    {0x15, 8},
+    {0x11, 8},
+    {0xf, 6},
+    {0xf, 8},
+    {0xd, 8},
+    {0x3, 9},
+    {0xf, 5},
+    {0xb, 8},
+    {0x7, 8},
+    {0x7, 9},
+    {0xa, 4},
+    {0x14, 7},
+    {0x10, 7},
+    {0x1c, 8},
+    {0xe, 6},
+    {0xe, 8},
+    {0xc, 8},
+    {0x2, 9},
+    {0x10, 5},
+    {0x18, 8},
+    {0x14, 8},
+    {0x10, 8},
+    {0xe, 5},
+    {0xa, 8},
+    {0x6, 8},
+    {0x6, 9},
+    {0x12, 5},
+    {0x1a, 8},
+    {0x16, 8},
+    {0x12, 8},
+    {0xd, 5},
+    {0x9, 8},
+    {0x5, 8},
+    {0x5, 9},
+    {0xc, 5},
+    {0x8, 8},
+    {0x4, 8},
+    {0x4, 9},
+    {0x7, 3},
+    {0xa, 5},	/* grrr... 61, 62, 63 added - Kevin */
+    {0x8, 5},
+    {0xc, 6}
+};
+
+const UINT8 zigzag_direct[64] = { /* zigzag_direct[i] = raster position (8 * row + column) of the i-th zigzag coefficient */
+    0, 1, 8, 16, 9, 2, 3, 10,
+    17, 24, 32, 25, 18, 11, 4, 5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13, 6, 7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36,
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+
+static unsigned char const default_intra_matrix[64] = {
+	8, 16, 19, 22, 26, 27, 29, 34,
+	16, 16, 22, 24, 27, 29, 34, 37,
+	19, 22, 26, 27, 29, 34, 34, 38,
+	22, 22, 26, 27, 29, 34, 37, 40,
+	22, 26, 27, 29, 32, 35, 40, 48,
+	26, 27, 29, 32, 35, 40, 48, 58,
+	26, 27, 29, 34, 38, 46, 56, 69,
+	27, 29, 35, 38, 46, 56, 69, 83
+};
+
+/* XXX: could hardcode this matrix */
+static unsigned char const default_non_intra_matrix[64] = {
+    16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16,
+};
+
+static unsigned char const frame_rate_tab[9] = { /* integer frame rates; presumably indexed by the MPEG frame rate code - confirm */
+    0, 24, 24, 25, 30, 30, 50, 60, 60,
+};
diff --git a/libav/mpegvideo.c b/libav/mpegvideo.c
new file mode 100644
index 0000000000..4987b38af4
--- /dev/null
+++ b/libav/mpegvideo.c
@@ -0,0 +1,1098 @@
+/*
+ * The simplest mpeg encoder
+ * Copyright (c) 2000 Gerard Lantau.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <netinet/in.h>
+#include <math.h>
+#include "avcodec.h"
+#include "mpegvideo.h"
+
+//#define DEBUG
+
+/* depends on the JPEG library */
+extern void jpeg_fdct_ifast (DCTELEM * data);
+
+/* depends on mpeg */
+extern void j_rev_dct (DCTELEM *data);
+
+/* for jpeg fast DCT */
+#define CONST_BITS 14
+
+/*
+ * Per coefficient scale factors applied after jpeg_fdct_ifast().  They are
+ * folded into the quantizer tables by convert_matrix() and divided out
+ * explicitly in the non USE_FAST_MUL path of dct_quantize().  Indexed like
+ * the block array.
+ */
+static const unsigned short aanscales[64] = {
+    /* precomputed values scaled up by 14 bits */
+    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+};
+
+/*
+ * Lookup tables filled in by MPV_encode_init():
+ *  - cropTbl[MAX_NEG_CROP + v] is v clamped to 0..255, for
+ *    -MAX_NEG_CROP <= v < 256 + MAX_NEG_CROP (used by put_pixels()).
+ *  - squareTbl[256 + d] is d * d for -256 <= d < 256 (used by pix_norm1()
+ *    and pix_norm()).
+ */
+static UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
+static UINT32 squareTbl[512];
+
+static void encode_picture(MpegEncContext *s, int picture_number);
+static void rate_control_init(MpegEncContext *s);
+static int rate_estimate_qscale(MpegEncContext *s);
+static void mpeg1_skip_picture(MpegEncContext *s, int pict_num);
+
+#include "mpegencodevlc.h"
+
+/*
+ * Write a 32 bit start code to the output bitstream, after aligning the
+ * bit writer with align_put_bits().
+ */
+static void put_header(MpegEncContext *s, int header)
+{
+    PutBitContext *pb = &s->pb;
+
+    align_put_bits(pb);
+    put_bits(pb, 32, header);
+}
+
+/*
+ * Build the fixed point quantizer table used by the USE_FAST_MUL path of
+ * dct_quantize(): for each coefficient,
+ *     qmat[k] = 2^22 * 2^14 / (aanscales[k] * qscale * quant_matrix[k])
+ * so that (coefficient * qmat[k]) / 2^22 undoes the DCT scaling and
+ * quantizes in a single multiplication.
+ */
+static void convert_matrix(int *qmat, const UINT8 *quant_matrix, int qscale)
+{
+    const double numerator = (1 << 22) * 16384.0;
+    int k;
+
+    for (k = 0; k < 64; k++) {
+        int divisor = aanscales[k] * qscale * quant_matrix[k];
+
+        qmat[k] = (int)(numerator / divisor);
+    }
+}
+
+
+/*
+ * Set up the encoder state shared by the MPEG-1, H.263, RV10 and MJPEG
+ * encoders.
+ *
+ * Copies bit rate, frame rate, picture size and GOP size from avctx,
+ * selects the output format from avctx->codec->id, maps the frame rate to
+ * a frame_rate_index, allocates the last_picture and current_picture
+ * buffers (width * height * 3 / 2 bytes each, three planes at offsets 0,
+ * c_size and c_size + c_size / 4), fills the cropTbl / squareTbl lookup
+ * tables and initializes rate control.
+ *
+ * A gop_size <= 1 selects intra only coding (gop_size is then forced to
+ * 12, which only controls how often the MPEG-1 headers are repeated).
+ *
+ * Returns 0 on success.  Returns -1 for an unsupported codec or frame
+ * rate, when mjpeg_init() fails, or when a picture buffer cannot be
+ * allocated; in that case everything allocated here is released again.
+ */
+int MPV_encode_init(AVEncodeContext *avctx)
+{
+    MpegEncContext *s = avctx->priv_data;
+    int pict_size, c_size, i;
+    UINT8 *pict;
+
+    s->bit_rate = avctx->bit_rate;
+    s->frame_rate = avctx->rate;
+    s->width = avctx->width;
+    s->height = avctx->height;
+    s->gop_size = avctx->gop_size;
+    if (s->gop_size <= 1) {
+        s->intra_only = 1;
+        s->gop_size = 12;
+    } else {
+        s->intra_only = 0;
+    }
+
+    switch(avctx->codec->id) {
+    case CODEC_ID_MPEG1VIDEO:
+        s->out_format = FMT_MPEG1;
+        break;
+    case CODEC_ID_MJPEG:
+        s->out_format = FMT_MJPEG;
+        s->intra_only = 1; /* force intra only for jpeg */
+        if (mjpeg_init(s) < 0)
+            return -1;
+        break;
+    case CODEC_ID_H263:
+        s->out_format = FMT_H263;
+        break;
+    case CODEC_ID_RV10:
+        s->out_format = FMT_H263;
+        s->h263_rv10 = 1;
+        break;
+    default:
+        return -1;
+    }
+
+    /* from here on, failures must go through 'fail' so that the MJPEG
+       state set up above is released */
+    switch(s->frame_rate) {
+    case 24:
+        s->frame_rate_index = 2;
+        break;
+    case 25:
+        s->frame_rate_index = 3;
+        break;
+    case 30:
+        s->frame_rate_index = 5;
+        break;
+    case 50:
+        s->frame_rate_index = 6;
+        break;
+    case 60:
+        s->frame_rate_index = 8;
+        break;
+    default:
+        /* we accept lower frame rates than 24 for low bit rate mpeg */
+        if (s->frame_rate >= 1 && s->frame_rate < 24) {
+            s->frame_rate_index = 2;
+        } else {
+            goto fail;
+        }
+        break;
+    }
+
+    /* init */
+    s->mb_width = s->width / 16;
+    s->mb_height = s->height / 16;
+
+    c_size = s->width * s->height;
+    pict_size = (c_size * 3) / 2;
+
+    /* previous (reference) picture; allocated exactly once */
+    pict = malloc(pict_size);
+    if (pict == NULL)
+        goto fail;
+    s->last_picture[0] = pict;
+    s->last_picture[1] = pict + c_size;
+    s->last_picture[2] = pict + c_size + (c_size / 4);
+
+    /* reconstruction of the picture being encoded */
+    pict = malloc(pict_size);
+    if (pict == NULL) {
+        free(s->last_picture[0]);
+        s->last_picture[0] = NULL;
+        goto fail;
+    }
+    s->current_picture[0] = pict;
+    s->current_picture[1] = pict + c_size;
+    s->current_picture[2] = pict + c_size + (c_size / 4);
+
+    /* clamping table: identity in 0..255, saturating outside */
+    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
+    for(i=0;i<MAX_NEG_CROP;i++) {
+        cropTbl[i] = 0;
+        cropTbl[i + MAX_NEG_CROP + 256] = 255;
+    }
+
+    /* squares of -256..255, accessed through squareTbl + 256 */
+    for(i=0;i<512;i++) {
+        squareTbl[i] = (i - 256) * (i - 256);
+    }
+
+    /* rate control init */
+    rate_control_init(s);
+
+    s->picture_number = 0;
+    s->fake_picture_number = 0;
+
+    return 0;
+
+ fail:
+    if (s->out_format == FMT_MJPEG)
+        mjpeg_close(s);
+    return -1;
+}
+
+/*
+ * Release what MPV_encode_init() allocated: both picture buffers (each is
+ * a single allocation addressed through plane pointer 0) and, for MJPEG,
+ * the state owned by the mjpeg module.  Always returns 0.
+ */
+int MPV_encode_end(AVEncodeContext *avctx)
+{
+    MpegEncContext *s = avctx->priv_data;
+
+#if 0
+    /* end of sequence */
+    if (s->out_format == FMT_MPEG1) {
+        put_header(s, SEQ_END_CODE);
+    }
+
+    if (!s->flush_frames)
+        flush_put_bits(&s->pb);
+#endif
+    free(s->current_picture[0]);
+    free(s->last_picture[0]);
+
+    if (s->out_format == FMT_MJPEG) {
+        mjpeg_close(s);
+    }
+
+    return 0;
+}
+
+/*
+ * Encode one picture.
+ *
+ * avctx    - encoder context; avctx->priv_data is the MpegEncContext
+ * buf      - output buffer, handed to init_put_bits()
+ * buf_size - size of buf, handed to init_put_bits()
+ * data     - points to three plane pointers, which are copied into
+ *            s->new_picture
+ *
+ * For MPEG-1 the sequence and GOP headers are written in front of the
+ * first picture of every GOP, and filler P pictures are inserted when the
+ * requested frame rate is below 24.  The picture type is chosen (I at the
+ * start of a GOP or when intra_only, P otherwise), avctx->key_frame is
+ * set accordingly, the picture is encoded and the last / current picture
+ * buffers are swapped.
+ *
+ * Returns the number of bytes produced (s->pb.buf_ptr - s->pb.buf).
+ */
+int MPV_encode_picture(AVEncodeContext *avctx,
+                       unsigned char *buf, int buf_size, void *data)
+{
+    MpegEncContext *s = avctx->priv_data;
+    int i;
+
+    memcpy(s->new_picture, data, 3 * sizeof(UINT8 *));
+
+    init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
+
+    /* group of picture */
+    if (s->out_format == FMT_MPEG1) {
+        unsigned int vbv_buffer_size;
+        unsigned int time_code, fps, n;
+
+        if ((s->picture_number % s->gop_size) == 0) {
+            /* mpeg1 header repeated every gop */
+            put_header(s, SEQ_START_CODE);
+            
+            put_bits(&s->pb, 12, s->width);
+            put_bits(&s->pb, 12, s->height);
+            put_bits(&s->pb, 4, 1); /* 1/1 aspect ratio */
+            put_bits(&s->pb, 4, s->frame_rate_index);
+            put_bits(&s->pb, 18, 0x3ffff);
+            put_bits(&s->pb, 1, 1); /* marker */
+            /* vbv buffer size: slightly greater than an I frame. We add
+               some margin just in case */
+            vbv_buffer_size = (3 * s->I_frame_bits) / (2 * 8);
+            put_bits(&s->pb, 10, (vbv_buffer_size + 16383) / 16384); 
+            put_bits(&s->pb, 1, 1); /* constrained parameter flag */
+            put_bits(&s->pb, 1, 0); /* no custom intra matrix */
+            put_bits(&s->pb, 1, 0); /* no custom non intra matrix */
+
+            put_header(s, GOP_START_CODE);
+            put_bits(&s->pb, 1, 0); /* drop frame flag: not set */
+            /* time code : we must convert from the real frame rate to a
+               fake mpeg frame rate in case of low frame rate */
+            fps = frame_rate_tab[s->frame_rate_index];
+            time_code = s->fake_picture_number;
+            /* remembered so that temporal references can be written
+               relative to the start of this GOP */
+            s->gop_picture_number = time_code;
+            put_bits(&s->pb, 5, (time_code / (fps * 3600)) % 24);
+            put_bits(&s->pb, 6, (time_code / (fps * 60)) % 60);
+            put_bits(&s->pb, 1, 1);
+            put_bits(&s->pb, 6, (time_code / fps) % 60);
+            put_bits(&s->pb, 6, (time_code % fps));
+            put_bits(&s->pb, 1, 1); /* closed gop */
+            put_bits(&s->pb, 1, 0); /* broken link */
+        }
+
+        if (s->frame_rate < 24 && s->picture_number > 0) {
+            /* insert empty P pictures to slow down to the desired
+               frame rate. Each fake pictures takes about 20 bytes */
+            fps = frame_rate_tab[s->frame_rate_index];
+            n = ((s->picture_number * fps) / s->frame_rate) - 1;
+            while (s->fake_picture_number < n) {
+                mpeg1_skip_picture(s, s->fake_picture_number - 
+                                   s->gop_picture_number); 
+                s->fake_picture_number++;
+            }
+
+        }
+        /* account for the real picture encoded below */
+        s->fake_picture_number++;
+    }
+    
+    
+    if (!s->intra_only) {
+        /* first picture of GOP is intra */
+        if ((s->picture_number % s->gop_size) == 0)
+            s->pict_type = I_TYPE;
+        else
+            s->pict_type = P_TYPE;
+    } else {
+        s->pict_type = I_TYPE;
+    }
+    avctx->key_frame = (s->pict_type == I_TYPE);
+    
+    encode_picture(s, s->picture_number);
+    
+    /* swap current and last picture */
+    for(i=0;i<3;i++) {
+        UINT8 *tmp;
+        
+        tmp = s->last_picture[i];
+        s->last_picture[i] = s->current_picture[i];
+        s->current_picture[i] = tmp;
+    }
+    s->picture_number++;
+
+    if (s->out_format == FMT_MJPEG)
+        mjpeg_picture_trailer(s);
+
+    flush_put_bits(&s->pb);
+    /* running total consumed by rate_estimate_qscale() */
+    s->total_bits += (s->pb.buf_ptr - s->pb.buf) * 8;
+    return s->pb.buf_ptr - s->pb.buf;
+}
+
+/*
+ * Insert a fake P picture: a picture header, one slice, and only the
+ * first and the last macroblock coded (both "motion only" with a zero
+ * vector); every macroblock in between is skipped through the address
+ * increment.  pict_num is written as the temporal reference (low 10 bits).
+ */
+static void mpeg1_skip_picture(MpegEncContext *s, int pict_num)
+{
+    unsigned int mb_incr;
+
+    /* mpeg1 picture header */
+    put_header(s, PICTURE_START_CODE);
+    /* temporal reference */
+    put_bits(&s->pb, 10, pict_num & 0x3ff); 
+    
+    put_bits(&s->pb, 3, P_TYPE);
+    put_bits(&s->pb, 16, 0xffff); /* non constant bit rate */
+    
+    put_bits(&s->pb, 1, 1); /* integer coordinates */
+    put_bits(&s->pb, 3, 1); /* forward_f_code */
+    
+    put_bits(&s->pb, 1, 0); /* extra bit picture */
+    
+    /* only one slice */
+    put_header(s, SLICE_MIN_START_CODE);
+    put_bits(&s->pb, 5, 1); /* quantizer scale */
+    put_bits(&s->pb, 1, 0); /* slice extra information */
+    
+    /* first macroblock of the slice: address increment of 1 */
+    mb_incr = 1;
+    put_bits(&s->pb, mbAddrIncrTable[mb_incr][1], 
+             mbAddrIncrTable[mb_incr][0]);
+    
+    /* empty macroblock */
+    put_bits(&s->pb, 3, 1); /* motion only */
+    
+    /* zero motion x & y */
+    put_bits(&s->pb, 1, 1); 
+    put_bits(&s->pb, 1, 1); 
+
+    /* skip all the macroblocks up to the last one: each 11 bit escape
+       code accounts for 33 macroblocks, the remainder is coded with the
+       address increment table */
+    mb_incr = s->mb_width * s->mb_height - 1;
+    while (mb_incr > 33) {
+        put_bits(&s->pb, 11, 0x008);
+        mb_incr -= 33;
+    }
+    put_bits(&s->pb, mbAddrIncrTable[mb_incr][1], 
+             mbAddrIncrTable[mb_incr][0]);
+    
+    /* empty macroblock */
+    put_bits(&s->pb, 3, 1); /* motion only */
+    
+    /* zero motion x & y */
+    put_bits(&s->pb, 1, 1); 
+    put_bits(&s->pb, 1, 1); 
+}
+
+/*
+ * Sum of the 256 pixels of a 16x16 area.  pix points to the top left
+ * pixel; consecutive rows are line_size bytes apart.
+ */
+static int pix_sum(UINT8 *pix, int line_size)
+{
+    int total = 0;
+    int row, col;
+
+    for (row = 0; row < 16; row++) {
+        for (col = 0; col < 16; col++)
+            total += pix[col];
+        pix += line_size;
+    }
+    return total;
+}
+
+/*
+ * Sum of the squared pixel values of a 16x16 area, using the squareTbl
+ * lookup table.  pix points to the top left pixel; consecutive rows are
+ * line_size bytes apart.
+ */
+static int pix_norm1(UINT8 *pix, int line_size)
+{
+    UINT32 *sq = squareTbl + 256;
+    int total = 0;
+    int row, col;
+
+    for (row = 0; row < 16; row++) {
+        for (col = 0; col < 16; col++)
+            total += sq[pix[col]];
+        pix += line_size;
+    }
+    return total;
+}
+
+/*
+ * Sum of squared differences between two 16x16 areas that share the same
+ * line_size.  The differences lie in -255..255 and are squared through
+ * squareTbl, which is addressed from its middle.
+ */
+static int pix_norm(UINT8 *pix1, UINT8 *pix2, int line_size)
+{
+    UINT32 *sq = squareTbl + 256;
+    int total = 0;
+    int row, col;
+
+    for (row = 0; row < 16; row++) {
+        for (col = 0; col < 16; col++)
+            total += sq[pix1[col] - pix2[col]];
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return total;
+}
+
+
+/*
+ * Decide whether macroblock (mb_x, mb_y) is coded intra or predicted from
+ * the previous picture.  No search is done: the motion vector returned in
+ * *mx_ptr / *my_ptr is always (0, 0).
+ *
+ * The luma block of the new picture is compared with the co-located block
+ * of the last picture.  The block is predicted (return 0) when the mean
+ * squared difference is at most 64 or is smaller than the variance of the
+ * new block; otherwise it is coded intra (return 1).
+ */
+static int estimate_motion(MpegEncContext *s, 
+                           int mb_x, int mb_y,
+                           int *mx_ptr, int *my_ptr)
+{
+    const int offset = (mb_y * 16 * s->width) + mb_x * 16;
+    UINT8 *cur = s->new_picture[0] + offset;
+    UINT8 *ref = s->last_picture[0] + offset;
+    int mean, variance, mean_sq_diff;
+
+    /* all three sums cover 256 pixels, hence the shifts by 8 */
+    mean = pix_sum(cur, s->width) >> 8;
+    variance = (pix_norm1(cur, s->width) >> 8) - mean * mean;
+    mean_sq_diff = pix_norm(cur, ref, s->width) >> 8;
+
+    *mx_ptr = 0;
+    *my_ptr = 0;
+
+    if (mean_sq_diff <= 64 || mean_sq_diff < variance)
+        return 0;
+    return 1;
+}
+
+static void get_pixels(DCTELEM *block, const UINT8 *pixels, int line_size);
+static void put_pixels(const DCTELEM *block, UINT8 *pixels, int line_size);
+static void sub_pixels(DCTELEM *block, const UINT8 *pixels, int line_size);
+static void add_pixels(DCTELEM *block, const UINT8 *pixels, int line_size);
+static int dct_quantize(MpegEncContext *s, DCTELEM *block, int qscale);
+static void encode_block(MpegEncContext *s, 
+                         DCTELEM *block, 
+                         int component);
+static void dct_unquantize(MpegEncContext *s, DCTELEM *block, int qscale);
+static void mpeg1_encode_mb(MpegEncContext *s, int mb_x, int mb_y,
+                            DCTELEM block[6][64],
+                            int motion_x, int motion_y);
+
+/*
+ * Encode the picture in s->new_picture as picture number picture_number,
+ * with the picture type already stored in s->pict_type.
+ *
+ * Steps: pick the quantizer scale from rate control, precompute the
+ * quantizer tables, write the format specific picture header, then for
+ * every macroblock: choose intra / non intra, fetch the pixels, subtract
+ * the prediction, DCT + quantize, entropy code, and (unless intra_only)
+ * reconstruct the macroblock into s->current_picture so that the encoder
+ * keeps the same reference as a decoder.
+ */
+static void encode_picture(MpegEncContext *s, int picture_number)
+{
+    int mb_x, mb_y;
+    UINT8 *ptr;
+    DCTELEM block[6][64];
+    int i, motion_x, motion_y;
+
+    s->picture_number = picture_number;
+    s->qscale = rate_estimate_qscale(s);
+
+    /* precompute matrix */
+    if (s->out_format == FMT_MJPEG) {
+        /* for mjpeg, we do include qscale in the matrix */
+        s->init_intra_matrix[0] = default_intra_matrix[0];
+        for(i=1;i<64;i++) {
+            int v = (default_intra_matrix[i] * s->qscale) >> 3;
+
+            /* init_intra_matrix has 8 bit entries: saturate instead of
+               wrapping around (83 * 31 >> 3 = 321 does not fit) */
+            if (v > 255)
+                v = 255;
+            s->init_intra_matrix[i] = v;
+        }
+        convert_matrix(s->intra_matrix, s->init_intra_matrix, 8);
+    } else {
+        convert_matrix(s->intra_matrix, default_intra_matrix, s->qscale);
+        convert_matrix(s->non_intra_matrix, default_non_intra_matrix, s->qscale);
+    }
+
+    switch(s->out_format) {
+    case FMT_MJPEG:
+        mjpeg_picture_header(s);
+        break;
+    case FMT_H263:
+        if (s->h263_rv10) 
+            rv10_encode_picture_header(s, picture_number);
+        else
+            h263_picture_header(s, picture_number);
+        break;
+    case FMT_MPEG1:
+        /* mpeg1 picture header */
+        put_header(s, PICTURE_START_CODE);
+        /* temporal reference, relative to the first picture of the GOP */
+        put_bits(&s->pb, 10, (s->fake_picture_number - 
+                              s->gop_picture_number) & 0x3ff); 
+        
+        put_bits(&s->pb, 3, s->pict_type);
+        put_bits(&s->pb, 16, 0xffff); /* non constant bit rate */
+        
+        if (s->pict_type == P_TYPE) {
+            put_bits(&s->pb, 1, 1); /* integer coordinates */
+            put_bits(&s->pb, 3, 1); /* forward_f_code */
+        }
+        
+        put_bits(&s->pb, 1, 0); /* extra bit picture */
+        
+        /* only one slice */
+        put_header(s, SLICE_MIN_START_CODE);
+        put_bits(&s->pb, 5, s->qscale); /* quantizer scale */
+        put_bits(&s->pb, 1, 0); /* slice extra information */
+        break;
+    }
+        
+    /* init last dc values */
+    /* XXX: quant matrix value is implied here */
+    s->last_dc[0] = 128;
+    s->last_dc[1] = 128;
+    s->last_dc[2] = 128;
+    s->mb_incr = 1;
+    
+    for(mb_y=0; mb_y < s->mb_height; mb_y++) {
+        for(mb_x=0; mb_x < s->mb_width; mb_x++) {
+            /* compute motion vector and macro block type (intra or non intra) */
+            motion_x = 0;
+            motion_y = 0;
+            if (s->pict_type == P_TYPE) {
+                s->mb_intra = estimate_motion(s, mb_x, mb_y,
+                                              &motion_x,
+                                              &motion_y);
+            } else {
+                s->mb_intra = 1;
+            }
+
+            /* reset intra predictors if non intra mb */
+            if (!s->mb_intra) {
+                s->last_dc[0] = 128;
+                s->last_dc[1] = 128;
+                s->last_dc[2] = 128;
+            }
+
+            /* get the pixels: four 8x8 luma blocks, then one 8x8 block
+               from each of the two half width planes */
+            ptr = s->new_picture[0] + (mb_y * 16 * s->width) + mb_x * 16;
+            get_pixels(block[0], ptr, s->width);
+            get_pixels(block[1], ptr + 8, s->width);
+            get_pixels(block[2], ptr + 8 * s->width, s->width);
+            get_pixels(block[3], ptr + 8 * s->width + 8, s->width);
+            ptr = s->new_picture[1] + (mb_y * 8 * (s->width >> 1)) + mb_x * 8;
+            get_pixels(block[4],ptr, s->width >> 1);
+
+            ptr = s->new_picture[2] + (mb_y * 8 * (s->width >> 1)) + mb_x * 8;
+            get_pixels(block[5],ptr, s->width >> 1);
+
+            /* subtract previous frame if non intra */
+            if (!s->mb_intra) {
+                ptr = s->last_picture[0] + 
+                    ((mb_y * 16 + motion_y) * s->width) + (mb_x * 16 + motion_x);
+
+                sub_pixels(block[0], ptr, s->width);
+                sub_pixels(block[1], ptr + 8, s->width);
+                sub_pixels(block[2], ptr + s->width * 8, s->width);
+                sub_pixels(block[3], ptr + 8 + s->width * 8, s->width);
+                ptr = s->last_picture[1] + 
+                    ((mb_y * 8 + (motion_y >> 1)) * (s->width >> 1)) + 
+                    (mb_x * 8 + (motion_x >> 1));
+                sub_pixels(block[4], ptr, s->width >> 1);
+                ptr = s->last_picture[2] + 
+                    ((mb_y * 8 + (motion_y >> 1)) * (s->width >> 1)) + 
+                    (mb_x * 8 + (motion_x >> 1));
+                sub_pixels(block[5], ptr, s->width >> 1);
+            }
+
+            /* DCT & quantize */
+            for(i=0;i<6;i++) {
+                int last_index;
+                last_index = dct_quantize(s, block[i], s->qscale);
+                s->block_last_index[i] = last_index;
+            }
+
+            /* huffman encode */
+            switch(s->out_format) {
+            case FMT_MPEG1:
+                mpeg1_encode_mb(s, mb_x, mb_y, block, motion_x, motion_y);
+                break;
+            case FMT_H263:
+                h263_encode_mb(s, block, motion_x, motion_y);
+                break;
+            case FMT_MJPEG:
+                mjpeg_encode_mb(s, block);
+                break;
+            }
+
+            /* decompress blocks so that we keep the state of the decoder */
+            if (!s->intra_only) {
+                /* blocks without any non zero coefficient are left as
+                   they are (all zero) */
+                for(i=0;i<6;i++) {
+                    if (s->block_last_index[i] >= 0) {
+                        dct_unquantize(s, block[i], s->qscale);
+                    }
+                }
+
+                /* add the prediction back for non intra macroblocks */
+                if (!s->mb_intra) {
+                    ptr = s->last_picture[0] + 
+                        ((mb_y * 16 + motion_y) * s->width) + (mb_x * 16 + motion_x);
+                    
+                    add_pixels(block[0], ptr, s->width);
+                    add_pixels(block[1], ptr + 8, s->width);
+                    add_pixels(block[2], ptr + s->width * 8, s->width);
+                    add_pixels(block[3], ptr + 8 + s->width * 8, s->width);
+                    ptr = s->last_picture[1] + 
+                        ((mb_y * 8 + (motion_y >> 1)) * (s->width >> 1)) + 
+                        (mb_x * 8 + (motion_x >> 1));
+                    add_pixels(block[4], ptr, s->width >> 1);
+                    ptr = s->last_picture[2] + 
+                        ((mb_y * 8 + (motion_y >> 1)) * (s->width >> 1)) + 
+                        (mb_x * 8 + (motion_x >> 1));
+                    add_pixels(block[5], ptr, s->width >> 1);
+                }
+
+                /* write the pixels */
+                ptr = s->current_picture[0] + (mb_y * 16 * s->width) + mb_x * 16;
+                put_pixels(block[0], ptr, s->width);
+                put_pixels(block[1], ptr + 8, s->width);
+                put_pixels(block[2], ptr + 8 * s->width, s->width);
+                put_pixels(block[3], ptr + 8 * s->width + 8, s->width);
+                ptr = s->current_picture[1] + (mb_y * 8 * (s->width >> 1)) + mb_x * 8;
+                put_pixels(block[4],ptr, s->width >> 1);
+                
+                ptr = s->current_picture[2] + (mb_y * 8 * (s->width >> 1)) + mb_x * 8;
+                put_pixels(block[5],ptr, s->width >> 1);
+            }
+        }
+    }
+}
+
+/*
+ * Write one macroblock in MPEG-1 syntax.
+ *
+ * block holds the six quantized 8x8 blocks of the macroblock and
+ * s->block_last_index[] their last non zero index (negative when a block
+ * is entirely zero).  A macroblock with nothing to code and a zero motion
+ * vector is skipped by incrementing s->mb_incr, except for the first and
+ * the last macroblock of the picture, which are always written.
+ */
+static void mpeg1_encode_mb(MpegEncContext *s, int mb_x, int mb_y,
+                            DCTELEM block[6][64],
+                            int motion_x, int motion_y)
+{
+    int mb_incr, i, cbp;
+
+    /* compute cbp: bit (5 - i) is set when block i has coefficients */
+    cbp = 0;
+    for(i=0;i<6;i++) {
+        if (s->block_last_index[i] >= 0)
+            cbp |= 1 << (5 - i);
+    }
+
+    /* skip macroblock, except if first or last macroblock of a slice */
+    if ((cbp | motion_x | motion_y) == 0 &&
+        (!((mb_x | mb_y) == 0 ||
+           (mb_x == s->mb_width - 1 && mb_y == s->mb_height - 1)))) {
+        s->mb_incr++;
+    } else {
+        /* output mb incr */
+        mb_incr = s->mb_incr;
+
+        /* each 11 bit escape code accounts for 33 macroblocks */
+        while (mb_incr > 33) {
+            put_bits(&s->pb, 11, 0x008);
+            mb_incr -= 33;
+        }
+        put_bits(&s->pb, mbAddrIncrTable[mb_incr][1], 
+                 mbAddrIncrTable[mb_incr][0]);
+        
+        if (s->pict_type == I_TYPE) {
+            put_bits(&s->pb, 1, 1); /* macroblock_type : macroblock_quant = 0 */
+        } else {
+            if (s->mb_intra) {
+                /* intra macroblock inside a P picture */
+                put_bits(&s->pb, 5, 0x03);
+            } else {
+                if (motion_x == 0 && motion_y == 0) {
+                    if (cbp != 0) {
+                        put_bits(&s->pb, 2, 1); /* macroblock_pattern only */
+                        put_bits(&s->pb, mbPatTable[cbp][1], mbPatTable[cbp][0]);
+                    } else {
+                        put_bits(&s->pb, 3, 1); /* motion only & zero motion vectors */
+                        /* zero motion x & y */
+                        put_bits(&s->pb, 1, 1); 
+                        put_bits(&s->pb, 1, 1); 
+                    }
+                } else {
+                    /* XXX: not used yet */
+                    /* NOTE(review): unreachable today because
+                       estimate_motion() only returns zero vectors; no
+                       macroblock type or motion vector is written here */
+                    put_bits(&s->pb, mbPatTable[cbp][1], mbPatTable[cbp][0]);
+                }
+            }
+            
+        }
+        
+        /* coefficients of the blocks flagged in cbp */
+        for(i=0;i<6;i++) {
+            if (cbp & (1 << (5 - i))) {
+                encode_block(s, block[i], i);
+            }
+        }
+        s->mb_incr = 1;
+    }
+}
+
+/*
+ * Load an 8x8 area of pixels into block (8 entries per row).  Consecutive
+ * source rows are line_size bytes apart.
+ */
+static void get_pixels(DCTELEM *block, const UINT8 *pixels, int line_size)
+{
+    int row, col;
+
+    for (row = 0; row < 8; row++) {
+        const UINT8 *src = pixels + row * line_size;
+        DCTELEM *dst = block + row * 8;
+
+        for (col = 0; col < 8; col++)
+            dst[col] = src[col];
+    }
+}
+
+/*
+ * Store an 8x8 block into a picture, clamping every value to 0..255
+ * through cropTbl.  Consecutive destination rows are line_size bytes
+ * apart.
+ */
+static void put_pixels(const DCTELEM *block, UINT8 *pixels, int line_size)
+{
+    UINT8 *cm = cropTbl + MAX_NEG_CROP;
+    int row, col;
+
+    for (row = 0; row < 8; row++) {
+        const DCTELEM *src = block + row * 8;
+        UINT8 *dst = pixels + row * line_size;
+
+        for (col = 0; col < 8; col++)
+            dst[col] = cm[src[col]];
+    }
+}
+
+/*
+ * Subtract an 8x8 area of pixels from block, in place.  Consecutive pixel
+ * rows are line_size bytes apart.
+ */
+static void sub_pixels(DCTELEM *block, const UINT8 *pixels, int line_size)
+{
+    int row, col;
+
+    for (row = 0; row < 8; row++) {
+        const UINT8 *src = pixels + row * line_size;
+        DCTELEM *dst = block + row * 8;
+
+        for (col = 0; col < 8; col++)
+            dst[col] -= src[col];
+    }
+}
+
+/*
+ * Add an 8x8 area of pixels to block, in place.  Consecutive pixel rows
+ * are line_size bytes apart.
+ */
+static void add_pixels(DCTELEM *block, const UINT8 *pixels, int line_size)
+{
+    int row, col;
+
+    for (row = 0; row < 8; row++) {
+        const UINT8 *src = pixels + row * line_size;
+        DCTELEM *dst = block + row * 8;
+
+        for (col = 0; col < 8; col++)
+            dst[col] += src[col];
+    }
+}
+
+/* when defined, quantization uses the tables precomputed by
+   convert_matrix() (one multiplication per coefficient) */
+#define USE_FAST_MUL 
+
+/*
+ * Forward DCT and quantization of one 8x8 block, in place.
+ *
+ * For intra macroblocks the DC coefficient is only rescaled
+ * ((dc + 32) >> 6) and the remaining coefficients use the intra table,
+ * except for H.263 which uses the non intra table.  Non intra
+ * macroblocks quantize all 64 coefficients with the non intra table.
+ *
+ * qscale is only used when USE_FAST_MUL is not defined; otherwise it is
+ * already folded into the tables.
+ *
+ * Returns the zigzag scan position of the last non zero coefficient:
+ * at least 0 for intra blocks, -1 for an all zero non intra block.
+ */
+static int dct_quantize(MpegEncContext *s, 
+                        DCTELEM *block, 
+                        int qscale)
+{
+    int i, j, level, last_non_zero;
+#ifdef USE_FAST_MUL
+    const int *qmat;
+#else
+    const UINT8 *qmat;
+#endif
+
+    jpeg_fdct_ifast (block);
+
+    if (s->mb_intra) {
+        /* DC coefficient: rescale with rounding, no quantizer matrix */
+        block[0] = (block[0] + 4 * 8) >> 6;
+        i = 1;
+        last_non_zero = 0;
+        if (s->out_format == FMT_H263) {
+#ifdef USE_FAST_MUL
+            qmat = s->non_intra_matrix;
+#else
+            qmat = default_non_intra_matrix;
+#endif
+        } else {
+#ifdef USE_FAST_MUL
+            qmat = s->intra_matrix;
+#else
+            qmat = default_intra_matrix;
+#endif
+        }
+    } else {
+        i = 0;
+        last_non_zero = -1;
+#ifdef USE_FAST_MUL
+        qmat = s->non_intra_matrix;
+#else
+        qmat = default_non_intra_matrix;
+#endif
+    }
+
+    /* i walks the scan order, j is the matching position in the block */
+    for(;i<64;i++) {
+        j = zigzag_direct[i];
+        level = block[j];
+#ifdef USE_FAST_MUL
+        level = (level * qmat[j]) / (1 << 22);
+#else
+        /* post dct normalization */
+        level = (level << 11) / aanscales[j];
+        /* quantification */
+        level = (8 * level) / (qscale * qmat[j]);
+#endif
+        block[j] = level;
+        if (level)
+            last_non_zero = i;
+    }
+    return last_non_zero;
+}
+
+/*
+ * Inverse quantization followed by the inverse DCT (j_rev_dct) of one
+ * 8x8 block, in place.  Used to rebuild the picture a decoder would see.
+ *
+ * Intra macroblocks: the DC coefficient is multiplied by 8; the other
+ * coefficients are scaled by qscale * default_intra_matrix[i] / 8, except
+ * for H.263 where they take the non intra path below (starting at
+ * coefficient 1).
+ * Non intra path: non zero levels become
+ * ((2 * level +/- 1) * qscale * default_non_intra_matrix[i]) >> 4, and an
+ * odd result is moved one step toward zero so that it becomes even.
+ */
+static void dct_unquantize(MpegEncContext *s, 
+                           DCTELEM *block, int qscale)
+{
+    int i, level, coeff;
+    const UINT8 *quant_matrix;
+
+    if (s->mb_intra) {
+        block[0] = block[0] << 3;
+        if (s->out_format == FMT_H263) {
+            /* H.263 intra: coefficients 1..63 share the non intra code */
+            i = 1;
+            goto unquant_even;
+        }
+        quant_matrix = default_intra_matrix;
+        for(i=1;i<64;i++) {
+            block[i] = (block[i] * qscale * quant_matrix[i]) >> 3;
+        }
+    } else {
+        i = 0;
+    unquant_even:
+        quant_matrix = default_non_intra_matrix;
+        for(;i<64;i++) {
+            level = block[i];
+            if (level) {
+                if (level < 0) {
+                    coeff = (((level << 1) - 1) * qscale *
+                             ((int) (quant_matrix[i]))) >> 4;
+                    /* odd negative value: one step toward zero */
+                    coeff += (coeff & 1);
+                } else {
+                    coeff = (((level << 1) + 1) * qscale *
+                             ((int) (quant_matrix[i]))) >> 4;
+                    /* odd positive value: one step toward zero */
+                    coeff -= (coeff & 1);
+                }
+                block[i] = coeff;
+            }
+        }
+    }
+
+    j_rev_dct(block);
+}
+                         
+
+/*
+ * Write a DC difference: first the variable length code of its size
+ * category (looked up in vlc_dc_table by absolute value; luminance table
+ * for component 0, chrominance table otherwise), then, for a non zero
+ * difference, that many additional bits: the low bits of diff when it is
+ * positive, of (diff - 1) when it is negative.
+ */
+static inline void encode_dc(MpegEncContext *s, int diff, int component)
+{
+    const int size = vlc_dc_table[abs(diff)];
+
+    if (component != 0) {
+        put_bits(&s->pb, vlc_dc_chroma_bits[size], vlc_dc_chroma_code[size]);
+    } else {
+        put_bits(&s->pb, vlc_dc_lum_bits[size], vlc_dc_lum_code[size]);
+    }
+
+    /* a zero difference has no additional bits */
+    if (diff == 0)
+        return;
+
+    if (diff < 0) {
+        put_bits(&s->pb, size, ((diff - 1) & ((1 << size) - 1)));
+    } else {
+        put_bits(&s->pb, size, (diff & ((1 << size) - 1)));
+    }
+}
+
+/*
+ * Entropy code the quantized coefficients of block number n (0..3 are
+ * the luminance blocks, 4 and 5 the two chrominance blocks) of the
+ * current macroblock.
+ *
+ * Intra macroblocks: the DC coefficient is coded as a difference to the
+ * previous DC value of the same component (s->last_dc[]).  Non intra
+ * macroblocks: a first coefficient of +1 or -1 uses a dedicated 2 bit
+ * code, anything else goes through the generic path.  The remaining
+ * coefficients up to s->block_last_index[n] are coded as (run, level)
+ * pairs with the Huffman tables or, when no table entry exists, with an
+ * escape sequence.  A 2 bit end of block code terminates the block.
+ */
+static void encode_block(MpegEncContext *s, 
+                         DCTELEM *block, 
+                         int n)
+{
+    int alevel, level, last_non_zero, dc, diff, i, j, run, last_index;
+    int code, nbits, component;
+    
+    last_index = s->block_last_index[n];
+
+    /* DC coef */
+    if (s->mb_intra) {
+        /* component 0 for the four luminance blocks, 1 and 2 for
+           blocks 4 and 5 */
+        component = (n <= 3 ? 0 : n - 4 + 1);
+        dc = block[0]; /* overflow is impossible */
+        diff = dc - s->last_dc[component];
+        encode_dc(s, diff, component);
+        s->last_dc[component] = dc;
+        i = 1;
+    } else {
+        /* encode the first coefficient : needs to be done here because
+           it is handled slightly differently */
+        level = block[0];
+        if (abs(level) == 1) {
+                code = ((UINT32)level >> 31); /* the sign bit */
+                put_bits(&s->pb, 2, code | 0x02);
+                i = 1;
+        } else {
+            /* enter the loop body directly with coefficient 0; level
+               is already loaded */
+            i = 0;
+            last_non_zero = -1;
+            goto next_coef;
+        }
+    }
+
+    /* now quantify & encode AC coefs */
+    last_non_zero = i - 1;
+    for(;i<=last_index;i++) {
+        j = zigzag_direct[i];
+        level = block[j];
+    next_coef:
+#if 0
+        if (level != 0)
+            printf("level[%d]=%d\n", i, level);
+#endif            
+        /* encode using VLC */
+        if (level != 0) {
+            /* number of zero coefficients since the last coded one */
+            run = i - last_non_zero - 1;
+            alevel = abs(level);
+            //            printf("run=%d level=%d\n", run, level);
+            if ( (run < HUFF_MAXRUN) && (alevel < huff_maxlevel[run])) {
+                /* encode using the Huffman tables */
+                code = (huff_table[run])[alevel];
+                nbits = (huff_bits[run])[alevel];
+                code |= ((UINT32)level >> 31); /* the sign bit */
+
+                put_bits(&s->pb, nbits, code);
+            } else {
+                /* escape: only clip in this case */
+                if (level > 255)
+                    level = 255;
+                else if (level < -255)
+                    level = -255;
+                /* 6 bit escape code, 6 bit run, then the level on 8
+                   bits (|level| < 128) or 16 bits */
+                put_bits(&s->pb, 6, 0x1);
+                put_bits(&s->pb, 6, run);
+                if (alevel < 128) {
+                    put_bits(&s->pb, 8, level & 0xff);
+                } else {
+                    if (level < 0) {
+                        put_bits(&s->pb, 16, 0x8001 + level + 255);
+                    } else {
+                        put_bits(&s->pb, 16, level & 0xffff);
+                    }
+                }
+            }
+            last_non_zero = i;
+        }
+    }
+    /* end of block */
+    put_bits(&s->pb, 2, 0x2);
+}
+
+
+/* rate control */
+
+/* an I frame is I_FRAME_SIZE_RATIO bigger than a P frame */
+#define I_FRAME_SIZE_RATIO 1.5
+#define QSCALE_K           20
+
+/*
+ * Compute the bit budget of I and P pictures from the target bit rate.
+ *
+ * Intra only: every picture gets bit_rate / frame_rate bits.
+ * Otherwise the budget of one GOP (gop_size pictures) is shared between
+ * one I picture, weighted I_FRAME_SIZE_RATIO, and gop_size - 1 P
+ * pictures.  Also resets the running wanted_bits counter.
+ */
+static void rate_control_init(MpegEncContext *s)
+{
+    s->wanted_bits = 0;
+
+    if (!s->intra_only) {
+        float gop_bits = (float)(s->gop_size * s->bit_rate);
+        float gop_weight = (float)(s->frame_rate * (I_FRAME_SIZE_RATIO + s->gop_size - 1));
+
+        s->P_frame_bits = (int) (gop_bits / gop_weight);
+        s->I_frame_bits = (int)(s->P_frame_bits * I_FRAME_SIZE_RATIO);
+    } else {
+        s->I_frame_bits = s->bit_rate / s->frame_rate;
+        s->P_frame_bits = s->I_frame_bits;
+    }
+
+#if defined(DEBUG)
+    printf("I_frame_size=%d P_frame_size=%d\n",
+           s->I_frame_bits, s->P_frame_bits);
+#endif
+}
+
+
+/*
+ * Choose the quantizer scale of the next picture.
+ *
+ * The budget of the picture is added to wanted_bits, and the scale is
+ * derived from how far the bits actually produced (total_bits) are from
+ * that target: 31 minus the surplus per macroblock divided by QSCALE_K.
+ * The estimate is divided by I_FRAME_SIZE_RATIO for I pictures of a
+ * stream that also has P pictures, clamped to 1..31 and rounded.
+ *
+ * This heuristic is rather poor, but at least we do not have to
+ * change the qscale at every macroblock.
+ */
+static int rate_estimate_qscale(MpegEncContext *s)
+{
+    long long total_bits = s->total_bits;
+    float q;
+    int qscale, diff;
+
+    if (s->pict_type != I_TYPE) {
+        s->wanted_bits += s->P_frame_bits;
+    } else {
+        s->wanted_bits += s->I_frame_bits;
+    }
+
+    diff = s->wanted_bits - total_bits;
+    q = 31.0 - (float)diff / (QSCALE_K * s->mb_height * s->mb_width);
+
+    /* adjust for I frame */
+    if (!s->intra_only && s->pict_type == I_TYPE) {
+        q /= I_FRAME_SIZE_RATIO;
+    }
+
+    /* keep the estimate inside the valid quantizer range */
+    if (q > 31)
+        q = 31;
+    else if (q < 1)
+        q = 1;
+
+    qscale = (int)(q + 0.5);
+#if defined(DEBUG)
+    printf("%d: total=%Ld br=%0.1f diff=%d qest=%0.1f\n", 
+           s->picture_number, 
+           total_bits, (float)s->frame_rate * total_bits / s->picture_number, 
+           diff, q);
+#endif
+    return qscale;
+}
+
+/* MPEG-1 video encoder descriptor: name, codec type, codec id, size of the
+   private context, then the init / encode / close entry points */
+AVEncoder mpeg1video_encoder = {
+    "mpeg1video",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MPEG1VIDEO,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+};
+
+/* H.263 encoder descriptor; shares the MPV_* entry points, the format is
+   selected from the codec id in MPV_encode_init() */
+AVEncoder h263_encoder = {
+    "h263",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_H263,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+};
+
+/* RV10 encoder descriptor; MPV_encode_init() maps it to the H.263 format
+   with the h263_rv10 flag set */
+AVEncoder rv10_encoder = {
+    "rv10",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_RV10,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+};
+
+/* MJPEG encoder descriptor; MPV_encode_init() forces intra only coding
+   for this codec */
+AVEncoder mjpeg_encoder = {
+    "mjpeg",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MJPEG,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+};
diff --git a/libav/mpegvideo.h b/libav/mpegvideo.h
new file mode 100644
index 0000000000..e1fbe044a0
--- /dev/null
+++ b/libav/mpegvideo.h
@@ -0,0 +1,94 @@
+/* mpegvideo.c */
+
+/* Start codes: 32 bit values made of the 0x000001 prefix and one code byte. */
+#define SEQ_END_CODE		0x000001b7
+#define SEQ_START_CODE		0x000001b3
+#define GOP_START_CODE		0x000001b8
+#define PICTURE_START_CODE	0x00000100
+#define SLICE_MIN_START_CODE	0x00000101
+#define SLICE_MAX_START_CODE	0x000001af
+#define EXT_START_CODE		0x000001b5
+#define USER_START_CODE		0x000001b2
+
+/* Macros for picture code type (MpegEncContext.pict_type is compared against them). */
+#define I_TYPE 1
+#define P_TYPE 2
+#define B_TYPE 3
+
+typedef int DCTELEM; /* element type of the block[6][64] arrays taken by the *_encode_mb() functions */
+
+enum OutputFormat { /* type of MpegEncContext.out_format */
+    FMT_MPEG1,
+    FMT_H263, /* no separate RV10 value here: see MpegEncContext.h263_rv10 */
+    FMT_MJPEG,
+};
+
+#define MAX_NEG_CROP 384 /* NOTE(review): not referenced in this header; meaning to be confirmed at its users */
+
+#define MPEG_BUF_SIZE (16 * 1024) /* 16384; NOTE(review): presumably a size in bytes - confirm at its users */
+
+typedef struct MpegEncContext { /* context type of the mpeg1video, h263, rv10 and mjpeg AVEncoder entries */
+    /* the following parameters must be initialized before encoding */
+    int width, height; /* picture size. must be a multiple of 16 */
+    int gop_size; /* NOTE(review): presumably the number of pictures per GOP - confirm */
+    int frame_rate; /* number of frames per second */
+    int intra_only; /* if true, only intra pictures are generated */
+    int bit_rate;        /* wanted bit rate */
+    enum OutputFormat out_format; /* output format */
+    int h263_rv10; /* use RV10 variation for H263 */
+
+    /* the following fields are managed internally by the encoder */
+
+    /* bit output */
+    PutBitContext pb;
+
+    /* sequence parameters */
+    int picture_number;
+    int fake_picture_number; /* picture number at the bitstream frame rate */
+    int gop_picture_number; /* index of the first picture of a GOP */
+    int mb_width, mb_height; /* NOTE(review): presumably the picture size in 16x16 macroblocks - confirm */
+    UINT8 *new_picture[3];     /* picture to be compressed */
+    UINT8 *last_picture[3];    /* previous picture */
+    UINT8 *current_picture[3]; /* buffer to store the decompressed current picture */
+    int last_dc[3];
+    int qscale;
+    int pict_type; /* one of the *_TYPE picture code types */
+    int frame_rate_index;
+    /* macroblock layer */
+    int mb_incr;
+    int mb_intra;
+    /* matrix transmitted in the bitstream */
+    UINT8 init_intra_matrix[64];
+    /* precomputed matrix (combine qscale and DCT renorm) */
+    int intra_matrix[64];
+    int non_intra_matrix[64];
+    int block_last_index[6];  /* last non zero coefficient in block */
+
+    void *opaque; /* private data for the user */
+
+    /* bit rate control */
+    int I_frame_bits;    /* wanted number of bits per I frame */
+    int P_frame_bits;    /* same for P frame */
+    long long wanted_bits; /* running sum of the I_frame_bits/P_frame_bits budgets (rate_estimate_qscale) */
+    long long total_bits; /* compared with wanted_bits by rate_estimate_qscale; presumably the bits produced so far */
+    struct MJpegContext *mjpeg_ctx; /* NOTE(review): presumably managed by mjpeg_init()/mjpeg_close() - confirm */
+} MpegEncContext;
+
+extern const UINT8 zigzag_direct[64]; /* defined outside this header; NOTE(review): presumably a coefficient scan order - confirm */
+
+/* h263enc.c: functions declared here for the other encoder files */
+
+void h263_encode_mb(MpegEncContext *s, 
+                    DCTELEM block[6][64], /* six blocks of 64 coefficients */
+                    int motion_x, int motion_y);
+void h263_picture_header(MpegEncContext *s, int picture_number);
+void rv10_encode_picture_header(MpegEncContext *s, int picture_number);
+
+/* mjpegenc.c: functions declared here for the other encoder files */
+
+int mjpeg_init(MpegEncContext *s); /* NOTE(review): meaning of the return value is not visible here - confirm */
+void mjpeg_close(MpegEncContext *s);
+void mjpeg_encode_mb(MpegEncContext *s, 
+                     DCTELEM block[6][64]); /* six blocks of 64 coefficients */
+void mjpeg_picture_header(MpegEncContext *s);
+void mjpeg_picture_trailer(MpegEncContext *s);
diff --git a/libav/resample.c b/libav/resample.c
new file mode 100644
index 0000000000..008153b0d6
--- /dev/null
+++ b/libav/resample.c
@@ -0,0 +1,245 @@
+/*
+ * Sample rate conversion for both audio and video
+ * Copyright (c) 2000 Gerard Lantau.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <math.h>
+#include "avcodec.h"
+
+#define NDEBUG
+#include <assert.h>
+
+#define FRAC_BITS 16 /* number of fractional bits of the fixed point values used below */
+#define FRAC (1 << FRAC_BITS) /* 1.0 in that fixed point format */
+
+/* Set up one channel for 'ratio' = output_rate / input_rate: iratio is the
+   integer averaging factor, incr the remaining step scaled by FRAC. */
+static void init_mono_resample(ReSampleChannelContext *s, float ratio)
+{
+    float step = 1.0 / ratio;      /* input samples per output sample */
+    int whole = (int)floor(step);
+
+    s->iratio = whole ? whole : 1; /* a factor of 0 is replaced by 1 */
+    s->inv = (FRAC / s->iratio);
+    s->incr = (int)((step / s->iratio) * FRAC);
+    s->icount = s->iratio;
+    s->frac = s->last_sample = s->isum = 0;
+}
+
+/* fractional audio resampling with linear interpolation. s->last_sample is
+   the sample preceding 'input' and s->frac the position of the next output
+   sample after it, in 1/FRAC steps; both are updated for the next call.
+   Returns the number of samples written to 'output'. */
+static int fractional_resample(ReSampleChannelContext *s, short *output, short *input, int nb_samples)
+{
+    unsigned int frac = s->frac, incr = s->incr;
+    int l0 = s->last_sample, l1;
+    short *q = output, *p = input, *pend = input + nb_samples;
+
+    if (nb_samples <= 0)
+        return 0; /* nothing to read; the state is kept for the next call */
+
+    l1 = *p++;
+    for(;;) {
+        /* step to the input interval holding the output position; when the
+           input runs out, 'frac' has already been made relative to l1 */
+        while (frac >= FRAC) {
+            frac -= FRAC;
+            if (p >= pend)
+                goto the_end;
+            l0 = l1;
+            l1 = *p++;
+        }
+        /* interpolate */
+        *q++ = (l0 * (FRAC - frac) + l1 * frac) >> FRAC_BITS;
+        frac += incr;
+    }
+ the_end:
+    s->last_sample = l1;
+    s->frac = frac;
+    return q - output;
+}
+
+/* Average each group of s->iratio input samples into one output sample.
+   A group may span calls: the partial sum and the number of samples still
+   missing are kept in s->isum and s->icount. Returns the number of
+   samples written to 'output'. */
+static int integer_downsample(ReSampleChannelContext *s, short *output, short *input, int nb_samples)
+{
+    short *q = output;
+    short *pend = input + nb_samples;
+    int c = s->icount;
+    int sum = s->isum;
+
+    /* tested before the first read, so that an empty input is not touched */
+    while (input < pend) {
+        sum += *input++;
+        if (--c == 0) {
+            /* s->inv is FRAC / iratio: multiply and shift instead of dividing */
+            *q++ = (sum * s->inv) >> FRAC_BITS;
+            c = s->iratio;
+            sum = 0;
+        }
+    }
+
+    s->isum = sum;
+    s->icount = c;
+    return q - output;
+}
+
+/*
+ * Mix interleaved two-channel audio down to one channel: every output
+ * sample is the sum of two consecutive input samples shifted right by
+ * one bit.
+ *
+ * output: receives n1 samples
+ * input:  2 * n1 samples are read
+ * n1:     number of samples (nothing is done when it is <= 0)
+ */
+static void stereo_to_mono(short *output, short *input, int n1)
+{
+    const short *pair = input;
+    short *dst = output;
+    int remaining;
+
+    /* one input pair per iteration */
+    for (remaining = n1; remaining > 0; remaining--) {
+        int mixed = pair[0] + pair[1];
+
+        *dst = mixed >> 1;
+        dst++;
+        pair += 2;
+    }
+}
+
+/* Split n interleaved sample pairs: the first sample of each pair goes to
+   output1, the second to output2. XXX: should use more abstract 'N' channels system */
+static void stereo_split(short *output1, short *output2, short *input, int n)
+{
+    int k;
+    for (k = 0; k < n; k++) {
+        output1[k] = input[2 * k];
+        output2[k] = input[2 * k + 1];
+    }
+}
+
+/* Interleave n samples of input1 and input2 into output, input1 first. */
+static void stereo_mux(short *output, short *input1, short *input2, int n)
+{
+    int k;
+    for (k = 0; k < n; k++) {
+        output[2 * k] = input1[k];
+        output[2 * k + 1] = input2[k];
+    }
+}
+
+/* Resample one channel: integer downsampling when s->iratio > 1, then
+   fractional resampling unless the step is exactly FRAC. Returns the output count. */
+static int mono_resample(ReSampleChannelContext *s, short *output, short *input, int nb_samples)
+{
+    short buf1[nb_samples];
+    short *buftmp = input;
+
+    /* first downsample by an integer factor with averaging filter */
+    if (s->iratio > 1) {
+        buftmp = buf1;
+        nb_samples = integer_downsample(s, buftmp, input, nb_samples);
+    }
+    /* an incomplete averaging group yields no sample: do not read buf1 then */
+    if (nb_samples <= 0)
+        return 0;
+    /* then do a fractional resampling with linear interpolation */
+    if (s->incr != FRAC)
+        return fractional_resample(s, output, buftmp, nb_samples);
+    memcpy(output, buftmp, nb_samples * sizeof(short));
+    return nb_samples;
+}
+
+/* Set up 's' with ratio = output_rate / input_rate. Returns 0, or -1 when
+   a channel count is not 1 or 2 or a rate is not positive. NOTE: mono to
+   stereo is accepted here although audio_resample() aborts on it. */
+int audio_resample_init(ReSampleContext *s,
+                        int output_channels, int input_channels,
+                        int output_rate, int input_rate)
+{
+    int i;
+    if (output_channels < 1 || output_channels > 2 ||
+        input_channels < 1 || input_channels > 2 ||
+        output_rate <= 0 || input_rate <= 0)
+        return -1;
+    s->ratio = (float)output_rate / (float)input_rate;
+    s->input_channels = input_channels;
+    s->output_channels = output_channels;
+    for(i=0;i<output_channels;i++)
+        init_mono_resample(&s->channel_ctx[i], s->ratio);
+    return 0;
+}
+
+/* resample audio. 'nb_samples' is the number of input samples per channel;
+   stereo data is interleaved. Returns the number of samples per channel
+   written to 'output', which the caller must size for the resampled data.
+   XXX: optimize it, and use polyphase filters: the quality here is HORRIBLE. */
+int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples)
+{
+    int i, nb_samples1;
+    int incr = s->channel_ctx[0].incr; /* every channel is set up with the same ratio */
+    /* one sample is produced per 'incr' step over nb_samples * FRAC, so a
+       channel grows beyond nb_samples when upsampling (incr < FRAC) */
+    int out_max = (incr > 0 && incr < FRAC) ?
+        (int)((long long)nb_samples * FRAC / incr) + 1 : nb_samples;
+    short buf[3][nb_samples], obuf[2][out_max];
+    short *buftmp1, *buftmp2[2], *buftmp3[2];
+
+    if (s->input_channels == s->output_channels && s->ratio == 1.0) {
+        /* nothing to do */
+        memcpy(output, input, nb_samples * s->input_channels * sizeof(short));
+        return nb_samples;
+    }
+
+    if (s->input_channels == 2 && s->output_channels == 1) {
+        buftmp1 = buf[0];
+        stereo_to_mono(buftmp1, input, nb_samples);
+    } else if (s->input_channels == 1 && s->output_channels == 2) {
+        /* XXX: do it */
+        abort();
+    } else {
+        buftmp1 = input;
+    }
+
+    if (s->output_channels == 2) {
+        buftmp2[0] = buf[1];
+        buftmp2[1] = buf[2];
+        buftmp3[0] = obuf[0];
+        buftmp3[1] = obuf[1];
+        stereo_split(buftmp2[0], buftmp2[1], buftmp1, nb_samples);
+    } else {
+        buftmp2[0] = buftmp1;
+        buftmp3[0] = output;
+    }
+
+    /* resample each channel */
+    nb_samples1 = 0; /* avoid warning */
+    for(i=0;i<s->output_channels;i++)
+        nb_samples1 = mono_resample(&s->channel_ctx[i], buftmp3[i], buftmp2[i], nb_samples);
+
+    if (s->output_channels == 2)
+        stereo_mux(output, buftmp3[0], buftmp3[1], nb_samples1);
+    return nb_samples1;
+}
author	Fabrice Bellard <fabrice@bellard.org>	2000-12-20 00:02:47 +0000
committer	Fabrice Bellard <fabrice@bellard.org>	2000-12-20 00:02:47 +0000
commit	9aeeeb63f7e1ab7b0b7bb839a5f258667a2d2d78 (patch)
tree	133769894d45da35e05ded6ea39d33bb81e7ae18 /libav
parent	77bb6835ba752bb9335d208963a53227bbb1bc63 (diff)