author     Michael Niedermayer <michaelni@gmx.at>    2011-05-19 05:12:45 +0200
committer  Michael Niedermayer <michaelni@gmx.at>    2011-05-19 06:00:31 +0200
commit     75a37b57a59f6701d9443c5f7a0ceec108b27a18 (patch)
tree       1eea866003f3d7385261dea40b5b8063e87f9b8a /libavcodec
parent     8529f9b36b7c1b8f2cb36ba2709983517c4b6458 (diff)
parent     41e21e4db623ebd77f431a6f30cf21d62d9e1f33 (diff)
Merge remote-tracking branch 'qatar/master'
* qatar/master:
  APIchanges: fill in date and commit for request_sample_fmt
  Add floating-point sample format support to the ac3, eac3, dca, aac, and vorbis decoders.
  Add support for request_sample_format in ffmpeg and ffplay.
  Add APIchanges entry for request_sample_fmt.
  Add request_sample_fmt field to AVCodecContext.
  Add float_interleave() to FmtConvertContext with x86-optimized versions.
  Remove unused make variable SEEK_REFFILE
  fate: remove redundant aref and vref references
  fate: remove do_ffmpeg_nocheck function
  fate: do not collect -benchmark output
  mpegaudiodec: remove decode_end() function
  fate: run aref and vref as regular tests
  mpegaudio: sanitise compute_antialias_* names
  mpeg12: add slice-threading checks to slice-threading initializers.
  h264: copy pixel_shift between slice threading contexts.
  mdec: enable frame-level multithreading.
  mdec.c: fix overread.

Conflicts:
  libavcodec/aacdec.c
  libavcodec/ac3dec.c
  libavcodec/avcodec.h
  libavcodec/dca.c
  libavcodec/h264.c
  libavcodec/mdec.c
  libavcodec/mpeg12.c
  libavcodec/options.c
  libavcodec/version.h
  libavcodec/vorbisdec.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r--  libavcodec/aacdec.c              |  36
-rw-r--r--  libavcodec/aacsbr.c              |  11
-rw-r--r--  libavcodec/aacsbr.h              |   2
-rw-r--r--  libavcodec/ac3dec.c              |  40
-rw-r--r--  libavcodec/avcodec.h             |  15
-rw-r--r--  libavcodec/dca.c                 |  36
-rw-r--r--  libavcodec/fmtconvert.c          |  20
-rw-r--r--  libavcodec/fmtconvert.h          |   9
-rw-r--r--  libavcodec/h264.c                |   1
-rw-r--r--  libavcodec/mdec.c                |   4
-rw-r--r--  libavcodec/mpeg12.c              |   2
-rw-r--r--  libavcodec/mpegaudiodec.c        |   9
-rw-r--r--  libavcodec/mpegaudiodec_float.c  |  15
-rw-r--r--  libavcodec/options.c             |   7
-rw-r--r--  libavcodec/vorbisdec.c           |  27
-rw-r--r--  libavcodec/x86/fmtconvert.asm    | 141
-rw-r--r--  libavcodec/x86/fmtconvert_mmx.c  |  30
17 files changed, 323 insertions, 82 deletions
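
The user-visible core of this merge is the new request_sample_fmt field: decoders that can output more than one sample format (here aac, ac3/eac3, dca and vorbis) now honour the caller's preference at open time. A minimal sketch of the calling side follows; it is assumed application code, not part of this commit, and uses the 2011-era avcodec_open() and av_get_bits_per_sample_fmt() calls that the diff itself relies on:

#include <libavcodec/avcodec.h>

/* Sketch: ask for float output before opening the decoder, then size output
 * buffers from the format the decoder actually picked, since it is free to
 * fall back to S16. */
static int open_preferring_float(AVCodecContext *avctx, AVCodec *codec)
{
    avctx->request_sample_fmt = AV_SAMPLE_FMT_FLT;   /* field added in this merge */

    if (avcodec_open(avctx, codec) < 0)
        return -1;

    /* bytes per sample of the negotiated format, as the decoders below do */
    return av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8;
}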
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index 61e33656c7..7564714e31 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -186,7 +186,7 @@ static av_cold int che_configure(AACContext *ac,
if (che_pos[type][id]) {
if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
return AVERROR(ENOMEM);
- ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
+ ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
if (type != TYPE_CCE) {
ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
if (type == TYPE_CPE ||
@@ -550,6 +550,7 @@ static void reset_predictor_group(PredictorState *ps, int group_num)
static av_cold int aac_decode_init(AVCodecContext *avctx)
{
AACContext *ac = avctx->priv_data;
+ float output_scale_factor;
ac->avctx = avctx;
ac->m4ac.sample_rate = avctx->sample_rate;
@@ -561,8 +562,13 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
return -1;
}
- avctx->sample_fmt = avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT ?
- AV_SAMPLE_FMT_FLT : AV_SAMPLE_FMT_S16;
+ if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+ avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+ output_scale_factor = 1.0 / 32768.0;
+ } else {
+ avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+ output_scale_factor = 1.0;
+ }
AAC_INIT_VLC_STATIC( 0, 304);
AAC_INIT_VLC_STATIC( 1, 270);
@@ -590,9 +596,9 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
352);
- ff_mdct_init(&ac->mdct, 11, 1, 1.0/1024.0);
- ff_mdct_init(&ac->mdct_small, 8, 1, 1.0/128.0);
- ff_mdct_init(&ac->mdct_ltp, 11, 0, -2.0);
+ ff_mdct_init(&ac->mdct, 11, 1, output_scale_factor/1024.0);
+ ff_mdct_init(&ac->mdct_small, 8, 1, output_scale_factor/128.0);
+ ff_mdct_init(&ac->mdct_ltp, 11, 0, -2.0/output_scale_factor);
// window initialization
ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
@@ -2174,8 +2180,8 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
avctx->frame_size = samples;
}
- data_size_tmp = samples * avctx->channels;
- data_size_tmp *= avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? sizeof(float) : sizeof(int16_t);
+ data_size_tmp = samples * avctx->channels *
+ (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8);
if (*data_size < data_size_tmp) {
av_log(avctx, AV_LOG_ERROR,
"Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
@@ -2185,10 +2191,12 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
*data_size = data_size_tmp;
if (samples) {
- if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
- float_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
- } else
- ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
+ if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT)
+ ac->fmt_conv.float_interleave(data, (const float **)ac->output_data,
+ samples, avctx->channels);
+ else
+ ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data,
+ samples, avctx->channels);
}
if (ac->output_configured)
@@ -2507,7 +2515,7 @@ AVCodec ff_aac_decoder = {
aac_decode_frame,
.long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
.sample_fmts = (const enum AVSampleFormat[]) {
- AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE
+ AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
},
.channel_layouts = aac_channel_layout,
};
@@ -2527,7 +2535,7 @@ AVCodec ff_aac_latm_decoder = {
.decode = latm_decode_frame,
.long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Codec LATM syntax)"),
.sample_fmts = (const enum AVSampleFormat[]) {
- AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE
+ AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
},
.channel_layouts = aac_channel_layout,
};
diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c
index afff6931fb..82092b385d 100644
--- a/libavcodec/aacsbr.c
+++ b/libavcodec/aacsbr.c
@@ -127,14 +127,19 @@ av_cold void ff_aac_sbr_init(void)
ff_ps_init();
}
-av_cold void ff_aac_sbr_ctx_init(SpectralBandReplication *sbr)
+av_cold void ff_aac_sbr_ctx_init(AACContext *ac, SpectralBandReplication *sbr)
{
+ float mdct_scale;
sbr->kx[0] = sbr->kx[1] = 32; //Typo in spec, kx' inits to 32
sbr->data[0].e_a[1] = sbr->data[1].e_a[1] = -1;
sbr->data[0].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
sbr->data[1].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
- ff_mdct_init(&sbr->mdct, 7, 1, 1.0/64);
- ff_mdct_init(&sbr->mdct_ana, 7, 1, -2.0);
+ /* SBR requires samples to be scaled to +/-32768.0 to work correctly.
+ * mdct scale factors are adjusted to scale up from +/-1.0 at analysis
+ * and scale back down at synthesis. */
+ mdct_scale = ac->avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? 32768.0f : 1.0f;
+ ff_mdct_init(&sbr->mdct, 7, 1, 1.0 / (64 * mdct_scale));
+ ff_mdct_init(&sbr->mdct_ana, 7, 1, -2.0 * mdct_scale);
ff_ps_ctx_init(&sbr->ps);
}
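
The comment above states the reasoning behind mdct_scale; as a quick arithmetic check (illustrative only, not FFmpeg code), the analysis and synthesis scale factors cancel, so the net SBR filterbank gain is the same -2.0/64 that the previous hard-coded constants gave:

/* The product (-2.0 * mdct_scale) * (1.0 / (64 * mdct_scale)) equals -2.0/64
 * for both the S16 path (mdct_scale = 1.0) and the float path (32768.0). */
#include <stdio.h>

int main(void)
{
    const float scales[2] = { 1.0f, 32768.0f };

    for (int i = 0; i < 2; i++) {
        float ana = -2.0f * scales[i];             /* mdct_ana scale above */
        float syn =  1.0f / (64.0f * scales[i]);   /* synthesis mdct scale above */
        printf("mdct_scale=%g net=%g\n", scales[i], ana * syn);
    }
    return 0;   /* both iterations print net=-0.03125 */
}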
diff --git a/libavcodec/aacsbr.h b/libavcodec/aacsbr.h
index 6b10ed43e4..d0284981c3 100644
--- a/libavcodec/aacsbr.h
+++ b/libavcodec/aacsbr.h
@@ -36,7 +36,7 @@
/** Initialize SBR. */
av_cold void ff_aac_sbr_init(void);
/** Initialize one SBR context. */
-av_cold void ff_aac_sbr_ctx_init(SpectralBandReplication *sbr);
+av_cold void ff_aac_sbr_ctx_init(AACContext *ac, SpectralBandReplication *sbr);
/** Close one SBR context. */
av_cold void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr);
/** Decode one SBR element. */
diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
index b4aae2263a..9b44668ae2 100644
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@ -185,6 +185,15 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
ff_fmt_convert_init(&s->fmt_conv, avctx);
av_lfg_init(&s->dith_state, 0);
+ /* set scale value for float to int16 conversion */
+ if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+ s->mul_bias = 1.0f;
+ avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+ } else {
+ s->mul_bias = 32767.0f;
+ avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+ }
+
/* allow downmixing to stereo or mono */
if (avctx->channels > 0 && avctx->request_channels > 0 &&
avctx->request_channels < avctx->channels &&
@@ -193,14 +202,6 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
}
s->downmixed = 1;
- if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
- avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
- s->mul_bias = 1.0f;
- } else {
- avctx->sample_fmt = AV_SAMPLE_FMT_S16;
- /* set scale value for float to int16 conversion */
- s->mul_bias = 32767.0f;
- }
return 0;
}
@@ -1295,8 +1296,8 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
const uint8_t *buf = avpkt->data;
int buf_size = avpkt->size;
AC3DecodeContext *s = avctx->priv_data;
- float *out_samples_flt = (float *)data;
- int16_t *out_samples = (int16_t *)data;
+ float *out_samples_flt = data;
+ int16_t *out_samples_s16 = data;
int blk, ch, err;
int data_size_orig, data_size_tmp;
const uint8_t *channel_map;
@@ -1400,7 +1401,7 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
for (ch = 0; ch < s->out_channels; ch++)
output[ch] = s->output[channel_map[ch]];
data_size_tmp = s->num_blocks * 256 * avctx->channels;
- data_size_tmp *= avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? sizeof(*out_samples_flt) : sizeof(*out_samples);
+ data_size_tmp *= avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? sizeof(*out_samples_flt) : sizeof(*out_samples_s16);
if (data_size_orig < data_size_tmp)
return -1;
*data_size = data_size_tmp;
@@ -1409,14 +1410,19 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
av_log(avctx, AV_LOG_ERROR, "error decoding the audio block\n");
err = 1;
}
+
if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
- float_interleave_noscale(out_samples_flt, output, 256, s->out_channels);
+ s->fmt_conv.float_interleave(out_samples_flt, output, 256,
+ s->out_channels);
out_samples_flt += 256 * s->out_channels;
} else {
- s->fmt_conv.float_to_int16_interleave(out_samples, output, 256, s->out_channels);
- out_samples += 256 * s->out_channels;
+ s->fmt_conv.float_to_int16_interleave(out_samples_s16, output, 256,
+ s->out_channels);
+ out_samples_s16 += 256 * s->out_channels;
}
}
+ *data_size = s->num_blocks * 256 * avctx->channels *
+ (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8);
return FFMIN(buf_size, s->frame_size);
}
@@ -1441,6 +1447,9 @@ AVCodec ff_ac3_decoder = {
.close = ac3_decode_end,
.decode = ac3_decode_frame,
.long_name = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
+ .sample_fmts = (const enum AVSampleFormat[]) {
+ AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
+ },
};
#if CONFIG_EAC3_DECODER
@@ -1453,5 +1462,8 @@ AVCodec ff_eac3_decoder = {
.close = ac3_decode_end,
.decode = ac3_decode_frame,
.long_name = NULL_IF_CONFIG_SMALL("ATSC A/52B (AC-3, E-AC-3)"),
+ .sample_fmts = (const enum AVSampleFormat[]) {
+ AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
+ },
};
#endif
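
The reshuffled init code above keeps the same mul_bias logic, only moving it ahead of the downmix setup. For context, a hedged illustration of why the bias differs per format: this sketch assumes the decoder's internal float samples are nominally in [-1.0, 1.0] (not stated in the diff), and float_to_int16_interleave() expects values already scaled to the int16 range, so the S16 path multiplies by 32767 while the float path leaves samples untouched:

/* Illustrative only: the effect of mul_bias on one nominal sample value. */
#include <math.h>
#include <stdio.h>

int main(void)
{
    float sample = 0.5f;                               /* nominal decoded value */
    short s16    = (short)lrintf(sample * 32767.0f);   /* S16 path, mul_bias = 32767 */
    float flt    = sample * 1.0f;                      /* FLT path, mul_bias = 1.0   */

    printf("s16=%d flt=%g (s16 / 32767 = %g)\n", s16, flt, s16 / 32767.0f);
    return 0;
}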
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index d1a5e6655e..99c349ed15 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2881,6 +2881,14 @@ typedef struct AVCodecContext {
enum AVAudioServiceType audio_service_type;
/**
+ * desired sample format
+ * - encoding: Not used.
+ * - decoding: Set by user.
+ * Decoder will decode to this format if it can.
+ */
+ enum AVSampleFormat request_sample_fmt;
+
+ /**
* Current statistics for PTS correction.
* - decoding: maintained and used by libavcodec, not intended to be used by user apps
* - encoding: unused
@@ -2890,13 +2898,6 @@ typedef struct AVCodecContext {
int64_t pts_correction_last_pts; /// PTS of the last frame
int64_t pts_correction_last_dts; /// DTS of the last frame
- /**
- * desired sample format
- * - encoding: Not used.
- * - decoding: Set by user.
- * Decoder will decode to this format if it can.
- */
- enum AVSampleFormat request_sample_fmt;
} AVCodecContext;
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index 7a35631eea..74bae4e295 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -1627,8 +1627,9 @@ static int dca_decode_frame(AVCodecContext * avctx,
int lfe_samples;
int num_core_channels = 0;
int i;
- float *samples_flt = data;
- int16_t *samples = data;
+ float *samples_flt = data;
+ int16_t *samples_s16 = data;
+ int out_size;
DCAContext *s = avctx->priv_data;
int channels;
int core_ss_end;
@@ -1818,11 +1819,11 @@ static int dca_decode_frame(AVCodecContext * avctx,
return -1;
}
- data_size_tmp = (s->sample_blocks / 8) * 256 * channels;
- data_size_tmp *= avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? sizeof(*samples_flt) : sizeof(*samples);
- if (*data_size < data_size_tmp)
+ out_size = 256 / 8 * s->sample_blocks * channels *
+ (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8);
+ if (*data_size < out_size)
return -1;
- *data_size = data_size_tmp;
+ *data_size = out_size;
/* filter to get final output */
for (i = 0; i < (s->sample_blocks / 8); i++) {
@@ -1841,13 +1842,15 @@ static int dca_decode_frame(AVCodecContext * avctx,
}
}
- /* interleave samples */
if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
- float_interleave(samples_flt, s->samples_chanptr, 256, channels);
+ s->fmt_conv.float_interleave(samples_flt, s->samples_chanptr, 256,
+ channels);
samples_flt += 256 * channels;
} else {
- s->fmt_conv.float_to_int16_interleave(samples, s->samples_chanptr, 256, channels);
- samples += 256 * channels;
+ s->fmt_conv.float_to_int16_interleave(samples_s16,
+ s->samples_chanptr, 256,
+ channels);
+ samples_s16 += 256 * channels;
}
}
@@ -1884,10 +1887,14 @@ static av_cold int dca_decode_init(AVCodecContext * avctx)
for (i = 0; i < DCA_PRIM_CHANNELS_MAX+1; i++)
s->samples_chanptr[i] = s->samples + i * 256;
- avctx->sample_fmt = avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT ?
- AV_SAMPLE_FMT_FLT : AV_SAMPLE_FMT_S16;
- s->scale_bias = 1.0;
+ if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+ avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+ s->scale_bias = 1.0 / 32768.0;
+ } else {
+ avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+ s->scale_bias = 1.0;
+ }
/* allow downmixing to stereo */
if (avctx->channels > 0 && avctx->request_channels < avctx->channels &&
@@ -1924,5 +1931,8 @@ AVCodec ff_dca_decoder = {
.close = dca_decode_end,
.long_name = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),
.capabilities = CODEC_CAP_CHANNEL_CONF,
+ .sample_fmts = (const enum AVSampleFormat[]) {
+ AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
+ },
.profiles = NULL_IF_CONFIG_SMALL(profiles),
};
diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
index 0e8aa5e909..c03117c2cd 100644
--- a/libavcodec/fmtconvert.c
+++ b/libavcodec/fmtconvert.c
@@ -56,11 +56,31 @@ static void float_to_int16_interleave_c(int16_t *dst, const float **src,
}
}
+void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
+ int channels)
+{
+ int j, c;
+ unsigned int i;
+ if (channels == 2) {
+ for (i = 0; i < len; i++) {
+ dst[2*i] = src[0][i];
+ dst[2*i+1] = src[1][i];
+ }
+ } else if (channels == 1 && len < INT_MAX / sizeof(float)) {
+ memcpy(dst, src[0], len * sizeof(float));
+ } else {
+ for (c = 0; c < channels; c++)
+ for (i = 0, j = c; i < len; i++, j += channels)
+ dst[j] = src[c][i];
+ }
+}
+
av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
{
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
c->float_to_int16 = float_to_int16_c;
c->float_to_int16_interleave = float_to_int16_interleave_c;
+ c->float_interleave = ff_float_interleave_c;
if (ARCH_ARM) ff_fmt_convert_init_arm(c, avctx);
if (HAVE_ALTIVEC) ff_fmt_convert_init_altivec(c, avctx);
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index 82811d108c..825422bed6 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -68,8 +68,17 @@ typedef struct FmtConvertContext {
*/
void (*float_to_int16_interleave)(int16_t *dst, const float **src,
long len, int channels);
+
+ /**
+ * Convert multiple arrays of float to an array of interleaved float.
+ */
+ void (*float_interleave)(float *dst, const float **src, unsigned int len,
+ int channels);
} FmtConvertContext;
+void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
+ int channels);
+
void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx);
void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx);
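
With the callback declared here, every decoder in this merge funnels its per-channel float buffers through the same two-branch output path. A condensed sketch of that pattern follows; the names emit_samples, planar and nb_samples are placeholders, not taken from the diff:

/* Sketch of the common output path: planar float per channel, interleaved
 * into the caller's buffer as float or int16 depending on the negotiated
 * sample format. The int16 branch expects samples already scaled to the
 * int16 range by the decoder's scale/mul bias. */
static void emit_samples(AVCodecContext *avctx, FmtConvertContext *fmt_conv,
                         void *out, const float **planar,
                         int nb_samples, int channels)
{
    if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT)
        fmt_conv->float_interleave(out, planar, nb_samples, channels);
    else
        fmt_conv->float_to_int16_interleave(out, planar, nb_samples, channels);
}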
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index a843d21446..ae3d263535 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -1953,6 +1953,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
c->h264dsp = h->h264dsp;
c->sps = h->sps;
c->pps = h->pps;
+ c->pixel_shift = h->pixel_shift;
init_scan_tables(c);
clone_tables(c, h, i);
}
diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c
index 30cd3ab176..5f540f05f2 100644
--- a/libavcodec/mdec.c
+++ b/libavcodec/mdec.c
@@ -126,7 +126,8 @@ static inline int decode_mb(MDECContext *a, DCTELEM block[6][64]){
a->dsp.clear_blocks(block[0]);
for(i=0; i<6; i++){
- if( mdec_decode_block_intra(a, block[ block_index[i] ], block_index[i]) < 0)
+ if( mdec_decode_block_intra(a, block[ block_index[i] ], block_index[i]) < 0 ||
+ get_bits_left(&a->gb) < 0)
return -1;
}
return 0;
@@ -252,6 +253,7 @@ static av_cold int decode_init_thread_copy(AVCodecContext *avctx){
return 0;
}
+
static av_cold int decode_end(AVCodecContext *avctx){
MDECContext * const a = avctx->priv_data;
diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 38a3e6f3c6..6227efd51f 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -2342,6 +2342,7 @@ static int decode_chunks(AVCodecContext *avctx,
if(s2->pict_type != AV_PICTURE_TYPE_B || avctx->skip_frame <= AVDISCARD_DEFAULT){
if(HAVE_THREADS && avctx->active_thread_type&FF_THREAD_SLICE){
int i;
+ assert(avctx->thread_count > 1);
avctx->execute(avctx, slice_decode_thread, &s2->thread_context[0], NULL, s->slice_count, sizeof(void*));
for(i=0; i<s->slice_count; i++)
@@ -2510,6 +2511,7 @@ static int decode_chunks(AVCodecContext *avctx,
if(HAVE_THREADS && avctx->active_thread_type&FF_THREAD_SLICE){
int threshold= (s2->mb_height*s->slice_count + avctx->thread_count/2) / avctx->thread_count;
+ assert(avctx->thread_count > 1);
if(threshold <= mb_y){
MpegEncContext *thread_context= s2->thread_context[s->slice_count];
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index c2c822223e..c7d830fe21 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -41,7 +41,6 @@
#if CONFIG_FLOAT
# define SHR(a,b) ((a)*(1.0f/(1<<(b))))
-# define compute_antialias compute_antialias_float
# define FIXR_OLD(a) ((int)((a) * FRAC_ONE + 0.5))
# define FIXR(x) ((float)(x))
# define FIXHR(x) ((float)(x))
@@ -51,7 +50,6 @@
# define OUT_FMT AV_SAMPLE_FMT_FLT
#else
# define SHR(a,b) ((a)>>(b))
-# define compute_antialias compute_antialias_integer
/* WARNING: only correct for posititive numbers */
# define FIXR_OLD(a) ((int)((a) * FRAC_ONE + 0.5))
# define FIXR(a) ((int)((a) * FRAC_ONE + 0.5))
@@ -69,7 +67,7 @@
#include "mpegaudiodata.h"
#include "mpegaudiodectab.h"
-static void compute_antialias(MPADecodeContext *s, GranuleDef *g);
+static void RENAME(compute_antialias)(MPADecodeContext *s, GranuleDef *g);
static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
int *dither_state, OUT_INT *samples, int incr);
@@ -1480,8 +1478,7 @@ static void compute_stereo(MPADecodeContext *s,
}
#if !CONFIG_FLOAT
-static void compute_antialias_integer(MPADecodeContext *s,
- GranuleDef *g)
+static void compute_antialias_fixed(MPADecodeContext *s, GranuleDef *g)
{
int32_t *ptr, *csa;
int n, i;
@@ -1848,7 +1845,7 @@ static int mp_decode_layer3(MPADecodeContext *s)
g = &s->granules[ch][gr];
reorder_block(s, g);
- compute_antialias(s, g);
+ RENAME(compute_antialias)(s, g);
compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]);
}
} /* gr */
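
The switch to RENAME(compute_antialias) relies on the template mechanism mpegaudiodec already uses: the file is compiled once for the fixed-point decoder and once, via mpegaudiodec_float.c, with CONFIG_FLOAT set, and a token-pasting RENAME() macro gives each build its own symbol. A rough, self-contained sketch of that mechanism (the real macro lives in FFmpeg's mpegaudio headers and may differ in detail):

#include <stdio.h>

#define CONFIG_FLOAT 1                    /* pretend this is the float build */

#if CONFIG_FLOAT
#   define RENAME(name) name ## _float
#else
#   define RENAME(name) name ## _fixed
#endif

/* One definition in the shared source ... */
static void RENAME(compute_antialias)(void)
{
    puts("built as compute_antialias_float");
}

int main(void)
{
    /* ... becomes compute_antialias_float() here and compute_antialias_fixed()
     * in the fixed-point build, matching the two functions touched above. */
    compute_antialias_float();
    return 0;
}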
diff --git a/libavcodec/mpegaudiodec_float.c b/libavcodec/mpegaudiodec_float.c
index 758ef83e05..183e5540c2 100644
--- a/libavcodec/mpegaudiodec_float.c
+++ b/libavcodec/mpegaudiodec_float.c
@@ -80,13 +80,6 @@ static void compute_antialias_float(MPADecodeContext *s,
}
}
-static av_cold int decode_end(AVCodecContext * avctx)
-{
- MPADecodeContext *s = avctx->priv_data;
- ff_dct_end(&s->dct);
- return 0;
-}
-
#if CONFIG_MP1FLOAT_DECODER
AVCodec ff_mp1float_decoder =
{
@@ -96,7 +89,7 @@ AVCodec ff_mp1float_decoder =
sizeof(MPADecodeContext),
decode_init,
NULL,
- decode_end,
+ NULL,
decode_frame,
CODEC_CAP_PARSE_ONLY,
.flush= flush,
@@ -112,7 +105,7 @@ AVCodec ff_mp2float_decoder =
sizeof(MPADecodeContext),
decode_init,
NULL,
- decode_end,
+ NULL,
decode_frame,
CODEC_CAP_PARSE_ONLY,
.flush= flush,
@@ -128,7 +121,7 @@ AVCodec ff_mp3float_decoder =
sizeof(MPADecodeContext),
decode_init,
NULL,
- decode_end,
+ NULL,
decode_frame,
CODEC_CAP_PARSE_ONLY,
.flush= flush,
@@ -144,7 +137,7 @@ AVCodec ff_mp3adufloat_decoder =
sizeof(MPADecodeContext),
decode_init,
NULL,
- decode_end,
+ NULL,
decode_frame_adu,
CODEC_CAP_PARSE_ONLY,
.flush= flush,
diff --git a/libavcodec/options.c b/libavcodec/options.c
index a2dbb0ba73..ccf1b87c96 100644
--- a/libavcodec/options.c
+++ b/libavcodec/options.c
@@ -441,7 +441,12 @@ static const AVOption options[]={
{"em", "Emergency", 0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_EMERGENCY }, INT_MIN, INT_MAX, A|E, "audio_service_type"},
{"vo", "Voice Over", 0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_VOICE_OVER }, INT_MIN, INT_MAX, A|E, "audio_service_type"},
{"ka", "Karaoke", 0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_KARAOKE }, INT_MIN, INT_MAX, A|E, "audio_service_type"},
-{"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), FF_OPT_TYPE_INT, {.dbl = AV_SAMPLE_FMT_NONE }, AV_SAMPLE_FMT_NONE, AV_SAMPLE_FMT_NB-1, A|D},
+{"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), FF_OPT_TYPE_INT, {.dbl = AV_SAMPLE_FMT_NONE }, AV_SAMPLE_FMT_NONE, AV_SAMPLE_FMT_NB-1, A|D, "request_sample_fmt"},
+{"u8" , "8-bit unsigned integer", 0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_U8 }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
+{"s16", "16-bit signed integer", 0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_S16 }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
+{"s32", "32-bit signed integer", 0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_S32 }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
+{"flt", "32-bit float", 0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_FLT }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
+{"dbl", "64-bit double", 0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_DBL }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
{NULL},
};
diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index f4b743e8ab..f93fff113f 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -979,7 +979,13 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext)
dsputil_init(&vc->dsp, avccontext);
ff_fmt_convert_init(&vc->fmt_conv, avccontext);
- vc->scale_bias = 32768.0f;
+ if (avccontext->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+ avccontext->sample_fmt = AV_SAMPLE_FMT_FLT;
+ vc->scale_bias = 1.0f;
+ } else {
+ avccontext->sample_fmt = AV_SAMPLE_FMT_S16;
+ vc->scale_bias = 32768.0f;
+ }
if (!headers_len) {
av_log(avccontext, AV_LOG_ERROR, "Extradata missing.\n");
@@ -1024,9 +1030,6 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext)
avccontext->channels = vc->audio_channels;
avccontext->sample_rate = vc->audio_samplerate;
avccontext->frame_size = FFMIN(vc->blocksize[0], vc->blocksize[1]) >> 2;
- avccontext->sample_fmt =
- avccontext->request_sample_fmt == AV_SAMPLE_FMT_FLT ?
- AV_SAMPLE_FMT_FLT : AV_SAMPLE_FMT_S16;
return 0 ;
}
@@ -1636,15 +1639,14 @@ static int vorbis_decode_frame(AVCodecContext *avccontext,
len * ff_vorbis_channel_layout_offsets[vc->audio_channels - 1][i];
}
- *data_size = len * vc->audio_channels;
- if (avccontext->sample_fmt == AV_SAMPLE_FMT_FLT) {
- float_interleave(data, channel_ptrs, len, vc->audio_channels);
- *data_size *= sizeof(float);
- } else {
+ if (avccontext->sample_fmt == AV_SAMPLE_FMT_FLT)
+ vc->fmt_conv.float_interleave(data, channel_ptrs, len, vc->audio_channels);
+ else
vc->fmt_conv.float_to_int16_interleave(data, channel_ptrs, len,
vc->audio_channels);
- *data_size *= 2;
- }
+
+ *data_size = len * vc->audio_channels *
+ (av_get_bits_per_sample_fmt(avccontext->sample_fmt) / 8);
return buf_size ;
}
@@ -1671,5 +1673,8 @@ AVCodec ff_vorbis_decoder = {
vorbis_decode_frame,
.long_name = NULL_IF_CONFIG_SMALL("Vorbis"),
.channel_layouts = ff_vorbis_channel_layouts,
+ .sample_fmts = (const enum AVSampleFormat[]) {
+ AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
+ },
};
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index dc038dde73..171e52a165 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -20,6 +20,7 @@
;******************************************************************************
%include "x86inc.asm"
+%include "x86util.asm"
section .text align=16
@@ -89,3 +90,143 @@ FLOAT_TO_INT16_INTERLEAVE6 3dnow
%undef pswapd
FLOAT_TO_INT16_INTERLEAVE6 3dn2
%undef cvtps2pi
+
+;-----------------------------------------------------------------------------
+; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
+;-----------------------------------------------------------------------------
+
+%macro BUTTERFLYPS 3
+ movaps m%3, m%1
+ unpcklps m%1, m%2
+ unpckhps m%3, m%2
+ SWAP %2, %3
+%endmacro
+
+%macro FLOAT_INTERLEAVE6 2
+cglobal float_interleave6_%1, 2,7,%2, dst, src, src1, src2, src3, src4, src5
+%ifdef ARCH_X86_64
+ %define lend r10d
+ mov lend, r2d
+%else
+ %define lend dword r2m
+%endif
+ mov src1q, [srcq+1*gprsize]
+ mov src2q, [srcq+2*gprsize]
+ mov src3q, [srcq+3*gprsize]
+ mov src4q, [srcq+4*gprsize]
+ mov src5q, [srcq+5*gprsize]
+ mov srcq, [srcq]
+ sub src1q, srcq
+ sub src2q, srcq
+ sub src3q, srcq
+ sub src4q, srcq
+ sub src5q, srcq
+.loop:
+%ifidn %1, sse
+ movaps m0, [srcq]
+ movaps m1, [srcq+src1q]
+ movaps m2, [srcq+src2q]
+ movaps m3, [srcq+src3q]
+ movaps m4, [srcq+src4q]
+ movaps m5, [srcq+src5q]
+
+ BUTTERFLYPS 0, 1, 6
+ BUTTERFLYPS 2, 3, 6
+ BUTTERFLYPS 4, 5, 6
+
+ movaps m6, m4
+ shufps m4, m0, 0xe4
+ movlhps m0, m2
+ movhlps m6, m2
+ movaps [dstq ], m0
+ movaps [dstq+16], m4
+ movaps [dstq+32], m6
+
+ movaps m6, m5
+ shufps m5, m1, 0xe4
+ movlhps m1, m3
+ movhlps m6, m3
+ movaps [dstq+48], m1
+ movaps [dstq+64], m5
+ movaps [dstq+80], m6
+%else ; mmx
+ movq m0, [srcq]
+ movq m1, [srcq+src1q]
+ movq m2, [srcq+src2q]
+ movq m3, [srcq+src3q]
+ movq m4, [srcq+src4q]
+ movq m5, [srcq+src5q]
+
+ SBUTTERFLY dq, 0, 1, 6
+ SBUTTERFLY dq, 2, 3, 6
+ SBUTTERFLY dq, 4, 5, 6
+ movq [dstq ], m0
+ movq [dstq+ 8], m2
+ movq [dstq+16], m4
+ movq [dstq+24], m1
+ movq [dstq+32], m3
+ movq [dstq+40], m5
+%endif
+ add srcq, mmsize
+ add dstq, mmsize*6
+ sub lend, mmsize/4
+ jg .loop
+%ifidn %1, mmx
+ emms
+%endif
+ REP_RET
+%endmacro
+
+INIT_MMX
+FLOAT_INTERLEAVE6 mmx, 0
+INIT_XMM
+FLOAT_INTERLEAVE6 sse, 7
+
+;-----------------------------------------------------------------------------
+; void ff_float_interleave2(float *dst, const float **src, unsigned int len);
+;-----------------------------------------------------------------------------
+
+%macro FLOAT_INTERLEAVE2 2
+cglobal float_interleave2_%1, 3,4,%2, dst, src, len, src1
+ mov src1q, [srcq+gprsize]
+ mov srcq, [srcq ]
+ sub src1q, srcq
+.loop
+ MOVPS m0, [srcq ]
+ MOVPS m1, [srcq+src1q ]
+ MOVPS m3, [srcq +mmsize]
+ MOVPS m4, [srcq+src1q+mmsize]
+
+ MOVPS m2, m0
+ PUNPCKLDQ m0, m1
+ PUNPCKHDQ m2, m1
+
+ MOVPS m1, m3
+ PUNPCKLDQ m3, m4
+ PUNPCKHDQ m1, m4
+
+ MOVPS [dstq ], m0
+ MOVPS [dstq+1*mmsize], m2
+ MOVPS [dstq+2*mmsize], m3
+ MOVPS [dstq+3*mmsize], m1
+
+ add srcq, mmsize*2
+ add dstq, mmsize*4
+ sub lend, mmsize/2
+ jg .loop
+%ifidn %1, mmx
+ emms
+%endif
+ REP_RET
+%endmacro
+
+INIT_MMX
+%define MOVPS movq
+%define PUNPCKLDQ punpckldq
+%define PUNPCKHDQ punpckhdq
+FLOAT_INTERLEAVE2 mmx, 0
+INIT_XMM
+%define MOVPS movaps
+%define PUNPCKLDQ unpcklps
+%define PUNPCKHDQ unpckhps
+FLOAT_INTERLEAVE2 sse, 5
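
For readers of the assembly, a plain-C reference (illustrative, not the optimized code) of what ff_float_interleave6 above computes; the SIMD versions additionally assume 16-byte-aligned pointers and a length compatible with the per-iteration step of mmsize/4 samples:

/* Reference behaviour of ff_float_interleave6: interleave six planar float
 * channels into one packed stream. */
static void float_interleave6_ref(float *dst, const float **src,
                                  unsigned int len)
{
    for (unsigned int i = 0; i < len; i++)
        for (int c = 0; c < 6; c++)
            dst[6 * i + c] = src[c][i];
}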
diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c
index ea41f730e8..5cd4b25e33 100644
--- a/libavcodec/x86/fmtconvert_mmx.c
+++ b/libavcodec/x86/fmtconvert_mmx.c
@@ -235,11 +235,40 @@ static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long
float_to_int16_interleave_3dnow(dst, src, len, channels);
}
+void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len);
+void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len);
+
+void ff_float_interleave6_mmx(float *dst, const float **src, unsigned int len);
+void ff_float_interleave6_sse(float *dst, const float **src, unsigned int len);
+
+static void float_interleave_mmx(float *dst, const float **src,
+ unsigned int len, int channels)
+{
+ if (channels == 2) {
+ ff_float_interleave2_mmx(dst, src, len);
+ } else if (channels == 6)
+ ff_float_interleave6_mmx(dst, src, len);
+ else
+ ff_float_interleave_c(dst, src, len, channels);
+}
+
+static void float_interleave_sse(float *dst, const float **src,
+ unsigned int len, int channels)
+{
+ if (channels == 2) {
+ ff_float_interleave2_sse(dst, src, len);
+ } else if (channels == 6)
+ ff_float_interleave6_sse(dst, src, len);
+ else
+ ff_float_interleave_c(dst, src, len, channels);
+}
+
void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
{
int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX) {
+ c->float_interleave = float_interleave_mmx;
if(mm_flags & AV_CPU_FLAG_3DNOW){
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
@@ -256,6 +285,7 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
c->float_to_int16 = float_to_int16_sse;
c->float_to_int16_interleave = float_to_int16_interleave_sse;
+ c->float_interleave = float_interleave_sse;
}
if(mm_flags & AV_CPU_FLAG_SSE2){
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;