From b01a2562ae3ff50198ba189c878023ee4ac34c72 Mon Sep 17 00:00:00 2001
From: Ben Avison <bavison@riscosopen.org>
Date: Thu, 20 Mar 2014 18:59:17 +0000
Subject: truehd: break out part of output_data into platform-specific
 callback.

Verified with profiling that this doesn't have a measurable effect upon
overall performance.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/mlpdec.c | 40 +++++++++++++++++++++++-----------------
 libavcodec/mlpdsp.c | 38 ++++++++++++++++++++++++++++++++++++++
 libavcodec/mlpdsp.h | 22 ++++++++++++++++++++++
 3 files changed, 83 insertions(+), 17 deletions(-)

diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
index 01ded5c3e1..061dabc72c 100644
--- a/libavcodec/mlpdec.c
+++ b/libavcodec/mlpdec.c
@@ -363,6 +363,10 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb)
         m->avctx->sample_fmt = AV_SAMPLE_FMT_S32;
     else
         m->avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+    m->dsp.mlp_pack_output = m->dsp.mlp_select_pack_output(m->substream[m->max_decoded_substream].ch_assign,
+                                                           m->substream[m->max_decoded_substream].output_shift,
+                                                           m->substream[m->max_decoded_substream].max_matrix_channel,
+                                                           m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
 
     m->params_valid = 1;
     for (substr = 0; substr < MAX_SUBSTREAMS; substr++)
@@ -612,6 +616,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
     if (substr == m->max_decoded_substream) {
         m->avctx->channels       = s->max_matrix_channel + 1;
         m->avctx->channel_layout = s->ch_layout;
+        m->dsp.mlp_pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
+                                                               s->output_shift,
+                                                               s->max_matrix_channel,
+                                                               m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
 
         if (m->avctx->codec_id == AV_CODEC_ID_MLP && m->needs_reordering) {
             if (m->avctx->channel_layout == (AV_CH_LAYOUT_QUAD|AV_CH_LOW_FREQUENCY) ||
@@ -857,9 +865,15 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
                 return ret;
 
     if (s->param_presence_flags & PARAM_OUTSHIFT)
-        if (get_bits1(gbp))
+        if (get_bits1(gbp)) {
             for (ch = 0; ch <= s->max_matrix_channel; ch++)
                 s->output_shift[ch] = get_sbits(gbp, 4);
+            if (substr == m->max_decoded_substream)
+                m->dsp.mlp_pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
+                                                                       s->output_shift,
+                                                                       s->max_matrix_channel,
+                                                                       m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
+        }
 
     if (s->param_presence_flags & PARAM_QUANTSTEP)
         if (get_bits1(gbp))
@@ -1058,9 +1072,6 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
 {
     AVCodecContext *avctx = m->avctx;
     SubStream *s = &m->substream[substr];
-    unsigned int i, out_ch = 0;
-    int32_t *data_32;
-    int16_t *data_16;
     int ret;
     int is32 = (m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
 
@@ -1078,19 +1089,14 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
     frame->nb_samples = s->blockpos;
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    data_32 = (int32_t *)frame->data[0];
-    data_16 = (int16_t *)frame->data[0];
-
-    for (i = 0; i < s->blockpos; i++) {
-        for (out_ch = 0; out_ch <= s->max_matrix_channel; out_ch++) {
-            int mat_ch = s->ch_assign[out_ch];
-            int32_t sample = m->sample_buffer[i][mat_ch]
-                          << s->output_shift[mat_ch];
-            s->lossless_check_data ^= (sample & 0xffffff) << mat_ch;
-            if (is32) *data_32++ = sample << 8;
-            else      *data_16++ = sample >> 8;
-        }
-    }
+    s->lossless_check_data = m->dsp.mlp_pack_output(s->lossless_check_data,
+                                                    s->blockpos,
+                                                    m->sample_buffer,
+                                                    frame->data[0],
+                                                    s->ch_assign,
+                                                    s->output_shift,
+                                                    s->max_matrix_channel,
+                                                    is32);
 
     /* Update matrix encoding side data */
     if ((ret = ff_side_data_update_matrix_encoding(frame, s->matrix_encoding)) < 0)
diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
index 7a359b0201..3ae8c37708 100644
--- a/libavcodec/mlpdsp.c
+++ b/libavcodec/mlpdsp.c
@@ -89,10 +89,48 @@ void ff_mlp_rematrix_channel(int32_t *samples,
     }
 }
 
+static int32_t (*mlp_select_pack_output(uint8_t *ch_assign,
+                                        int8_t *output_shift,
+                                        uint8_t max_matrix_channel,
+                                        int is32))(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int)
+{
+    return ff_mlp_pack_output;
+}
+
+int32_t ff_mlp_pack_output(int32_t lossless_check_data,
+                           uint16_t blockpos,
+                           int32_t (*sample_buffer)[MAX_CHANNELS],
+                           void *data,
+                           uint8_t *ch_assign,
+                           int8_t *output_shift,
+                           uint8_t max_matrix_channel,
+                           int is32)
+{
+    unsigned int i, out_ch = 0;
+    int32_t *data_32 = data;
+    int16_t *data_16 = data;
+
+    for (i = 0; i < blockpos; i++) {
+        for (out_ch = 0; out_ch <= max_matrix_channel; out_ch++) {
+            int mat_ch = ch_assign[out_ch];
+            int32_t sample = sample_buffer[i][mat_ch]
+                          << output_shift[mat_ch];
+            lossless_check_data ^= (sample & 0xffffff) << mat_ch;
+            if (is32)
+                *data_32++ = sample << 8;
+            else
+                *data_16++ = sample >> 8;
+        }
+    }
+    return lossless_check_data;
+}
+
 av_cold void ff_mlpdsp_init(MLPDSPContext *c)
 {
     c->mlp_filter_channel = mlp_filter_channel;
     c->mlp_rematrix_channel = ff_mlp_rematrix_channel;
+    c->mlp_select_pack_output = mlp_select_pack_output;
+    c->mlp_pack_output = ff_mlp_pack_output;
     if (ARCH_ARM)
         ff_mlpdsp_init_arm(c);
     if (ARCH_X86)
diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
index f98e9be41d..a0edeb7762 100644
--- a/libavcodec/mlpdsp.h
+++ b/libavcodec/mlpdsp.h
@@ -23,6 +23,7 @@
 #define AVCODEC_MLPDSP_H
 
 #include <stdint.h>
+#include "mlp.h"
 
 void ff_mlp_rematrix_channel(int32_t *samples,
                              const int32_t *coeffs,
@@ -36,6 +37,15 @@ void ff_mlp_rematrix_channel(int32_t *samples,
                              int access_unit_size_pow2,
                              int32_t mask);
 
+int32_t ff_mlp_pack_output(int32_t lossless_check_data,
+                           uint16_t blockpos,
+                           int32_t (*sample_buffer)[MAX_CHANNELS],
+                           void *data,
+                           uint8_t *ch_assign,
+                           int8_t *output_shift,
+                           uint8_t max_matrix_channel,
+                           int is32);
+
 typedef struct MLPDSPContext {
     void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
                                int firorder, int iirorder,
@@ -52,6 +62,18 @@ typedef struct MLPDSPContext {
                                  int matrix_noise_shift,
                                  int access_unit_size_pow2,
                                  int32_t mask);
+    int32_t (*(*mlp_select_pack_output)(uint8_t *ch_assign,
+                                        int8_t *output_shift,
+                                        uint8_t max_matrix_channel,
+                                        int is32))(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int);
+    int32_t (*mlp_pack_output)(int32_t lossless_check_data,
+                               uint16_t blockpos,
+                               int32_t (*sample_buffer)[MAX_CHANNELS],
+                               void *data,
+                               uint8_t *ch_assign,
+                               int8_t *output_shift,
+                               uint8_t max_matrix_channel,
+                               int is32);
 } MLPDSPContext;
 
 void ff_mlpdsp_init(MLPDSPContext *c);
-- 
cgit v1.2.3