truehd: add hand-scheduled ARM asm version of ff_mlp_pack_output.

Profiling results for overall decode and the output_data function in
particular are as follows:

                Before           After
                Mean    StdDev   Mean    StdDev  Confidence  Change
6:2 total       339.6   15.1     329.3   16.0     95.8%       +3.1%  (insignificant)
6:2 function     24.6    6.0       9.9    3.1    100.0%     +148.5%
8:2 total       324.5   15.5     323.6   14.3     15.2%       +0.3%  (insignificant)
8:2 function     20.4    3.9       9.9    3.4    100.0%     +104.7%
6:6 total       572.8   20.6     539.9   24.2    100.0%       +6.1%
6:6 function     54.5    5.6      16.0    3.8    100.0%     +240.9%
8:8 total       741.5   21.2     702.5   18.5    100.0%       +5.6%
8:8 function     63.9    7.6      18.4    4.8    100.0%     +247.3%

The assembly version has also been tested with a fuzz tester to ensure that
any combinations of inputs not exercised by my available test streams still
generate mathematically identical results to the C version.

Signed-off-by: Martin Storsjö <martin@martin.st>
author: Ben Avison <bavison@riscosopen.org> 2014-03-20 18:58:40 +0000
committer: Martin Storsjö <martin@martin.st> 2014-03-26 19:54:32 +0200
commit: 3b5946bccef6cd219f01d22e542ca5c6de68a7be (patch)
tree: 20f5eda099a221bcfba80044c60318704b103c92 /libavcodec/arm/mlpdsp_init_arm.c
parent: b9eb03416d93a5c4ece27ffef5e6e11c81bec6fa (diff)
1 files changed, 94 insertions, 0 deletions
diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c
index e3eeb21e06..4cdd10caf5 100644
--- a/libavcodec/arm/mlpdsp_init_arm.c
+++ b/libavcodec/arm/mlpdsp_init_arm.c
@@ -41,6 +41,98 @@ void ff_mlp_rematrix_channel_arm(int32_t *samples,
                                  int access_unit_size_pow2,
                                  int32_t mask);
 
+#define DECLARE_PACK(order,channels,shift) \
+    int32_t ff_mlp_pack_output_##order##order_##channels##ch_##shift##shift_armv6(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int); /* prototype for one pack routine; its name encodes order, channel count and shift */
+#define ENUMERATE_PACK(order,channels,shift) \
+    ff_mlp_pack_output_##order##order_##channels##ch_##shift##shift_armv6, /* same name followed by a comma, i.e. one array-initialiser element */
+#define PACK_CHANNELS(macro,order,channels) \
+        macro(order,channels,0) \
+        macro(order,channels,1) \
+        macro(order,channels,2) \
+        macro(order,channels,3) \
+        macro(order,channels,4) \
+        macro(order,channels,5) \
+        macro(order,channels,mixed) /* 7 shift variants per channel count: fixed shifts 0-5, then "mixed" */
+#define PACK_ORDER(macro,order) \
+        PACK_CHANNELS(macro,order,2) \
+        PACK_CHANNELS(macro,order,6) \
+        PACK_CHANNELS(macro,order,8) /* 3 channel counts per ordering: 2, 6, 8 */
+#define PACK_ALL(macro) \
+        PACK_ORDER(macro,outof) \
+        PACK_ORDER(macro,in) /* 2 orderings: "outof" (outoforder) first, then "in" (inorder) */
+PACK_ALL(DECLARE_PACK) /* expands to all 2*3*7 = 42 prototypes */
+
+#define ff_mlp_pack_output_outoforder_2ch_mixedshift_armv6 0 /* from here on this name expands to 0, so later uses of it (e.g. in an initialiser) yield a null entry */
+#define ff_mlp_pack_output_outoforder_6ch_mixedshift_armv6 0 /* likewise for the 6-channel outoforder/mixedshift name */
+#define ff_mlp_pack_output_outoforder_8ch_mixedshift_armv6 0 /* likewise for the 8-channel outoforder/mixedshift name */
+#if CONFIG_THUMB /* when CONFIG_THUMB is non-zero, every remaining outoforder name is mapped to 0 as well */
+#define ff_mlp_pack_output_outoforder_2ch_0shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_1shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_2shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_3shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_4shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_5shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_0shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_1shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_2shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_3shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_4shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_5shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_0shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_1shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_2shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_3shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_4shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_5shift_armv6 0
+#endif /* CONFIG_THUMB */
+
+static int32_t (*mlp_select_pack_output_armv6(uint8_t *ch_assign, // Pick a specialised ARMv6 pack routine for this channel order / shift set, or return ff_mlp_pack_output as the fallback.
+                                              int8_t *output_shift, // per-channel shifts; entries 0..max_matrix_channel are read
+                                              uint8_t max_matrix_channel, // highest channel index; only 1, 5 and 7 have specialised routines
+                                              int is32))(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int)
+{
+    int ch_index; // 0, 1 or 2: position of the 2-, 6- or 8-channel group within the table
+    int shift = output_shift[0] < 0 || output_shift[0] > 5 ? 6 : output_shift[0]; // first channel's shift if it is 0..5, otherwise 6 (the "mixed" slot)
+    int inorder = 1; // stays 1 only if ch_assign[i] == i for every channel checked below
+    static int32_t (*const routine[2*3*7])(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int) = { // 2 orderings x 3 channel counts x 7 shift variants
+            PACK_ALL(ENUMERATE_PACK)
+    };
+    int i;
+
+    if (!is32) // don't support 16-bit output (it's not used by TrueHD)
+        return ff_mlp_pack_output;
+
+    switch (max_matrix_channel) { // translate the highest channel index into a table group
+    case 1:
+        ch_index = 0;
+        break;
+    case 5:
+        ch_index = 1;
+        break;
+    case 7:
+        ch_index = 2;
+        break;
+    default:
+        return ff_mlp_pack_output; // any other channel count has no specialised routine
+    }
+
+    for (i = 0; i <= max_matrix_channel; i++) { // inclusive bound: max_matrix_channel is an index, not a count
+        if (shift != 6 && output_shift[i] != shift)
+            shift = 6; // indicate mixed shifts
+        if (ch_assign[i] != i)
+            inorder = 0;
+    }
+#if CONFIG_THUMB
+    if (!inorder)
+        return ff_mlp_pack_output; // can't currently handle an order array except in ARM mode
+#else
+    if (shift == 6 && !inorder)
+        return ff_mlp_pack_output; // can't currently handle both an order array and a shift array
+#endif
+
+    return routine[(inorder*3+ch_index)*7+shift]; // table layout follows PACK_ALL: outoforder block (21 entries) then inorder; 7 shift slots per channel group
+}
+
 av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -49,4 +141,6 @@ av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
         c->mlp_filter_channel = ff_mlp_filter_channel_arm;
         c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm;
     }
+    if (have_armv6(cpu_flags))
+        c->mlp_select_pack_output = mlp_select_pack_output_armv6;
 }
author	Ben Avison <bavison@riscosopen.org>	2014-03-20 18:58:40 +0000
committer	Martin Storsjö <martin@martin.st>	2014-03-26 19:54:32 +0200
commit	3b5946bccef6cd219f01d22e542ca5c6de68a7be (patch)
tree	20f5eda099a221bcfba80044c60318704b103c92 /libavcodec/arm/mlpdsp_init_arm.c
parent	b9eb03416d93a5c4ece27ffef5e6e11c81bec6fa (diff)