summary | refs | log | tree | commit | diff
path: root/libavcodec
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/acelp_pitch_delay.h 74
-rw-r--r-- libavcodec/arm/aac.h 3
-rw-r--r-- libavcodec/arm/ac3dsp_arm.S 1
-rw-r--r-- libavcodec/arm/ac3dsp_armv6.S 2
-rw-r--r-- libavcodec/arm/ac3dsp_neon.S 1
-rw-r--r-- libavcodec/arm/asm.S 93
-rw-r--r-- libavcodec/arm/dcadsp_neon.S 1
-rw-r--r-- libavcodec/arm/dsputil_arm.S 10
-rw-r--r-- libavcodec/arm/dsputil_armv6.S 76
-rw-r--r-- libavcodec/arm/dsputil_neon.S 3
-rw-r--r-- libavcodec/arm/dsputil_vfp.S 21
-rw-r--r-- libavcodec/arm/fmtconvert_neon.S 4
-rw-r--r-- libavcodec/arm/fmtconvert_vfp.S 3
-rw-r--r-- libavcodec/arm/h264dsp_neon.S 98
-rw-r--r-- libavcodec/arm/h264idct_neon.S 23
-rw-r--r-- libavcodec/arm/mathops.h 3
-rw-r--r-- libavcodec/arm/mdct_neon.S 4
-rw-r--r-- libavcodec/arm/mpegaudiodsp_fixed_armv6.S 6
-rw-r--r-- libavcodec/arm/mpegvideo_armv5te_s.S 12
-rw-r--r-- libavcodec/arm/mpegvideo_neon.S 4
-rw-r--r-- libavcodec/arm/rdft_neon.S 1
-rw-r--r-- libavcodec/arm/simple_idct_arm.S 32
-rw-r--r-- libavcodec/arm/simple_idct_armv5te.S 39
-rw-r--r-- libavcodec/arm/simple_idct_armv6.S 33
-rw-r--r-- libavcodec/arm/simple_idct_neon.S 6
-rw-r--r-- libavcodec/arm/synth_filter_neon.S 2
-rw-r--r-- libavcodec/arm/vp56_arith.h 27
-rw-r--r-- libavcodec/arm/vp8_armv6.S 36
-rw-r--r-- libavcodec/arm/vp8dsp_neon.S 16
-rw-r--r-- libavcodec/avcodec.h 8
-rw-r--r-- libavcodec/celp_filters.h 2
-rw-r--r-- libavcodec/fft.h 2
-rw-r--r-- libavcodec/g729dec.c 2
-rw-r--r-- libavcodec/h264_parser.c 2
-rw-r--r-- libavcodec/lagarith.c 26
-rw-r--r-- libavcodec/lagarithrac.h 14
-rw-r--r-- libavcodec/lcldec.c 14
-rw-r--r-- libavcodec/lsp.c 6
-rw-r--r-- libavcodec/lsp.h 42
-rw-r--r-- libavcodec/motion_est.c 2
-rw-r--r-- libavcodec/motion_est_template.c 8
-rw-r--r-- libavcodec/mpegvideo.h 2
-rw-r--r-- libavcodec/nuv.c 14
-rw-r--r-- libavcodec/qcelpdata.h 6
-rw-r--r-- libavcodec/qcelpdec.c 8
-rw-r--r-- libavcodec/rtjpeg.c 38
-rw-r--r-- libavcodec/tableprint.h 2
-rw-r--r-- libavcodec/twinvq.c 8
-rw-r--r-- libavcodec/vaapi.c 2
-rw-r--r-- libavcodec/vaapi.h 4
-rw-r--r-- libavcodec/vaapi_internal.h 2
-rw-r--r-- libavcodec/vdpau.c 2
-rw-r--r-- libavcodec/vdpau.h 12
-rw-r--r-- libavcodec/x86/idct_sse2_xvid.c 2
-rw-r--r-- libavcodec/x86/idct_xvid.h 2
-rw-r--r-- libavcodec/xsubenc.c 4
56 files changed, 570 insertions, 300 deletions
diff --git a/libavcodec/acelp_pitch_delay.h b/libavcodec/acelp_pitch_delay.h
index ce06bc2539..72977f1f49 100644
--- a/libavcodec/acelp_pitch_delay.h
+++ b/libavcodec/acelp_pitch_delay.h
@@ -30,11 +30,11 @@
#define PITCH_DELAY_MAX 143
/**
- * \brief Decode pitch delay of the first subframe encoded by 8 bits with 1/3
+ * @brief Decode pitch delay of the first subframe encoded by 8 bits with 1/3
* resolution.
- * \param ac_index adaptive codebook index (8 bits)
+ * @param ac_index adaptive codebook index (8 bits)
*
- * \return pitch delay in 1/3 units
+ * @return pitch delay in 1/3 units
*
* Pitch delay is coded:
* with 1/3 resolution, 19 < pitch_delay < 85
@@ -43,18 +43,18 @@
int ff_acelp_decode_8bit_to_1st_delay3(int ac_index);
/**
- * \brief Decode pitch delay of the second subframe encoded by 5 or 6 bits
+ * @brief Decode pitch delay of the second subframe encoded by 5 or 6 bits
* with 1/3 precision.
- * \param ac_index adaptive codebook index (5 or 6 bits)
- * \param pitch_delay_min lower bound (integer) of pitch delay interval
+ * @param ac_index adaptive codebook index (5 or 6 bits)
+ * @param pitch_delay_min lower bound (integer) of pitch delay interval
* for second subframe
*
- * \return pitch delay in 1/3 units
+ * @return pitch delay in 1/3 units
*
* Pitch delay is coded:
* with 1/3 resolution, -6 < pitch_delay - int(prev_pitch_delay) < 5
*
- * \remark The routine is used in G.729 @@8k, AMR @@10.2k, AMR @@7.95k,
+ * @remark The routine is used in G.729 @@8k, AMR @@10.2k, AMR @@7.95k,
* AMR @@7.4k for the second subframe.
*/
int ff_acelp_decode_5_6_bit_to_2nd_delay3(
@@ -62,19 +62,19 @@ int ff_acelp_decode_5_6_bit_to_2nd_delay3(
int pitch_delay_min);
/**
- * \brief Decode pitch delay with 1/3 precision.
- * \param ac_index adaptive codebook index (4 bits)
- * \param pitch_delay_min lower bound (integer) of pitch delay interval for
+ * @brief Decode pitch delay with 1/3 precision.
+ * @param ac_index adaptive codebook index (4 bits)
+ * @param pitch_delay_min lower bound (integer) of pitch delay interval for
* second subframe
*
- * \return pitch delay in 1/3 units
+ * @return pitch delay in 1/3 units
*
* Pitch delay is coded:
* integers only, -6 < pitch_delay - int(prev_pitch_delay) <= -2
* with 1/3 resolution, -2 < pitch_delay - int(prev_pitch_delay) < 1
* integers only, 1 <= pitch_delay - int(prev_pitch_delay) < 5
*
- * \remark The routine is used in G.729 @@6.4k, AMR @@6.7k, AMR @@5.9k,
+ * @remark The routine is used in G.729 @@6.4k, AMR @@6.7k, AMR @@5.9k,
* AMR @@5.15k, AMR @@4.75k for the second subframe.
*/
int ff_acelp_decode_4bit_to_2nd_delay3(
@@ -82,44 +82,44 @@ int ff_acelp_decode_4bit_to_2nd_delay3(
int pitch_delay_min);
/**
- * \brief Decode pitch delay of the first subframe encoded by 9 bits
+ * @brief Decode pitch delay of the first subframe encoded by 9 bits
* with 1/6 precision.
- * \param ac_index adaptive codebook index (9 bits)
+ * @param ac_index adaptive codebook index (9 bits)
*
- * \return pitch delay in 1/6 units
+ * @return pitch delay in 1/6 units
*
* Pitch delay is coded:
* with 1/6 resolution, 17 < pitch_delay < 95
* integers only, 95 <= pitch_delay <= 143
*
- * \remark The routine is used in AMR @@12.2k for the first and third subframes.
+ * @remark The routine is used in AMR @@12.2k for the first and third subframes.
*/
int ff_acelp_decode_9bit_to_1st_delay6(int ac_index);
/**
- * \brief Decode pitch delay of the second subframe encoded by 6 bits
+ * @brief Decode pitch delay of the second subframe encoded by 6 bits
* with 1/6 precision.
- * \param ac_index adaptive codebook index (6 bits)
- * \param pitch_delay_min lower bound (integer) of pitch delay interval for
+ * @param ac_index adaptive codebook index (6 bits)
+ * @param pitch_delay_min lower bound (integer) of pitch delay interval for
* second subframe
*
- * \return pitch delay in 1/6 units
+ * @return pitch delay in 1/6 units
*
* Pitch delay is coded:
* with 1/6 resolution, -6 < pitch_delay - int(prev_pitch_delay) < 5
*
- * \remark The routine is used in AMR @@12.2k for the second and fourth subframes.
+ * @remark The routine is used in AMR @@12.2k for the second and fourth subframes.
*/
int ff_acelp_decode_6bit_to_2nd_delay6(
int ac_index,
int pitch_delay_min);
/**
- * \brief Update past quantized energies
- * \param[in,out] quant_energy past quantized energies (5.10)
- * \param gain_corr_factor gain correction factor
- * \param log2_ma_pred_order log2() of MA prediction order
- * \param erasure frame erasure flag
+ * @brief Update past quantized energies
+ * @param[in,out] quant_energy past quantized energies (5.10)
+ * @param gain_corr_factor gain correction factor
+ * @param log2_ma_pred_order log2() of MA prediction order
+ * @param erasure frame erasure flag
*
* If frame erasure flag is not equal to zero, memory is updated with
* averaged energy, attenuated by 4dB:
@@ -128,7 +128,7 @@ int ff_acelp_decode_6bit_to_2nd_delay6(
* In normal mode memory is updated with
* Er - Ep = 20 * log10(gain_corr_factor)
*
- * \remark The routine is used in G.729 and AMR (all modes).
+ * @remark The routine is used in G.729 and AMR (all modes).
*/
void ff_acelp_update_past_gain(
int16_t* quant_energy,
@@ -137,16 +137,16 @@ void ff_acelp_update_past_gain(
int erasure);
/**
- * \brief Decode the adaptive codebook gain and add
+ * @brief Decode the adaptive codebook gain and add
* correction (4.1.5 and 3.9.1 of G.729).
- * \param dsp initialized dsputil context
- * \param gain_corr_factor gain correction factor (2.13)
- * \param fc_v fixed-codebook vector (2.13)
- * \param mr_energy mean innovation energy and fixed-point correction (7.13)
- * \param[in,out] quant_energy past quantized energies (5.10)
- * \param subframe_size length of subframe
+ * @param dsp initialized dsputil context
+ * @param gain_corr_factor gain correction factor (2.13)
+ * @param fc_v fixed-codebook vector (2.13)
+ * @param mr_energy mean innovation energy and fixed-point correction (7.13)
+ * @param[in,out] quant_energy past quantized energies (5.10)
+ * @param subframe_size length of subframe
*
- * \return quantized fixed-codebook gain (14.1)
+ * @return quantized fixed-codebook gain (14.1)
*
* The routine implements equations 69, 66 and 71 of the G.729 specification (3.9.1)
*
@@ -205,7 +205,7 @@ void ff_acelp_update_past_gain(
*
* mr_energy = Em + 10log(N) + 10log(2^26)
*
- * \remark The routine is used in G.729 and AMR (all modes).
+ * @remark The routine is used in G.729 and AMR (all modes).
*/
int16_t ff_acelp_decode_gain_code(
DSPContext *dsp,
diff --git a/libavcodec/arm/aac.h b/libavcodec/arm/aac.h
index 3b14c094c6..bd4d293f02 100644
--- a/libavcodec/arm/aac.h
+++ b/libavcodec/arm/aac.h
@@ -114,12 +114,15 @@ static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
"vmov d1, %2, %3 \n\t"
"lsls %6, %6, #1 \n\t"
"and %0, %5, #1<<31 \n\t"
+ "it cs \n\t"
"lslcs %5, %5, #1 \n\t"
"lsls %6, %6, #1 \n\t"
"and %1, %5, #1<<31 \n\t"
+ "it cs \n\t"
"lslcs %5, %5, #1 \n\t"
"lsls %6, %6, #1 \n\t"
"and %2, %5, #1<<31 \n\t"
+ "it cs \n\t"
"lslcs %5, %5, #1 \n\t"
"vmov d4, %0, %1 \n\t"
"and %3, %5, #1<<31 \n\t"
diff --git a/libavcodec/arm/ac3dsp_arm.S b/libavcodec/arm/ac3dsp_arm.S
index 545714cff1..9a7d20eb7b 100644
--- a/libavcodec/arm/ac3dsp_arm.S
+++ b/libavcodec/arm/ac3dsp_arm.S
@@ -27,6 +27,7 @@ function ff_ac3_update_bap_counts_arm, export=1
lsl r3, lr, #1
ldrh r12, [r0, r3]
subs r2, r2, #1
+ it gt
ldrbgt lr, [r1], #1
add r12, r12, #1
strh r12, [r0, r3]
diff --git a/libavcodec/arm/ac3dsp_armv6.S b/libavcodec/arm/ac3dsp_armv6.S
index 2b2f2acf22..615baf94e0 100644
--- a/libavcodec/arm/ac3dsp_armv6.S
+++ b/libavcodec/arm/ac3dsp_armv6.S
@@ -42,9 +42,11 @@ function ff_ac3_bit_alloc_calc_bap_armv6, export=1
mov r11, r10
ldrb r10, [r4], #1 @ band_start_tab[band++]
subs r9, r9, r5 @ - floor
+ it lt
movlt r9, #0
cmp r10, r3 @ - end
and r9, r9, r8 @ & 0x1fe0
+ ite gt
subgt r8, r3, r11
suble r8, r10, r11
add r9, r9, r5 @ + floor => m
diff --git a/libavcodec/arm/ac3dsp_neon.S b/libavcodec/arm/ac3dsp_neon.S
index 946b39f25b..fdf1deabc9 100644
--- a/libavcodec/arm/ac3dsp_neon.S
+++ b/libavcodec/arm/ac3dsp_neon.S
@@ -41,6 +41,7 @@ endfunc
function ff_ac3_exponent_min_neon, export=1
cmp r1, #0
+ it eq
bxeq lr
push {lr}
mov r12, #256
diff --git a/libavcodec/arm/asm.S b/libavcodec/arm/asm.S
index bb999fd61a..fc7ee60357 100644
--- a/libavcodec/arm/asm.S
+++ b/libavcodec/arm/asm.S
@@ -26,7 +26,16 @@
# define ELF @
#endif
+#if CONFIG_THUMB
+# define A @
+# define T
+#else
+# define A
+# define T @
+#endif
+
.syntax unified
+T .thumb
.macro require8 val=1
ELF .eabi_attribute 24, \val
@@ -82,6 +91,90 @@ ELF .size \name, . - \name
#endif
.endm
+.macro ldr_pre rt, rn, rm:vararg
+A ldr \rt, [\rn, \rm]!
+T add \rn, \rn, \rm
+T ldr \rt, [\rn]
+.endm
+
+.macro ldr_post rt, rn, rm:vararg
+A ldr \rt, [\rn], \rm
+T ldr \rt, [\rn]
+T add \rn, \rn, \rm
+.endm
+
+.macro ldrd_reg rt, rt2, rn, rm
+A ldrd \rt, \rt2, [\rn, \rm]
+T add \rt, \rn, \rm
+T ldrd \rt, \rt2, [\rt]
+.endm
+
+.macro ldrd_post rt, rt2, rn, rm
+A ldrd \rt, \rt2, [\rn], \rm
+T ldrd \rt, \rt2, [\rn]
+T add \rn, \rn, \rm
+.endm
+
+.macro ldrh_pre rt, rn, rm
+A ldrh \rt, [\rn, \rm]!
+T add \rn, \rn, \rm
+T ldrh \rt, [\rn]
+.endm
+
+.macro ldrh_dpre rt, rn, rm
+A ldrh \rt, [\rn, -\rm]!
+T sub \rn, \rn, \rm
+T ldrh \rt, [\rn]
+.endm
+
+.macro ldrh_post rt, rn, rm
+A ldrh \rt, [\rn], \rm
+T ldrh \rt, [\rn]
+T add \rn, \rn, \rm
+.endm
+
+.macro str_post rt, rn, rm:vararg
+A str \rt, [\rn], \rm
+T str \rt, [\rn]
+T add \rn, \rn, \rm
+.endm
+
+.macro strb_post rt, rn, rm:vararg
+A strb \rt, [\rn], \rm
+T strb \rt, [\rn]
+T add \rn, \rn, \rm
+.endm
+
+.macro strd_post rt, rt2, rn, rm
+A strd \rt, \rt2, [\rn], \rm
+T strd \rt, \rt2, [\rn]
+T add \rn, \rn, \rm
+.endm
+
+.macro strh_pre rt, rn, rm
+A strh \rt, [\rn, \rm]!
+T add \rn, \rn, \rm
+T strh \rt, [\rn]
+.endm
+
+.macro strh_dpre rt, rn, rm
+A strh \rt, [\rn, -\rm]!
+T sub \rn, \rn, \rm
+T strh \rt, [\rn]
+.endm
+
+.macro strh_post rt, rn, rm
+A strh \rt, [\rn], \rm
+T strh \rt, [\rn]
+T add \rn, \rn, \rm
+.endm
+
+.macro strh_dpost rt, rn, rm
+A strh \rt, [\rn], -\rm
+T strh \rt, [\rn]
+T sub \rn, \rn, \rm
+.endm
+
#if HAVE_VFP_ARGS
.eabi_attribute 28, 1
# define VFP
diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S
index c3bddd3e41..852527a59e 100644
--- a/libavcodec/arm/dcadsp_neon.S
+++ b/libavcodec/arm/dcadsp_neon.S
@@ -27,6 +27,7 @@ function ff_dca_lfe_fir_neon, export=1
add r5, r2, #256*4-16 @ cf1
sub r1, r1, #12
cmp r3, #32
+ ite eq
moveq r6, #256/32
movne r6, #256/64
NOVFP vldr s0, [sp, #16] @ scale
diff --git a/libavcodec/arm/dsputil_arm.S b/libavcodec/arm/dsputil_arm.S
index 7ee85e808b..c614206bac 100644
--- a/libavcodec/arm/dsputil_arm.S
+++ b/libavcodec/arm/dsputil_arm.S
@@ -554,10 +554,12 @@ endfunc
and r9, r5, r14
and r10, r6, r14
and r11, r7, r14
+ it eq
andeq r14, r14, r14, \rnd #1
add r8, r8, r10
add r9, r9, r11
ldr r12, =0xfcfcfcfc >> 2
+ itt eq
addeq r8, r8, r14
addeq r9, r9, r14
and r4, r12, r4, lsr #2
@@ -638,8 +640,10 @@ function ff_add_pixels_clamped_arm, export=1
mvn r5, r5
mvn r7, r7
tst r6, #0x100
+ it ne
movne r6, r5, lsr #24
tst r8, #0x100
+ it ne
movne r8, r7, lsr #24
mov r9, r6
ldrsh r5, [r0, #4] /* moved form [A] */
@@ -654,8 +658,10 @@ function ff_add_pixels_clamped_arm, export=1
mvn r5, r5
mvn r7, r7
tst r6, #0x100
+ it ne
movne r6, r5, lsr #24
tst r8, #0x100
+ it ne
movne r8, r7, lsr #24
orr r9, r9, r6, lsl #16
ldr r4, [r1, #4] /* moved form [B] */
@@ -676,8 +682,10 @@ function ff_add_pixels_clamped_arm, export=1
mvn r5, r5
mvn r7, r7
tst r6, #0x100
+ it ne
movne r6, r5, lsr #24
tst r8, #0x100
+ it ne
movne r8, r7, lsr #24
mov r9, r6
ldrsh r5, [r0, #12] /* moved from [D] */
@@ -692,8 +700,10 @@ function ff_add_pixels_clamped_arm, export=1
mvn r5, r5
mvn r7, r7
tst r6, #0x100
+ it ne
movne r6, r5, lsr #24
tst r8, #0x100
+ it ne
movne r8, r7, lsr #24
orr r9, r9, r6, lsl #16
add r0, r0, #16 /* moved from [E] */
diff --git a/libavcodec/arm/dsputil_armv6.S b/libavcodec/arm/dsputil_armv6.S
index 214d947da3..a2c8588fad 100644
--- a/libavcodec/arm/dsputil_armv6.S
+++ b/libavcodec/arm/dsputil_armv6.S
@@ -47,16 +47,16 @@ function ff_put_pixels16_armv6, export=1
ldr r5, [r1, #4]
ldr r6, [r1, #8]
ldr r7, [r1, #12]
- ldr r4, [r1], r2
+ ldr_post r4, r1, r2
strd r6, r7, [r0, #8]
ldr r9, [r1, #4]
- strd r4, r5, [r0], r2
+ strd_post r4, r5, r0, r2
ldr r10, [r1, #8]
ldr r11, [r1, #12]
- ldr r8, [r1], r2
+ ldr_post r8, r1, r2
strd r10, r11, [r0, #8]
subs r3, r3, #2
- strd r8, r9, [r0], r2
+ strd_post r8, r9, r0, r2
bne 1b
pop {r4-r11}
@@ -67,12 +67,12 @@ function ff_put_pixels8_armv6, export=1
push {r4-r7}
1:
ldr r5, [r1, #4]
- ldr r4, [r1], r2
+ ldr_post r4, r1, r2
ldr r7, [r1, #4]
- strd r4, r5, [r0], r2
- ldr r6, [r1], r2
+ strd_post r4, r5, r0, r2
+ ldr_post r6, r1, r2
subs r3, r3, #2
- strd r6, r7, [r0], r2
+ strd_post r6, r7, r0, r2
bne 1b
pop {r4-r7}
@@ -90,7 +90,7 @@ function ff_put_pixels8_x2_armv6, export=1
ldr r5, [r1, #4]
ldr r7, [r1, #5]
lsr r6, r4, #8
- ldr r8, [r1, r2]!
+ ldr_pre r8, r1, r2
orr r6, r6, r5, lsl #24
ldr r9, [r1, #4]
ldr r11, [r1, #5]
@@ -112,9 +112,9 @@ function ff_put_pixels8_x2_armv6, export=1
uhadd8 r9, r9, r11
and r6, r6, r12
uadd8 r8, r8, r14
- strd r4, r5, [r0], r2
+ strd_post r4, r5, r0, r2
uadd8 r9, r9, r6
- strd r8, r9, [r0], r2
+ strd_post r8, r9, r0, r2
bne 1b
pop {r4-r11, pc}
@@ -127,7 +127,7 @@ function ff_put_pixels8_y2_armv6, export=1
orr r12, r12, r12, lsl #16
ldr r4, [r1]
ldr r5, [r1, #4]
- ldr r6, [r1, r2]!
+ ldr_pre r6, r1, r2
ldr r7, [r1, #4]
1:
subs r3, r3, #2
@@ -136,7 +136,7 @@ function ff_put_pixels8_y2_armv6, export=1
uhadd8 r9, r5, r7
eor r11, r5, r7
and r10, r10, r12
- ldr r4, [r1, r2]!
+ ldr_pre r4, r1, r2
uadd8 r8, r8, r10
and r11, r11, r12
uadd8 r9, r9, r11
@@ -148,11 +148,11 @@ function ff_put_pixels8_y2_armv6, export=1
eor r7, r5, r7
uadd8 r10, r10, r6
and r7, r7, r12
- ldr r6, [r1, r2]!
+ ldr_pre r6, r1, r2
uadd8 r11, r11, r7
- strd r8, r9, [r0], r2
+ strd_post r8, r9, r0, r2
ldr r7, [r1, #4]
- strd r10, r11, [r0], r2
+ strd_post r10, r11, r0, r2
bne 1b
pop {r4-r11}
@@ -166,7 +166,7 @@ function ff_put_pixels8_x2_no_rnd_armv6, export=1
ldr r4, [r1]
ldr r5, [r1, #4]
ldr r7, [r1, #5]
- ldr r8, [r1, r2]!
+ ldr_pre r8, r1, r2
ldr r9, [r1, #4]
ldr r14, [r1, #5]
add r1, r1, r2
@@ -191,16 +191,16 @@ function ff_put_pixels8_y2_no_rnd_armv6, export=1
push {r4-r9, lr}
ldr r4, [r1]
ldr r5, [r1, #4]
- ldr r6, [r1, r2]!
+ ldr_pre r6, r1, r2
ldr r7, [r1, #4]
1:
subs r3, r3, #2
uhadd8 r8, r4, r6
- ldr r4, [r1, r2]!
+ ldr_pre r4, r1, r2
uhadd8 r9, r5, r7
ldr r5, [r1, #4]
uhadd8 r12, r4, r6
- ldr r6, [r1, r2]!
+ ldr_pre r6, r1, r2
uhadd8 r14, r5, r7
ldr r7, [r1, #4]
stm r0, {r8,r9}
@@ -220,44 +220,44 @@ function ff_avg_pixels8_armv6, export=1
orr lr, lr, lr, lsl #16
ldrd r4, r5, [r0]
ldr r10, [r1, #4]
- ldr r9, [r1], r2
+ ldr_post r9, r1, r2
subs r3, r3, #2
1:
pld [r1, r2]
eor r8, r4, r9
uhadd8 r4, r4, r9
eor r12, r5, r10
- ldrd r6, r7, [r0, r2]
+ ldrd_reg r6, r7, r0, r2
uhadd8 r5, r5, r10
and r8, r8, lr
ldr r10, [r1, #4]
and r12, r12, lr
uadd8 r4, r4, r8
- ldr r9, [r1], r2
+ ldr_post r9, r1, r2
eor r8, r6, r9
uadd8 r5, r5, r12
pld [r1, r2, lsl #1]
eor r12, r7, r10
uhadd8 r6, r6, r9
- strd r4, r5, [r0], r2
+ strd_post r4, r5, r0, r2
uhadd8 r7, r7, r10
beq 2f
and r8, r8, lr
- ldrd r4, r5, [r0, r2]
+ ldrd_reg r4, r5, r0, r2
uadd8 r6, r6, r8
ldr r10, [r1, #4]
and r12, r12, lr
subs r3, r3, #2
uadd8 r7, r7, r12
- ldr r9, [r1], r2
- strd r6, r7, [r0], r2
+ ldr_post r9, r1, r2
+ strd_post r6, r7, r0, r2
b 1b
2:
and r8, r8, lr
and r12, r12, lr
uadd8 r6, r6, r8
uadd8 r7, r7, r12
- strd r6, r7, [r0], r2
+ strd_post r6, r7, r0, r2
pop {r4-r10, pc}
endfunc
@@ -284,7 +284,7 @@ function ff_add_pixels_clamped_armv6, export=1
orr r6, r8, r5, lsl #8
orr r7, r4, lr, lsl #8
subs r3, r3, #1
- strd r6, r7, [r1], r2
+ strd_post r6, r7, r1, r2
bgt 1b
pop {r4-r8,pc}
endfunc
@@ -294,7 +294,7 @@ function ff_get_pixels_armv6, export=1
push {r4-r8, lr}
mov lr, #8
1:
- ldrd r4, r5, [r1], r2
+ ldrd_post r4, r5, r1, r2
subs lr, lr, #1
uxtb16 r6, r4
uxtb16 r4, r4, ror #8
@@ -317,8 +317,8 @@ function ff_diff_pixels_armv6, export=1
push {r4-r9, lr}
mov lr, #8
1:
- ldrd r4, r5, [r1], r3
- ldrd r6, r7, [r2], r3
+ ldrd_post r4, r5, r1, r3
+ ldrd_post r6, r7, r2, r3
uxtb16 r8, r4
uxtb16 r4, r4, ror #8
uxtb16 r9, r6
@@ -492,19 +492,19 @@ function ff_pix_abs8_armv6, export=1
push {r4-r9, lr}
mov r0, #0
mov lr, #0
- ldrd r4, r5, [r1], r3
+ ldrd_post r4, r5, r1, r3
1:
subs r12, r12, #2
ldr r7, [r2, #4]
- ldr r6, [r2], r3
- ldrd r8, r9, [r1], r3
+ ldr_post r6, r2, r3
+ ldrd_post r8, r9, r1, r3
usada8 r0, r4, r6, r0
pld [r2, r3]
usada8 lr, r5, r7, lr
ldr r7, [r2, #4]
- ldr r6, [r2], r3
+ ldr_post r6, r2, r3
beq 2f
- ldrd r4, r5, [r1], r3
+ ldrd_post r4, r5, r1, r3
usada8 r0, r8, r6, r0
pld [r2, r3]
usada8 lr, r9, r7, lr
@@ -613,7 +613,7 @@ function ff_pix_sum_armv6, export=1
ldr r7, [r0, #12]
usada8 r2, r6, lr, r2
beq 2f
- ldr r4, [r0, r1]!
+ ldr_pre r4, r0, r1
usada8 r3, r7, lr, r3
bgt 1b
2:
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index 0dbf5ca48a..2147658af6 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -531,6 +531,7 @@ function ff_vorbis_inverse_coupling_neon, export=1
2: vst1.32 {d2-d3}, [r3, :128]!
vst1.32 {d0-d1}, [r12,:128]!
+ it lt
bxlt lr
3: vld1.32 {d2-d3}, [r1,:128]
@@ -575,6 +576,7 @@ NOVFP vdup.32 q8, r2
2: vst1.32 {q2},[r0,:128]!
vst1.32 {q3},[r0,:128]!
ands len, len, #15
+ it eq
bxeq lr
3: vld1.32 {q0},[r1,:128]!
vmul.f32 q0, q0, q8
@@ -638,6 +640,7 @@ NOVFP ldr r3, [sp]
2: vst1.32 {q8},[r0,:128]!
vst1.32 {q9},[r0,:128]!
ands r3, r3, #7
+ it eq
popeq {pc}
3: vld1.32 {q0},[r1,:128]!
ldr r12, [r2], #4
diff --git a/libavcodec/arm/dsputil_vfp.S b/libavcodec/arm/dsputil_vfp.S
index 497c02be92..108208174d 100644
--- a/libavcodec/arm/dsputil_vfp.S
+++ b/libavcodec/arm/dsputil_vfp.S
@@ -55,18 +55,23 @@ function ff_vector_fmul_vfp, export=1
1:
subs r3, r3, #16
vmul.f32 s12, s4, s12
+ itttt ge
vldmiage r1!, {s16-s19}
vldmiage r2!, {s24-s27}
vldmiage r1!, {s20-s23}
vldmiage r2!, {s28-s31}
+ it ge
vmulge.f32 s24, s16, s24
vstmia r0!, {s8-s11}
vstmia r0!, {s12-s15}
+ it ge
vmulge.f32 s28, s20, s28
+ itttt gt
vldmiagt r1!, {s0-s3}
vldmiagt r2!, {s8-s11}
vldmiagt r1!, {s4-s7}
vldmiagt r2!, {s12-s15}
+ ittt ge
vmulge.f32 s8, s0, s8
vstmiage r0!, {s24-s27}
vstmiage r0!, {s28-s31}
@@ -97,33 +102,49 @@ function ff_vector_fmul_reverse_vfp, export=1
vmul.f32 s11, s0, s11
1:
subs r3, r3, #16
+ it ge
vldmdbge r2!, {s16-s19}
vmul.f32 s12, s7, s12
+ it ge
vldmiage r1!, {s24-s27}
vmul.f32 s13, s6, s13
+ it ge
vldmdbge r2!, {s20-s23}
vmul.f32 s14, s5, s14
+ it ge
vldmiage r1!, {s28-s31}
vmul.f32 s15, s4, s15
+ it ge
vmulge.f32 s24, s19, s24
+ it gt
vldmdbgt r2!, {s0-s3}
+ it ge
vmulge.f32 s25, s18, s25
vstmia r0!, {s8-s13}
+ it ge
vmulge.f32 s26, s17, s26
+ it gt
vldmiagt r1!, {s8-s11}
+ itt ge
vmulge.f32 s27, s16, s27
vmulge.f32 s28, s23, s28
+ it gt
vldmdbgt r2!, {s4-s7}
+ it ge
vmulge.f32 s29, s22, s29
vstmia r0!, {s14-s15}
+ ittt ge
vmulge.f32 s30, s21, s30
vmulge.f32 s31, s20, s31
vmulge.f32 s8, s3, s8
+ it gt
vldmiagt r1!, {s12-s15}
+ itttt ge
vmulge.f32 s9, s2, s9
vmulge.f32 s10, s1, s10
vstmiage r0!, {s24-s27}
vmulge.f32 s11, s0, s11
+ it ge
vstmiage r0!, {s28-s31}
bgt 1b
diff --git a/libavcodec/arm/fmtconvert_neon.S b/libavcodec/arm/fmtconvert_neon.S
index 359e57e40b..d1ad32ed27 100644
--- a/libavcodec/arm/fmtconvert_neon.S
+++ b/libavcodec/arm/fmtconvert_neon.S
@@ -71,6 +71,7 @@ endfunc
function ff_float_to_int16_interleave_neon, export=1
cmp r3, #2
+ itt lt
ldrlt r1, [r1]
blt ff_float_to_int16_neon
bne 4f
@@ -196,6 +197,7 @@ function ff_float_to_int16_interleave_neon, export=1
vst1.64 {d3}, [r8], ip
vst1.64 {d7}, [r8], ip
subs r3, r3, #4
+ it eq
popeq {r4-r8,pc}
cmp r3, #4
add r0, r0, #8
@@ -305,6 +307,7 @@ function ff_float_to_int16_interleave_neon, export=1
vst1.32 {d23[1]}, [r8], ip
8: subs r3, r3, #2
add r0, r0, #4
+ it eq
popeq {r4-r8,pc}
@ 1 channel
@@ -354,6 +357,7 @@ function ff_float_to_int16_interleave_neon, export=1
vst1.16 {d2[3]}, [r5,:16], ip
vst1.16 {d3[1]}, [r5,:16], ip
vst1.16 {d3[3]}, [r5,:16], ip
+ it eq
popeq {r4-r8,pc}
vld1.64 {d0-d1}, [r4,:128]!
vcvt.s32.f32 q0, q0, #16
diff --git a/libavcodec/arm/fmtconvert_vfp.S b/libavcodec/arm/fmtconvert_vfp.S
index da2ef8c158..7e2eb83620 100644
--- a/libavcodec/arm/fmtconvert_vfp.S
+++ b/libavcodec/arm/fmtconvert_vfp.S
@@ -46,6 +46,7 @@ function ff_float_to_int16_vfp, export=1
vmov r5, r6, s2, s3
vmov r7, r8, s4, s5
vmov ip, lr, s6, s7
+ it gt
vldmiagt r1!, {s16-s23}
ssat r4, #16, r4
ssat r3, #16, r3
@@ -53,10 +54,12 @@ function ff_float_to_int16_vfp, export=1
ssat r5, #16, r5
pkhbt r3, r3, r4, lsl #16
pkhbt r4, r5, r6, lsl #16
+ itttt gt
vcvtgt.s32.f32 s0, s16
vcvtgt.s32.f32 s1, s17
vcvtgt.s32.f32 s2, s18
vcvtgt.s32.f32 s3, s19
+ itttt gt
vcvtgt.s32.f32 s4, s20
vcvtgt.s32.f32 s5, s21
vcvtgt.s32.f32 s6, s22
diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S
index bd15ced736..338de6f643 100644
--- a/libavcodec/arm/h264dsp_neon.S
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -71,7 +71,9 @@ function ff_\type\()_h264_chroma_mc8_neon, export=1
pld [r1]
pld [r1, r2]
- muls r7, r4, r5
+A muls r7, r4, r5
+T mul r7, r4, r5
+T cmp r7, #0
rsb r6, r7, r5, lsl #3
rsb ip, r7, r4, lsl #3
sub r4, r7, r4, lsl #3
@@ -197,7 +199,9 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
pld [r1]
pld [r1, r2]
- muls r7, r4, r5
+A muls r7, r4, r5
+T mul r7, r4, r5
+T cmp r7, #0
rsb r6, r7, r5, lsl #3
rsb ip, r7, r4, lsl #3
sub r4, r7, r4, lsl #3
@@ -368,10 +372,10 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1
pop {r4-r6, pc}
2:
.ifc \type,put
- ldrh r5, [r1], r2
- strh r5, [r0], r2
- ldrh r6, [r1], r2
- strh r6, [r0], r2
+ ldrh_post r5, r1, r2
+ strh_post r5, r0, r2
+ ldrh_post r6, r1, r2
+ strh_post r6, r0, r2
.else
vld1.16 {d16[0]}, [r1], r2
vld1.16 {d16[1]}, [r1], r2
@@ -404,28 +408,17 @@ endfunc
ldr ip, [sp]
tst r2, r2
ldr ip, [ip]
+ it ne
tstne r3, r3
vmov.32 d24[0], ip
and ip, ip, ip, lsl #16
+ it eq
bxeq lr
ands ip, ip, ip, lsl #8
+ it lt
bxlt lr
.endm
- .macro align_push_regs
- and ip, sp, #15
- add ip, ip, #32
- sub sp, sp, ip
- vst1.64 {d12-d15}, [sp,:128]
- sub sp, sp, #32
- vst1.64 {d8-d11}, [sp,:128]
- .endm
-
- .macro align_pop_regs
- vld1.64 {d8-d11}, [sp,:128]!
- vld1.64 {d12-d15}, [sp,:128], ip
- .endm
-
.macro h264_loop_filter_luma
vdup.8 q11, r2 @ alpha
vmovl.u8 q12, d24
@@ -506,7 +499,7 @@ function ff_h264_v_loop_filter_luma_neon, export=1
vld1.64 {d18,d19}, [r0,:128], r1
vld1.64 {d16,d17}, [r0,:128], r1
- align_push_regs
+ vpush {d8-d15}
h264_loop_filter_luma
@@ -516,7 +509,7 @@ function ff_h264_v_loop_filter_luma_neon, export=1
vst1.64 {d0, d1}, [r0,:128], r1
vst1.64 {d10,d11}, [r0,:128]
- align_pop_regs
+ vpop {d8-d15}
bx lr
endfunc
@@ -543,7 +536,7 @@ function ff_h264_h_loop_filter_luma_neon, export=1
transpose_8x8 q3, q10, q9, q8, q0, q1, q2, q13
- align_push_regs
+ vpush {d8-d15}
h264_loop_filter_luma
@@ -568,7 +561,7 @@ function ff_h264_h_loop_filter_luma_neon, export=1
vst1.32 {d1[1]}, [r0], r1
vst1.32 {d11[1]}, [r0], r1
- align_pop_regs
+ vpop {d8-d15}
bx lr
endfunc
@@ -1116,6 +1109,7 @@ function \type\()_h264_qpel8_hv_lowpass_neon
vrhadd.u8 d11, d11, d7
sub r0, r0, r2, lsl #3
.endif
+
vst1.64 {d12}, [r0,:64], r2
vst1.64 {d13}, [r0,:64], r2
vst1.64 {d14}, [r0,:64], r2
@@ -1263,7 +1257,9 @@ function ff_\type\()_h264_qpel8_mc11_neon, export=1
\type\()_h264_qpel8_mc11:
lowpass_const r3
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r0, r11, #15
+T mov sp, r0
sub sp, sp, #64
mov r0, sp
sub r1, r1, #2
@@ -1271,14 +1267,14 @@ function ff_\type\()_h264_qpel8_mc11_neon, export=1
mov ip, #8
vpush {d8-d15}
bl put_h264_qpel8_h_lowpass_neon
- ldrd r0, [r11]
+ ldrd r0, [r11], #8
mov r3, r2
add ip, sp, #64
sub r1, r1, r2, lsl #1
mov r2, #8
bl \type\()_h264_qpel8_v_lowpass_l2_neon
vpop {d8-d15}
- add sp, r11, #8
+ mov sp, r11
pop {r11, pc}
endfunc
@@ -1287,7 +1283,9 @@ function ff_\type\()_h264_qpel8_mc21_neon, export=1
\type\()_h264_qpel8_mc21:
lowpass_const r3
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r0, r11, #15
+T mov sp, r0
sub sp, sp, #(8*8+16*12)
sub r1, r1, #2
mov r3, #8
@@ -1296,14 +1294,14 @@ function ff_\type\()_h264_qpel8_mc21_neon, export=1
vpush {d8-d15}
bl put_h264_qpel8_h_lowpass_neon
mov r4, r0
- ldrd r0, [r11]
+ ldrd r0, [r11], #8
sub r1, r1, r2, lsl #1
sub r1, r1, #2
mov r3, r2
sub r2, r4, #64
bl \type\()_h264_qpel8_hv_lowpass_l2_neon
vpop {d8-d15}
- add sp, r11, #8
+ mov sp, r11
pop {r4, r10, r11, pc}
endfunc
@@ -1330,7 +1328,9 @@ function ff_\type\()_h264_qpel8_mc12_neon, export=1
\type\()_h264_qpel8_mc12:
lowpass_const r3
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r0, r11, #15
+T mov sp, r0
sub sp, sp, #(8*8+16*12)
sub r1, r1, r2, lsl #1
mov r3, r2
@@ -1339,20 +1339,22 @@ function ff_\type\()_h264_qpel8_mc12_neon, export=1
vpush {d8-d15}
bl put_h264_qpel8_v_lowpass_neon
mov r4, r0
- ldrd r0, [r11]
+ ldrd r0, [r11], #8
sub r1, r1, r3, lsl #1
sub r1, r1, #2
sub r2, r4, #64
bl \type\()_h264_qpel8_hv_lowpass_l2_neon
vpop {d8-d15}
- add sp, r11, #8
+ mov sp, r11
pop {r4, r10, r11, pc}
endfunc
function ff_\type\()_h264_qpel8_mc22_neon, export=1
push {r4, r10, r11, lr}
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r4, r11, #15
+T mov sp, r4
sub r1, r1, r2, lsl #1
sub r1, r1, #2
mov r3, r2
@@ -1441,21 +1443,23 @@ function ff_\type\()_h264_qpel16_mc11_neon, export=1
\type\()_h264_qpel16_mc11:
lowpass_const r3
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r0, r11, #15
+T mov sp, r0
sub sp, sp, #256
mov r0, sp
sub r1, r1, #2
mov r3, #16
vpush {d8-d15}
bl put_h264_qpel16_h_lowpass_neon
- ldrd r0, [r11]
+ ldrd r0, [r11], #8
mov r3, r2
add ip, sp, #64
sub r1, r1, r2, lsl #1
mov r2, #16
bl \type\()_h264_qpel16_v_lowpass_l2_neon
vpop {d8-d15}
- add sp, r11, #8
+ mov sp, r11
pop {r4, r11, pc}
endfunc
@@ -1464,20 +1468,22 @@ function ff_\type\()_h264_qpel16_mc21_neon, export=1
\type\()_h264_qpel16_mc21:
lowpass_const r3
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r0, r11, #15
+T mov sp, r0
sub sp, sp, #(16*16+16*12)
sub r1, r1, #2
mov r0, sp
vpush {d8-d15}
bl put_h264_qpel16_h_lowpass_neon_packed
mov r4, r0
- ldrd r0, [r11]
+ ldrd r0, [r11], #8
sub r1, r1, r2, lsl #1
sub r1, r1, #2
mov r3, r2
bl \type\()_h264_qpel16_hv_lowpass_l2_neon
vpop {d8-d15}
- add sp, r11, #8
+ mov sp, r11
pop {r4-r5, r9-r11, pc}
endfunc
@@ -1504,7 +1510,9 @@ function ff_\type\()_h264_qpel16_mc12_neon, export=1
\type\()_h264_qpel16_mc12:
lowpass_const r3
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r0, r11, #15
+T mov sp, r0
sub sp, sp, #(16*16+16*12)
sub r1, r1, r2, lsl #1
mov r0, sp
@@ -1512,13 +1520,13 @@ function ff_\type\()_h264_qpel16_mc12_neon, export=1
vpush {d8-d15}
bl put_h264_qpel16_v_lowpass_neon_packed
mov r4, r0
- ldrd r0, [r11]
+ ldrd r0, [r11], #8
sub r1, r1, r3, lsl #1
sub r1, r1, #2
mov r2, r3
bl \type\()_h264_qpel16_hv_lowpass_l2_neon
vpop {d8-d15}
- add sp, r11, #8
+ mov sp, r11
pop {r4-r5, r9-r11, pc}
endfunc
@@ -1526,7 +1534,9 @@ function ff_\type\()_h264_qpel16_mc22_neon, export=1
push {r4, r9-r11, lr}
lowpass_const r3
mov r11, sp
- bic sp, sp, #15
+A bic sp, sp, #15
+T bic r4, r11, #15
+T mov sp, r4
sub r1, r1, r2, lsl #1
sub r1, r1, #2
mov r3, r2
diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S
index afd3718518..6ea56587b8 100644
--- a/libavcodec/arm/h264idct_neon.S
+++ b/libavcodec/arm/h264idct_neon.S
@@ -106,10 +106,12 @@ function ff_h264_idct_add16_neon, export=1
blt 2f
ldrsh lr, [r1]
add r0, r0, r4
+ it ne
movne lr, #0
cmp lr, #0
- adrne lr, ff_h264_idct_dc_add_neon
- adreq lr, ff_h264_idct_add_neon
+ ite ne
+ adrne lr, ff_h264_idct_dc_add_neon + CONFIG_THUMB
+ adreq lr, ff_h264_idct_add_neon + CONFIG_THUMB
blx lr
2: subs ip, ip, #1
add r1, r1, #32
@@ -132,8 +134,9 @@ function ff_h264_idct_add16intra_neon, export=1
add r0, r0, r4
cmp r8, #0
ldrsh r8, [r1]
- adrne lr, ff_h264_idct_add_neon
- adreq lr, ff_h264_idct_dc_add_neon
+ iteet ne
+ adrne lr, ff_h264_idct_add_neon + CONFIG_THUMB
+ adreq lr, ff_h264_idct_dc_add_neon + CONFIG_THUMB
cmpeq r8, #0
blxne lr
subs ip, ip, #1
@@ -159,12 +162,14 @@ function ff_h264_idct_add8_neon, export=1
add r1, r3, r12, lsl #5
cmp r8, #0
ldrsh r8, [r1]
- adrne lr, ff_h264_idct_add_neon
- adreq lr, ff_h264_idct_dc_add_neon
+ iteet ne
+ adrne lr, ff_h264_idct_add_neon + CONFIG_THUMB
+ adreq lr, ff_h264_idct_dc_add_neon + CONFIG_THUMB
cmpeq r8, #0
blxne lr
add r12, r12, #1
cmp r12, #4
+ itt eq
moveq r12, #16
moveq r4, r9
cmp r12, #20
@@ -365,10 +370,12 @@ function ff_h264_idct8_add4_neon, export=1
blt 2f
ldrsh lr, [r1]
add r0, r0, r4
+ it ne
movne lr, #0
cmp lr, #0
- adrne lr, ff_h264_idct8_dc_add_neon
- adreq lr, ff_h264_idct8_add_neon
+ ite ne
+ adrne lr, ff_h264_idct8_dc_add_neon + CONFIG_THUMB
+ adreq lr, ff_h264_idct8_add_neon + CONFIG_THUMB
blx lr
2: subs r12, r12, #4
add r1, r1, #128
diff --git a/libavcodec/arm/mathops.h b/libavcodec/arm/mathops.h
index 299a973cb6..d67714c496 100644
--- a/libavcodec/arm/mathops.h
+++ b/libavcodec/arm/mathops.h
@@ -64,11 +64,14 @@ static inline av_const int mid_pred(int a, int b, int c)
__asm__ (
"mov %0, %2 \n\t"
"cmp %1, %2 \n\t"
+ "itt gt \n\t"
"movgt %0, %1 \n\t"
"movgt %1, %2 \n\t"
"cmp %1, %3 \n\t"
+ "it le \n\t"
"movle %1, %3 \n\t"
"cmp %0, %1 \n\t"
+ "it gt \n\t"
"movgt %0, %1 \n\t"
: "=&r"(m), "+r"(a)
: "r"(b), "r"(c)
diff --git a/libavcodec/arm/mdct_neon.S b/libavcodec/arm/mdct_neon.S
index fcf802275f..2def704497 100644
--- a/libavcodec/arm/mdct_neon.S
+++ b/libavcodec/arm/mdct_neon.S
@@ -191,7 +191,9 @@ function ff_mdct_calc_neon, export=1
vadd.f32 d17, d17, d3 @ in2u+in1d -I
1:
vmul.f32 d7, d0, d21 @ I*s
- ldr r10, [r3, lr, lsr #1]
+A ldr r10, [r3, lr, lsr #1]
+T lsr r10, lr, #1
+T ldr r10, [r3, r10]
vmul.f32 d6, d1, d20 @ -R*c
ldr r6, [r3, #4]!
vmul.f32 d4, d1, d21 @ -R*s
diff --git a/libavcodec/arm/mpegaudiodsp_fixed_armv6.S b/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
index 9ec731480b..b517b973e7 100644
--- a/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
+++ b/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
@@ -75,7 +75,7 @@ function ff_mpadsp_apply_window_fixed_armv6, export=1
sum8 r8, r9, r1, r0, r10, r11, r12, lr
sum8 r8, r9, r1, r2, r10, r11, r12, lr, rsb, 32
round r10, r8, r9
- strh r10, [r3], r4
+ strh_post r10, r3, r4
mov lr, #15
1:
@@ -127,10 +127,10 @@ function ff_mpadsp_apply_window_fixed_armv6, export=1
round r10, r8, r9
adds r8, r8, r4
adc r9, r9, r7
- strh r10, [r3], r12
+ strh_post r10, r3, r12
round r11, r8, r9
subs lr, lr, #1
- strh r11, [r5], -r12
+ strh_dpost r11, r5, r12
bgt 1b
sum8 r8, r9, r1, r0, r10, r11, r12, lr, rsb, 33
diff --git a/libavcodec/arm/mpegvideo_armv5te_s.S b/libavcodec/arm/mpegvideo_armv5te_s.S
index 82095ab15d..3db9c734e9 100644
--- a/libavcodec/arm/mpegvideo_armv5te_s.S
+++ b/libavcodec/arm/mpegvideo_armv5te_s.S
@@ -38,15 +38,21 @@
.macro dequant_t dst, src, mul, add, tmp
rsbs \tmp, ip, \src, asr #16
+ it gt
addgt \tmp, \add, #0
+ it lt
rsblt \tmp, \add, #0
+ it ne
smlatbne \dst, \src, \mul, \tmp
.endm
.macro dequant_b dst, src, mul, add, tmp
rsbs \tmp, ip, \src, lsl #16
+ it gt
addgt \tmp, \add, #0
+ it lt
rsblt \tmp, \add, #0
+ it ne
smlabbne \dst, \src, \mul, \tmp
.endm
@@ -80,21 +86,27 @@ function ff_dct_unquantize_h263_armv5te, export=1
strh lr, [r0], #2
subs r3, r3, #8
+ it gt
ldrdgt r4, [r0, #0] /* load data early to avoid load/use pipeline stall */
bgt 1b
adds r3, r3, #2
+ it le
pople {r4-r9,pc}
2:
ldrsh r9, [r0, #0]
ldrsh lr, [r0, #2]
mov r8, r2
cmp r9, #0
+ it lt
rsblt r8, r2, #0
+ it ne
smlabbne r9, r9, r1, r8
mov r8, r2
cmp lr, #0
+ it lt
rsblt r8, r2, #0
+ it ne
smlabbne lr, lr, r1, r8
strh r9, [r0], #2
strh lr, [r0], #2
diff --git a/libavcodec/arm/mpegvideo_neon.S b/libavcodec/arm/mpegvideo_neon.S
index b695fb7c22..849047e13c 100644
--- a/libavcodec/arm/mpegvideo_neon.S
+++ b/libavcodec/arm/mpegvideo_neon.S
@@ -57,6 +57,7 @@ function ff_dct_unquantize_h263_neon, export=1
subs r3, r3, #16
vst1.16 {q0}, [r1,:128]!
vst1.16 {q8}, [r1,:128]!
+ it le
bxle lr
cmp r3, #8
bgt 1b
@@ -78,6 +79,7 @@ function ff_dct_unquantize_h263_intra_neon, export=1
ldr r6, [r0, #AC_PRED]
add lr, r0, #INTER_SCANTAB_RASTER_END
cmp r6, #0
+ it ne
movne r12, #63
bne 1f
ldr r12, [r12, r2, lsl #2]
@@ -86,9 +88,11 @@ function ff_dct_unquantize_h263_intra_neon, export=1
ldrsh r4, [r1]
cmp r5, #0
mov r5, r1
+ it ne
movne r2, #0
bne 2f
cmp r2, #4
+ it ge
addge r0, r0, #4
sub r2, r3, #1
ldr r6, [r0, #Y_DC_SCALE]
diff --git a/libavcodec/arm/rdft_neon.S b/libavcodec/arm/rdft_neon.S
index 4f8a1032cc..19886e6d0b 100644
--- a/libavcodec/arm/rdft_neon.S
+++ b/libavcodec/arm/rdft_neon.S
@@ -137,6 +137,7 @@ function ff_rdft_calc_neon, export=1
vst1.32 {d22}, [r5,:64]
cmp r6, #0
+ it eq
popeq {r4-r8,pc}
vmul.f32 d22, d22, d18
diff --git a/libavcodec/arm/simple_idct_arm.S b/libavcodec/arm/simple_idct_arm.S
index ecb83d23ad..990dde6ff7 100644
--- a/libavcodec/arm/simple_idct_arm.S
+++ b/libavcodec/arm/simple_idct_arm.S
@@ -121,11 +121,13 @@ __b_evaluation:
ldr r11, [r12, #offW7] @ R11=W7
mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
- teq r2, #0 @ if null avoid muls
- mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+ teq r2, #0 @ if null avoid muls
+ itttt ne
+ mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
rsbne r2, r2, #0 @ R2=-ROWr16[3]
mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+ it ne
mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
@@ at this point, R0=b0, R1=b1, R2 (free), R3=ROWr32[2], R4=ROWr32[3],
@@ -148,19 +150,23 @@ __b_evaluation:
@@ MAC16(b3, -W1, row[7]);
@@ MAC16(b1, -W5, row[7]);
mov r3, r3, asr #16 @ R3=ROWr16[5]
- teq r3, #0 @ if null avoid muls
+ teq r3, #0 @ if null avoid muls
+ it ne
mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5]=b0
mov r4, r4, asr #16 @ R4=ROWr16[7]
+ itttt ne
mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5]=b2
mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5]=b3
rsbne r3, r3, #0 @ R3=-ROWr16[5]
mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5]=b1
@@ R3 is free now
- teq r4, #0 @ if null avoid muls
+ teq r4, #0 @ if null avoid muls
+ itttt ne
mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7]=b0
mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7]=b2
rsbne r4, r4, #0 @ R4=-ROWr16[7]
mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7]=b3
+ it ne
mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7]=b1
@@ R4 is free now
__end_b_evaluation:
@@ -204,16 +210,19 @@ __a_evaluation:
@@ a2 -= W4*row[4]
@@ a3 += W4*row[4]
ldrsh r11, [r14, #8] @ R11=ROWr16[4]
- teq r11, #0 @ if null avoid muls
+ teq r11, #0 @ if null avoid muls
+ it ne
mulne r11, r9, r11 @ R11=W4*ROWr16[4]
@@ R9 is free now
ldrsh r9, [r14, #12] @ R9=ROWr16[6]
+ itttt ne
addne r6, r6, r11 @ R6+=W4*ROWr16[4] (a0)
subne r2, r2, r11 @ R2-=W4*ROWr16[4] (a1)
subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2)
addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3)
@@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
- teq r9, #0 @ if null avoid muls
+ teq r9, #0 @ if null avoid muls
+ itttt ne
mulne r11, r10, r9 @ R11=W6*ROWr16[6]
addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0)
mulne r10, r8, r9 @ R10=W2*ROWr16[6]
@@ -222,6 +231,7 @@ __a_evaluation:
@@ a1 -= W2*row[6];
@@ a2 += W2*row[6];
subne r4, r4, r11 @ R4-=W6*ROWr16[6] (a3)
+ itt ne
subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1)
addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2)
@@ -323,10 +333,12 @@ __b_evaluation2:
ldrsh r2, [r14, #48]
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
teq r2, #0 @ if 0, then avoid muls
+ itttt ne
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
rsbne r2, r2, #0 @ R2=-ROWr16[3]
mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+ it ne
mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
@@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
@@ -342,18 +354,22 @@ __b_evaluation2:
@@ MAC16(b1, -W5, col[7x8]);
ldrsh r3, [r14, #80] @ R3=COLr16[5x8]
teq r3, #0 @ if 0 then avoid muls
+ itttt ne
mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5x8]=b0
mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5x8]=b2
mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5x8]=b3
rsbne r3, r3, #0 @ R3=-ROWr16[5x8]
ldrsh r4, [r14, #112] @ R4=COLr16[7x8]
+ it ne
mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5x8]=b1
@@ R3 is free now
teq r4, #0 @ if 0 then avoid muls
+ itttt ne
mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7x8]=b0
mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7x8]=b2
rsbne r4, r4, #0 @ R4=-ROWr16[7x8]
mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7x8]=b3
+ it ne
mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7x8]=b1
@@ R4 is free now
__end_b_evaluation2:
@@ -390,15 +406,18 @@ __a_evaluation2:
@@ a3 += W4*row[4]
ldrsh r11, [r14, #64] @ R11=ROWr16[4]
teq r11, #0 @ if null avoid muls
+ itttt ne
mulne r11, r9, r11 @ R11=W4*ROWr16[4]
@@ R9 is free now
addne r6, r6, r11 @ R6+=W4*ROWr16[4] (a0)
subne r2, r2, r11 @ R2-=W4*ROWr16[4] (a1)
subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2)
ldrsh r9, [r14, #96] @ R9=ROWr16[6]
+ it ne
addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3)
@@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
teq r9, #0 @ if null avoid muls
+ itttt ne
mulne r11, r10, r9 @ R11=W6*ROWr16[6]
addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0)
mulne r10, r8, r9 @ R10=W2*ROWr16[6]
@@ -407,6 +426,7 @@ __a_evaluation2:
@@ a1 -= W2*row[6];
@@ a2 += W2*row[6];
subne r4, r4, r11 @ R4-=W6*ROWr16[6] (a3)
+ itt ne
subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1)
addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2)
__end_a_evaluation2:
diff --git a/libavcodec/arm/simple_idct_armv5te.S b/libavcodec/arm/simple_idct_armv5te.S
index 3c4b5c06d1..71727ceccc 100644
--- a/libavcodec/arm/simple_idct_armv5te.S
+++ b/libavcodec/arm/simple_idct_armv5te.S
@@ -49,6 +49,7 @@ function idct_row_armv5te
ldrd v1, [a1, #8]
ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */
orrs v1, v1, v2
+ itt eq
cmpeq v1, a4
cmpeq v1, a3, lsr #16
beq row_dc_only
@@ -269,6 +270,7 @@ function idct_col_armv5te
ldmfd sp!, {a3, a4}
adds a2, a3, v1
mov a2, a2, lsr #20
+ it mi
orrmi a2, a2, #0xf000
add ip, a4, v2
mov ip, ip, asr #20
@@ -276,6 +278,7 @@ function idct_col_armv5te
str a2, [a1]
subs a3, a3, v1
mov a2, a3, lsr #20
+ it mi
orrmi a2, a2, #0xf000
sub a4, a4, v2
mov a4, a4, asr #20
@@ -285,6 +288,7 @@ function idct_col_armv5te
subs a2, a3, v3
mov a2, a2, lsr #20
+ it mi
orrmi a2, a2, #0xf000
sub ip, a4, v4
mov ip, ip, asr #20
@@ -292,6 +296,7 @@ function idct_col_armv5te
str a2, [a1, #(16*1)]
adds a3, a3, v3
mov a2, a3, lsr #20
+ it mi
orrmi a2, a2, #0xf000
add a4, a4, v4
mov a4, a4, asr #20
@@ -301,6 +306,7 @@ function idct_col_armv5te
adds a2, a3, v5
mov a2, a2, lsr #20
+ it mi
orrmi a2, a2, #0xf000
add ip, a4, v6
mov ip, ip, asr #20
@@ -308,6 +314,7 @@ function idct_col_armv5te
str a2, [a1, #(16*2)]
subs a3, a3, v5
mov a2, a3, lsr #20
+ it mi
orrmi a2, a2, #0xf000
sub a4, a4, v6
mov a4, a4, asr #20
@@ -317,6 +324,7 @@ function idct_col_armv5te
adds a2, a3, v7
mov a2, a2, lsr #20
+ it mi
orrmi a2, a2, #0xf000
add ip, a4, fp
mov ip, ip, asr #20
@@ -324,6 +332,7 @@ function idct_col_armv5te
str a2, [a1, #(16*3)]
subs a3, a3, v7
mov a2, a3, lsr #20
+ it mi
orrmi a2, a2, #0xf000
sub a4, a4, fp
mov a4, a4, asr #20
@@ -335,15 +344,19 @@ endfunc
.macro clip dst, src:vararg
movs \dst, \src
+ it mi
movmi \dst, #0
cmp \dst, #255
+ it gt
movgt \dst, #255
.endm
.macro aclip dst, src:vararg
adds \dst, \src
+ it mi
movmi \dst, #0
cmp \dst, #255
+ it gt
movgt \dst, #255
.endm
@@ -370,35 +383,35 @@ function idct_col_put_armv5te
orr a2, a3, a4, lsl #8
rsb v2, lr, lr, lsl #3
ldmfd sp!, {a3, a4}
- strh a2, [v2, v1]!
+ strh_pre a2, v2, v1
sub a2, a3, v3
clip a2, a2, asr #20
sub ip, a4, v4
clip ip, ip, asr #20
orr a2, a2, ip, lsl #8
- strh a2, [v1, lr]!
+ strh_pre a2, v1, lr
add a3, a3, v3
clip a2, a3, asr #20
add a4, a4, v4
clip a4, a4, asr #20
orr a2, a2, a4, lsl #8
ldmfd sp!, {a3, a4}
- strh a2, [v2, -lr]!
+ strh_dpre a2, v2, lr
add a2, a3, v5
clip a2, a2, asr #20
add ip, a4, v6
clip ip, ip, asr #20
orr a2, a2, ip, lsl #8
- strh a2, [v1, lr]!
+ strh_pre a2, v1, lr
sub a3, a3, v5
clip a2, a3, asr #20
sub a4, a4, v6
clip a4, a4, asr #20
orr a2, a2, a4, lsl #8
ldmfd sp!, {a3, a4}
- strh a2, [v2, -lr]!
+ strh_dpre a2, v2, lr
add a2, a3, v7
clip a2, a2, asr #20
@@ -411,7 +424,7 @@ function idct_col_put_armv5te
sub a4, a4, fp
clip a4, a4, asr #20
orr a2, a2, a4, lsl #8
- strh a2, [v2, -lr]
+ strh_dpre a2, v2, lr
ldr pc, [sp], #4
endfunc
@@ -436,7 +449,7 @@ function idct_col_add_armv5te
ldr v1, [sp, #32]
sub a4, a4, v2
rsb v2, v1, v1, lsl #3
- ldrh ip, [v2, lr]!
+ ldrh_pre ip, v2, lr
strh a2, [lr]
and a2, ip, #255
aclip a3, a2, a3, asr #20
@@ -448,7 +461,7 @@ function idct_col_add_armv5te
strh a2, [v2]
ldmfd sp!, {a3, a4}
- ldrh ip, [lr, v1]!
+ ldrh_pre ip, lr, v1
sub a2, a3, v3
add a3, a3, v3
and v3, ip, #255
@@ -458,7 +471,7 @@ function idct_col_add_armv5te
aclip v3, v3, ip, lsr #8
orr a2, a2, v3, lsl #8
add a4, a4, v4
- ldrh ip, [v2, -v1]!
+ ldrh_dpre ip, v2, v1
strh a2, [lr]
and a2, ip, #255
aclip a3, a2, a3, asr #20
@@ -468,7 +481,7 @@ function idct_col_add_armv5te
strh a2, [v2]
ldmfd sp!, {a3, a4}
- ldrh ip, [lr, v1]!
+ ldrh_pre ip, lr, v1
add a2, a3, v5
sub a3, a3, v5
and v3, ip, #255
@@ -478,7 +491,7 @@ function idct_col_add_armv5te
aclip v3, v3, ip, lsr #8
orr a2, a2, v3, lsl #8
sub a4, a4, v6
- ldrh ip, [v2, -v1]!
+ ldrh_dpre ip, v2, v1
strh a2, [lr]
and a2, ip, #255
aclip a3, a2, a3, asr #20
@@ -488,7 +501,7 @@ function idct_col_add_armv5te
strh a2, [v2]
ldmfd sp!, {a3, a4}
- ldrh ip, [lr, v1]!
+ ldrh_pre ip, lr, v1
add a2, a3, v7
sub a3, a3, v7
and v3, ip, #255
@@ -498,7 +511,7 @@ function idct_col_add_armv5te
aclip v3, v3, ip, lsr #8
orr a2, a2, v3, lsl #8
sub a4, a4, fp
- ldrh ip, [v2, -v1]!
+ ldrh_dpre ip, v2, v1
strh a2, [lr]
and a2, ip, #255
aclip a3, a2, a3, asr #20
diff --git a/libavcodec/arm/simple_idct_armv6.S b/libavcodec/arm/simple_idct_armv6.S
index d61c1fd3ea..a176b3a7b4 100644
--- a/libavcodec/arm/simple_idct_armv6.S
+++ b/libavcodec/arm/simple_idct_armv6.S
@@ -200,6 +200,7 @@ function idct_row_armv6
ldr r3, [r0, #8] /* r3 = row[3,1] */
ldr r2, [r0] /* r2 = row[2,0] */
orrs lr, lr, ip
+ itt eq
cmpeq lr, r3
cmpeq lr, r2, lsr #16
beq 1f
@@ -282,14 +283,14 @@ function idct_col_put_armv6
pop {r1, r2}
idct_finish_shift_sat COL_SHIFT
- strb r4, [r1], r2
- strb r5, [r1], r2
- strb r6, [r1], r2
- strb r7, [r1], r2
- strb r11,[r1], r2
- strb r10,[r1], r2
- strb r9, [r1], r2
- strb r8, [r1], r2
+ strb_post r4, r1, r2
+ strb_post r5, r1, r2
+ strb_post r6, r1, r2
+ strb_post r7, r1, r2
+ strb_post r11,r1, r2
+ strb_post r10,r1, r2
+ strb_post r9, r1, r2
+ strb_post r8, r1, r2
sub r1, r1, r2, lsl #3
@@ -318,16 +319,16 @@ function idct_col_add_armv6
add ip, r3, ip, asr #COL_SHIFT
usat ip, #8, ip
add r4, r7, r4, asr #COL_SHIFT
- strb ip, [r1], r2
+ strb_post ip, r1, r2
ldrb ip, [r1, r2]
usat r4, #8, r4
ldrb r11,[r1, r2, lsl #2]
add r5, ip, r5, asr #COL_SHIFT
usat r5, #8, r5
- strb r4, [r1], r2
+ strb_post r4, r1, r2
ldrb r3, [r1, r2]
ldrb ip, [r1, r2, lsl #2]
- strb r5, [r1], r2
+ strb_post r5, r1, r2
ldrb r7, [r1, r2]
ldrb r4, [r1, r2, lsl #2]
add r6, r3, r6, asr #COL_SHIFT
@@ -340,11 +341,11 @@ function idct_col_add_armv6
usat r8, #8, r8
add lr, r4, lr, asr #COL_SHIFT
usat lr, #8, lr
- strb r6, [r1], r2
- strb r10,[r1], r2
- strb r9, [r1], r2
- strb r8, [r1], r2
- strb lr, [r1], r2
+ strb_post r6, r1, r2
+ strb_post r10,r1, r2
+ strb_post r9, r1, r2
+ strb_post r8, r1, r2
+ strb_post lr, r1, r2
sub r1, r1, r2, lsl #3
diff --git a/libavcodec/arm/simple_idct_neon.S b/libavcodec/arm/simple_idct_neon.S
index 17cde5835a..64a7fbf13a 100644
--- a/libavcodec/arm/simple_idct_neon.S
+++ b/libavcodec/arm/simple_idct_neon.S
@@ -71,7 +71,7 @@ function idct_row4_pld_neon
add r3, r0, r1, lsl #2
pld [r0, r1]
pld [r0, r1, lsl #1]
- pld [r3, -r1]
+A pld [r3, -r1]
pld [r3]
pld [r3, r1]
add r3, r3, r1, lsl #1
@@ -164,6 +164,7 @@ function idct_col4_neon
orrs r4, r4, r5
idct_col4_top
+ it eq
addeq r2, r2, #16
beq 1f
@@ -176,6 +177,7 @@ function idct_col4_neon
1: orrs r6, r6, r7
ldrd r4, [r2, #16]
+ it eq
addeq r2, r2, #16
beq 2f
@@ -187,6 +189,7 @@ function idct_col4_neon
2: orrs r4, r4, r5
ldrd r4, [r2, #16]
+ it eq
addeq r2, r2, #16
beq 3f
@@ -199,6 +202,7 @@ function idct_col4_neon
vadd.i32 q13, q13, q8
3: orrs r4, r4, r5
+ it eq
addeq r2, r2, #16
beq 4f
diff --git a/libavcodec/arm/synth_filter_neon.S b/libavcodec/arm/synth_filter_neon.S
index 1464abe562..3f91d67506 100644
--- a/libavcodec/arm/synth_filter_neon.S
+++ b/libavcodec/arm/synth_filter_neon.S
@@ -100,9 +100,11 @@ NOVFP vldr s0, [sp, #12*4] @ scale
vst1.32 {q9}, [r2,:128]
subs r1, r1, #1
+ it eq
popeq {r4-r11,pc}
cmp r4, #0
+ itt eq
subeq r8, r8, #512*4
subeq r9, r9, #512*4
sub r5, r5, #512*4
diff --git a/libavcodec/arm/vp56_arith.h b/libavcodec/arm/vp56_arith.h
index cd02579e5b..ece9ac2a6c 100644
--- a/libavcodec/arm/vp56_arith.h
+++ b/libavcodec/arm/vp56_arith.h
@@ -21,6 +21,14 @@
#ifndef AVCODEC_ARM_VP56_ARITH_H
#define AVCODEC_ARM_VP56_ARITH_H
+#if CONFIG_THUMB
+# define A(x)
+# define T(x) x
+#else
+# define A(x) x
+# define T(x)
+#endif
+
#if HAVE_ARMV6 && HAVE_INLINE_ASM
#define vp56_rac_get_prob vp56_rac_get_prob_armv6
@@ -32,15 +40,21 @@ static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr)
unsigned bit;
__asm__ ("adds %3, %3, %0 \n"
+ "itt cs \n"
"cmpcs %7, %4 \n"
- "ldrcsh %2, [%4], #2 \n"
+ A("ldrcsh %2, [%4], #2 \n")
+ T("ldrhcs %2, [%4], #2 \n")
"rsb %0, %6, #256 \n"
"smlabb %0, %5, %6, %0 \n"
+ T("itttt cs \n")
"rev16cs %2, %2 \n"
- "orrcs %1, %1, %2, lsl %3 \n"
+ T("lslcs %2, %2, %3 \n")
+ T("orrcs %1, %1, %2 \n")
+ A("orrcs %1, %1, %2, lsl %3 \n")
"subcs %3, %3, #16 \n"
"lsr %0, %0, #8 \n"
"cmp %1, %0, lsl #16 \n"
+ "ittte ge \n"
"subge %1, %1, %0, lsl #16 \n"
"subge %0, %5, %0 \n"
"movge %2, #1 \n"
@@ -64,12 +78,17 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr)
unsigned tmp;
__asm__ ("adds %3, %3, %0 \n"
+ "itt cs \n"
"cmpcs %7, %4 \n"
- "ldrcsh %2, [%4], #2 \n"
+ A("ldrcsh %2, [%4], #2 \n")
+ T("ldrhcs %2, [%4], #2 \n")
"rsb %0, %6, #256 \n"
"smlabb %0, %5, %6, %0 \n"
+ T("itttt cs \n")
"rev16cs %2, %2 \n"
- "orrcs %1, %1, %2, lsl %3 \n"
+ T("lslcs %2, %2, %3 \n")
+ T("orrcs %1, %1, %2 \n")
+ A("orrcs %1, %1, %2, lsl %3 \n")
"subcs %3, %3, #16 \n"
"lsr %0, %0, #8 \n"
"lsl %2, %0, #16 \n"
diff --git a/libavcodec/arm/vp8_armv6.S b/libavcodec/arm/vp8_armv6.S
index 93f4dd664b..b995360e0c 100644
--- a/libavcodec/arm/vp8_armv6.S
+++ b/libavcodec/arm/vp8_armv6.S
@@ -25,13 +25,18 @@
lsl \cw, \cw, \t0
lsl \t0, \h, \t0
rsb \h, \pr, #256
+ it cs
ldrhcs \t1, [\buf], #2
smlabb \h, \t0, \pr, \h
+T itttt cs
rev16cs \t1, \t1
- orrcs \cw, \cw, \t1, lsl \bs
+A orrcs \cw, \cw, \t1, lsl \bs
+T lslcs \t1, \t1, \bs
+T orrcs \cw, \cw, \t1
subcs \bs, \bs, #16
lsr \h, \h, #8
cmp \cw, \h, lsl #16
+ itt ge
subge \cw, \cw, \h, lsl #16
subge \h, \t0, \h
.endm
@@ -40,14 +45,20 @@
adds \bs, \bs, \t0
lsl \cw, \cw, \t0
lsl \t0, \h, \t0
+ it cs
ldrhcs \t1, [\buf], #2
mov \h, #128
+ it cs
rev16cs \t1, \t1
add \h, \h, \t0, lsl #7
- orrcs \cw, \cw, \t1, lsl \bs
+A orrcs \cw, \cw, \t1, lsl \bs
+T ittt cs
+T lslcs \t1, \t1, \bs
+T orrcs \cw, \cw, \t1
subcs \bs, \bs, #16
lsr \h, \h, #8
cmp \cw, \h, lsl #16
+ itt ge
subge \cw, \cw, \h, lsl #16
subge \h, \t0, \h
.endm
@@ -59,6 +70,7 @@ function ff_decode_block_coeffs_armv6, export=1
cmp r3, #0
ldr r11, [r5]
ldm r0, {r5-r7} @ high, bits, buf
+ it ne
pkhtbne r11, r11, r11, asr #16
ldr r8, [r0, #16] @ code_word
0:
@@ -80,19 +92,26 @@ function ff_decode_block_coeffs_armv6, export=1
adds r6, r6, r9
add r4, r4, #11
lsl r8, r8, r9
+ it cs
ldrhcs r10, [r7], #2
lsl r9, r5, r9
mov r5, #128
+ it cs
rev16cs r10, r10
add r5, r5, r9, lsl #7
- orrcs r8, r8, r10, lsl r6
+T ittt cs
+T lslcs r10, r10, r6
+T orrcs r8, r8, r10
+A orrcs r8, r8, r10, lsl r6
subcs r6, r6, #16
lsr r5, r5, #8
cmp r8, r5, lsl #16
movrel r10, zigzag_scan-1
+ itt ge
subge r8, r8, r5, lsl #16
subge r5, r9, r5
ldrb r10, [r10, r3]
+ it ge
rsbge r12, r12, #0
cmp r3, #16
strh r12, [r1, r10]
@@ -108,6 +127,7 @@ function ff_decode_block_coeffs_armv6, export=1
ldr r0, [sp]
ldr r9, [r0, #12]
cmp r7, r9
+ it hi
movhi r7, r9
stm r0, {r5-r7} @ high, bits, buf
str r8, [r0, #16] @ code_word
@@ -131,11 +151,13 @@ function ff_decode_block_coeffs_armv6, export=1
mov r12, #2
ldrb r0, [r4, #4]
rac_get_prob r5, r6, r7, r8, r0, r9, r10
+ it ge
addge r12, #1
ldrb r9, [lr, r5]
blt 4f
ldrb r0, [r4, #5]
rac_get_prob r5, r6, r7, r8, r0, r9, r10
+ it ge
addge r12, #1
ldrb r9, [lr, r5]
b 4f
@@ -153,6 +175,7 @@ function ff_decode_block_coeffs_armv6, export=1
mov r12, #5
mov r0, #159
rac_get_prob r5, r6, r7, r8, r0, r9, r10
+ it ge
addge r12, r12, #1
ldrb r9, [lr, r5]
b 4f
@@ -160,23 +183,28 @@ function ff_decode_block_coeffs_armv6, export=1
mov r12, #7
mov r0, #165
rac_get_prob r5, r6, r7, r8, r0, r9, r10
+ it ge
addge r12, r12, #2
ldrb r9, [lr, r5]
mov r0, #145
rac_get_prob r5, r6, r7, r8, r0, r9, r10
+ it ge
addge r12, r12, #1
ldrb r9, [lr, r5]
b 4f
3:
ldrb r0, [r4, #8]
rac_get_prob r5, r6, r7, r8, r0, r9, r10
+ it ge
addge r4, r4, #1
ldrb r9, [lr, r5]
+ ite ge
movge r12, #2
movlt r12, #0
ldrb r0, [r4, #9]
rac_get_prob r5, r6, r7, r8, r0, r9, r10
mov r9, #8
+ it ge
addge r12, r12, #1
movrel r4, X(ff_vp8_dct_cat_prob)
lsl r9, r9, r12
@@ -189,6 +217,7 @@ function ff_decode_block_coeffs_armv6, export=1
lsl r1, r1, #1
rac_get_prob r5, r6, r7, r8, r0, r9, r10
ldrb r0, [r4], #1
+ it ge
addge r1, r1, #1
cmp r0, #0
bne 1b
@@ -200,6 +229,7 @@ function ff_decode_block_coeffs_armv6, export=1
add r4, r2, r4
add r4, r4, #22
rac_get_128 r5, r6, r7, r8, r9, r10
+ it ge
rsbge r12, r12, #0
smulbb r12, r12, r11
movrel r9, zigzag_scan-1
diff --git a/libavcodec/arm/vp8dsp_neon.S b/libavcodec/arm/vp8dsp_neon.S
index 23330900f7..28487e7a60 100644
--- a/libavcodec/arm/vp8dsp_neon.S
+++ b/libavcodec/arm/vp8dsp_neon.S
@@ -746,14 +746,14 @@ function ff_put_vp8_pixels4_neon, export=1
push {r4-r6,lr}
1:
subs r12, r12, #4
- ldr r4, [r2], r3
- ldr r5, [r2], r3
- ldr r6, [r2], r3
- ldr lr, [r2], r3
- str r4, [r0], r1
- str r5, [r0], r1
- str r6, [r0], r1
- str lr, [r0], r1
+ ldr_post r4, r2, r3
+ ldr_post r5, r2, r3
+ ldr_post r6, r2, r3
+ ldr_post lr, r2, r3
+ str_post r4, r0, r1
+ str_post r5, r0, r1
+ str_post r6, r0, r1
+ str_post lr, r0, r1
bgt 1b
pop {r4-r6,pc}
endfunc
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 90c389b8c5..42eabdd623 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -3320,7 +3320,7 @@ void av_resample_close(struct AVResampleContext *c);
/**
* Allocate memory for a picture. Call avpicture_free() to free it.
*
- * \see avpicture_fill()
+ * @see avpicture_fill()
*
* @param picture the picture to be filled in
* @param pix_fmt the format of the picture
@@ -3367,7 +3367,7 @@ int avpicture_fill(AVPicture *picture, uint8_t *ptr,
* The data is stored compactly, without any gaps for alignment or padding
* which may be applied by avpicture_fill().
*
- * \see avpicture_get_size()
+ * @see avpicture_get_size()
*
* @param[in] src AVPicture containing image data
* @param[in] pix_fmt The format in which the picture data is stored.
@@ -3964,7 +3964,7 @@ typedef struct AVCodecParserContext {
int64_t offset; ///< byte offset from starting packet start
int64_t cur_frame_end[AV_PARSER_PTS_NB];
- /*!
+ /**
* Set by parser to 1 for key frames and 0 for non-key frames.
* It is initialized to -1, so if the parser doesn't set this flag,
* old-style fallback using AV_PICTURE_TYPE_I picture type as key frames
@@ -4211,7 +4211,7 @@ void av_log_missing_feature(void *avc, const char *feature, int want_sample);
* a pointer to an AVClass struct
* @param[in] msg string containing an optional message, or NULL if no message
*/
-void av_log_ask_for_sample(void *avc, const char *msg, ...);
+void av_log_ask_for_sample(void *avc, const char *msg, ...) av_printf_format(2, 3);
/**
* Register the hardware accelerator hwaccel.
diff --git a/libavcodec/celp_filters.h b/libavcodec/celp_filters.h
index 145e3d3346..2fb2b03aaa 100644
--- a/libavcodec/celp_filters.h
+++ b/libavcodec/celp_filters.h
@@ -34,7 +34,7 @@
*
* fc_out[n] = sum(i,0,len-1){ fc_in[i] * filter[(len + n - i)%len] }
*
- * \note fc_in and fc_out should not overlap!
+ * @note fc_in and fc_out should not overlap!
*/
void ff_celp_convolve_circ(int16_t *fc_out, const int16_t *fc_in,
const int16_t *filter, int len);
diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index 24db7e3d24..0e19e947b1 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -119,7 +119,7 @@ extern COSTABLE_CONST FFTSample* const FFT_NAME(ff_cos_tabs)[17];
/**
* Initialize the cosine table in ff_cos_tabs[index]
- * \param index index in ff_cos_tabs array of the table to initialize
+ * @param index index in ff_cos_tabs array of the table to initialize
*/
void ff_init_ff_cos_tabs(int index);
diff --git a/libavcodec/g729dec.c b/libavcodec/g729dec.c
index 32db0597e3..c4a883f392 100644
--- a/libavcodec/g729dec.c
+++ b/libavcodec/g729dec.c
@@ -116,7 +116,7 @@ static const G729FormatDescription format_g729d_6k4 = {
};
/**
- * \brief pseudo random number generator
+ * @brief pseudo random number generator
*/
static inline uint16_t g729_prng(uint16_t value)
{
diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index 080b6a93b5..27fba4b628 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -97,7 +97,7 @@ found:
return i-(state&5);
}
-/*!
+/**
* Parse NAL units of found picture and decode some basic information.
*
* @param s parser context.
diff --git a/libavcodec/lagarith.c b/libavcodec/lagarith.c
index 02d3533b0c..5cff77f58c 100644
--- a/libavcodec/lagarith.c
+++ b/libavcodec/lagarith.c
@@ -32,25 +32,25 @@
#include "lagarithrac.h"
enum LagarithFrameType {
- FRAME_RAW = 1, /*!< uncompressed */
- FRAME_U_RGB24 = 2, /*!< unaligned RGB24 */
- FRAME_ARITH_YUY2 = 3, /*!< arithmetic coded YUY2 */
- FRAME_ARITH_RGB24 = 4, /*!< arithmetic coded RGB24 */
- FRAME_SOLID_GRAY = 5, /*!< solid grayscale color frame */
- FRAME_SOLID_COLOR = 6, /*!< solid non-grayscale color frame */
- FRAME_OLD_ARITH_RGB = 7, /*!< obsolete arithmetic coded RGB (no longer encoded by upstream since version 1.1.0) */
- FRAME_ARITH_RGBA = 8, /*!< arithmetic coded RGBA */
- FRAME_SOLID_RGBA = 9, /*!< solid RGBA color frame */
- FRAME_ARITH_YV12 = 10, /*!< arithmetic coded YV12 */
- FRAME_REDUCED_RES = 11, /*!< reduced resolution YV12 frame */
+ FRAME_RAW = 1, /**< uncompressed */
+ FRAME_U_RGB24 = 2, /**< unaligned RGB24 */
+ FRAME_ARITH_YUY2 = 3, /**< arithmetic coded YUY2 */
+ FRAME_ARITH_RGB24 = 4, /**< arithmetic coded RGB24 */
+ FRAME_SOLID_GRAY = 5, /**< solid grayscale color frame */
+ FRAME_SOLID_COLOR = 6, /**< solid non-grayscale color frame */
+ FRAME_OLD_ARITH_RGB = 7, /**< obsolete arithmetic coded RGB (no longer encoded by upstream since version 1.1.0) */
+ FRAME_ARITH_RGBA = 8, /**< arithmetic coded RGBA */
+ FRAME_SOLID_RGBA = 9, /**< solid RGBA color frame */
+ FRAME_ARITH_YV12 = 10, /**< arithmetic coded YV12 */
+ FRAME_REDUCED_RES = 11, /**< reduced resolution YV12 frame */
};
typedef struct LagarithContext {
AVCodecContext *avctx;
AVFrame picture;
DSPContext dsp;
- int zeros; /*!< number of consecutive zero bytes encountered */
- int zeros_rem; /*!< number of zero bytes remaining to output */
+ int zeros; /**< number of consecutive zero bytes encountered */
+ int zeros_rem; /**< number of zero bytes remaining to output */
} LagarithContext;
/**
diff --git a/libavcodec/lagarithrac.h b/libavcodec/lagarithrac.h
index 2cb7323076..8c78538f21 100644
--- a/libavcodec/lagarithrac.h
+++ b/libavcodec/lagarithrac.h
@@ -40,15 +40,15 @@ typedef struct lag_rac {
AVCodecContext *avctx;
unsigned low;
unsigned range;
- unsigned scale; /*!< Number of bits of precision in range. */
- unsigned hash_shift; /*!< Number of bits to shift to calculate hash for radix search. */
+ unsigned scale; /**< Number of bits of precision in range. */
+ unsigned hash_shift; /**< Number of bits to shift to calculate hash for radix search. */
- const uint8_t *bytestream_start; /*!< Start of input bytestream. */
- const uint8_t *bytestream; /*!< Current position in input bytestream. */
- const uint8_t *bytestream_end; /*!< End position of input bytestream. */
+ const uint8_t *bytestream_start; /**< Start of input bytestream. */
+ const uint8_t *bytestream; /**< Current position in input bytestream. */
+ const uint8_t *bytestream_end; /**< End position of input bytestream. */
- uint32_t prob[258]; /*!< Table of cumulative probability for each symbol. */
- uint8_t range_hash[256]; /*!< Hash table mapping upper byte to approximate symbol. */
+ uint32_t prob[258]; /**< Table of cumulative probability for each symbol. */
+ uint8_t range_hash[256]; /**< Hash table mapping upper byte to approximate symbol. */
} lag_rac;
void lag_rac_init(lag_rac *l, GetBitContext *gb, int length);
diff --git a/libavcodec/lcldec.c b/libavcodec/lcldec.c
index 57735ac6ff..7359864004 100644
--- a/libavcodec/lcldec.c
+++ b/libavcodec/lcldec.c
@@ -73,8 +73,8 @@ typedef struct LclDecContext {
/**
- * \param srcptr compressed source buffer, must be padded with at least 5 extra bytes
- * \param destptr must be padded sufficiently for av_memcpy_backptr
+ * @param srcptr compressed source buffer, must be padded with at least 5 extra bytes
+ * @param destptr must be padded sufficiently for av_memcpy_backptr
*/
static unsigned int mszh_decomp(const unsigned char * srcptr, int srclen, unsigned char * destptr, unsigned int destsize)
{
@@ -119,11 +119,11 @@ static unsigned int mszh_decomp(const unsigned char * srcptr, int srclen, unsign
#if CONFIG_ZLIB_DECODER
/**
- * \brief decompress a zlib-compressed data block into decomp_buf
- * \param src compressed input buffer
- * \param src_len data length in input buffer
- * \param offset offset in decomp_buf
- * \param expected expected decompressed length
+ * @brief decompress a zlib-compressed data block into decomp_buf
+ * @param src compressed input buffer
+ * @param src_len data length in input buffer
+ * @param offset offset in decomp_buf
+ * @param expected expected decompressed length
*/
static int zlib_decomp(AVCodecContext *avctx, const uint8_t *src, int src_len, int offset, int expected)
{
diff --git a/libavcodec/lsp.c b/libavcodec/lsp.c
index 98ca490a76..0ff0f0986a 100644
--- a/libavcodec/lsp.c
+++ b/libavcodec/lsp.c
@@ -74,9 +74,9 @@ void ff_acelp_lsf2lspd(double *lsp, const float *lsf, int lp_order)
}
/**
- * \brief decodes polynomial coefficients from LSP
- * \param f [out] decoded polynomial coefficients (-0x20000000 <= (3.22) <= 0x1fffffff)
- * \param lsp LSP coefficients (-0x8000 <= (0.15) <= 0x7fff)
+ * @brief decodes polynomial coefficients from LSP
+ * @param f [out] decoded polynomial coefficients (-0x20000000 <= (3.22) <= 0x1fffffff)
+ * @param lsp LSP coefficients (-0x8000 <= (0.15) <= 0x7fff)
*/
static void lsp2poly(int* f, const int16_t* lsp, int lp_half_order)
{
diff --git a/libavcodec/lsp.h b/libavcodec/lsp.h
index e3af30d300..1230669b1a 100644
--- a/libavcodec/lsp.h
+++ b/libavcodec/lsp.h
@@ -30,12 +30,12 @@
*/
/**
- * \brief ensure a minimum distance between LSFs
- * \param[in,out] lsfq LSF to check and adjust
- * \param lsfq_min_distance minimum distance between LSFs
- * \param lsfq_min minimum allowed LSF value
- * \param lsfq_max maximum allowed LSF value
- * \param lp_order LP filter order
+ * @brief ensure a minimum distance between LSFs
+ * @param[in,out] lsfq LSF to check and adjust
+ * @param lsfq_min_distance minimum distance between LSFs
+ * @param lsfq_min minimum allowed LSF value
+ * @param lsfq_max maximum allowed LSF value
+ * @param lp_order LP filter order
*/
void ff_acelp_reorder_lsf(int16_t* lsfq, int lsfq_min_distance, int lsfq_min, int lsfq_max, int lp_order);
@@ -53,12 +53,12 @@ void ff_acelp_reorder_lsf(int16_t* lsfq, int lsfq_min_distance, int lsfq_min, in
void ff_set_min_dist_lsf(float *lsf, double min_spacing, int size);
/**
- * \brief Convert LSF to LSP
- * \param[out] lsp LSP coefficients (-0x8000 <= (0.15) < 0x8000)
- * \param lsf normalized LSF coefficients (0 <= (2.13) < 0x2000 * PI)
- * \param lp_order LP filter order
+ * @brief Convert LSF to LSP
+ * @param[out] lsp LSP coefficients (-0x8000 <= (0.15) < 0x8000)
+ * @param lsf normalized LSF coefficients (0 <= (2.13) < 0x2000 * PI)
+ * @param lp_order LP filter order
*
- * \remark It is safe to pass the same array into the lsf and lsp parameters.
+ * @remark It is safe to pass the same array into the lsf and lsp parameters.
*/
void ff_acelp_lsf2lsp(int16_t *lsp, const int16_t *lsf, int lp_order);
@@ -68,10 +68,10 @@ void ff_acelp_lsf2lsp(int16_t *lsp, const int16_t *lsf, int lp_order);
void ff_acelp_lsf2lspd(double *lsp, const float *lsf, int lp_order);
/**
- * \brief LSP to LP conversion (3.2.6 of G.729)
- * \param[out] lp decoded LP coefficients (-0x8000 <= (3.12) < 0x8000)
- * \param lsp LSP coefficients (-0x8000 <= (0.15) < 0x8000)
- * \param lp_half_order LP filter order, divided by 2
+ * @brief LSP to LP conversion (3.2.6 of G.729)
+ * @param[out] lp decoded LP coefficients (-0x8000 <= (3.12) < 0x8000)
+ * @param lsp LSP coefficients (-0x8000 <= (0.15) < 0x8000)
+ * @param lp_half_order LP filter order, divided by 2
*/
void ff_acelp_lsp2lpc(int16_t* lp, const int16_t* lsp, int lp_half_order);
@@ -81,12 +81,12 @@ void ff_acelp_lsp2lpc(int16_t* lp, const int16_t* lsp, int lp_half_order);
void ff_amrwb_lsp2lpc(const double *lsp, float *lp, int lp_order);
/**
- * \brief Interpolate LSP for the first subframe and convert LSP -> LP for both subframes (3.2.5 and 3.2.6 of G.729)
- * \param[out] lp_1st decoded LP coefficients for first subframe (-0x8000 <= (3.12) < 0x8000)
- * \param[out] lp_2nd decoded LP coefficients for second subframe (-0x8000 <= (3.12) < 0x8000)
- * \param lsp_2nd LSP coefficients of the second subframe (-0x8000 <= (0.15) < 0x8000)
- * \param lsp_prev LSP coefficients from the second subframe of the previous frame (-0x8000 <= (0.15) < 0x8000)
- * \param lp_order LP filter order
+ * @brief Interpolate LSP for the first subframe and convert LSP -> LP for both subframes (3.2.5 and 3.2.6 of G.729)
+ * @param[out] lp_1st decoded LP coefficients for first subframe (-0x8000 <= (3.12) < 0x8000)
+ * @param[out] lp_2nd decoded LP coefficients for second subframe (-0x8000 <= (3.12) < 0x8000)
+ * @param lsp_2nd LSP coefficients of the second subframe (-0x8000 <= (0.15) < 0x8000)
+ * @param lsp_prev LSP coefficients from the second subframe of the previous frame (-0x8000 <= (0.15) < 0x8000)
+ * @param lp_order LP filter order
*/
void ff_acelp_lp_decode(int16_t* lp_1st, int16_t* lp_2nd, const int16_t* lsp_2nd, const int16_t* lsp_prev, int lp_order);
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index c12ebf4c7c..46068bfe3d 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -248,7 +248,7 @@ static int cmp_internal(MpegEncContext *s, const int x, const int y, const int s
}
}
-/*! \brief compares a block (either a full macroblock or a partition thereof)
+/** @brief compares a block (either a full macroblock or a partition thereof)
against a proposed motion-compensated prediction of that block
*/
static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c
index 461e85932b..5d319c5da2 100644
--- a/libavcodec/motion_est_template.c
+++ b/libavcodec/motion_est_template.c
@@ -992,8 +992,8 @@ static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dm
return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
}
-/*!
- \param P[10][2] a list of candidate mvs to check before starting the
+/**
+ @param P[10][2] a list of candidate mvs to check before starting the
iterative search. If one of the candidates is close to the optimal mv, then
it takes fewer iterations. And it increases the chance that we find the
optimal mv.
@@ -1003,12 +1003,12 @@ static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int
int ref_mv_scale, int flags, int size, int h)
{
MotionEstContext * const c= &s->me;
- int best[2]={0, 0}; /*!< x and y coordinates of the best motion vector.
+ int best[2]={0, 0}; /**< x and y coordinates of the best motion vector.
i.e. the difference between the position of the
block currently being encoded and the position of
the block chosen to predict it from. */
int d; ///< the score (cmp + penalty) of any given mv
- int dmin; /*!< the best value of d, i.e. the score
+ int dmin; /**< the best value of d, i.e. the score
corresponding to the mv stored in best[]. */
int map_generation;
int penalty_factor;
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index a0ff354a08..2a54329a49 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -154,7 +154,7 @@ typedef struct MotionEstContext{
uint32_t *score_map; ///< map to store the scores
int map_generation;
int pre_penalty_factor;
- int penalty_factor; /*!< an estimate of the bits required to
+ int penalty_factor; /**< an estimate of the bits required to
code a given mv value, e.g. (1,0) takes
more bits than (0,0). We have to
estimate whether any reduction in
diff --git a/libavcodec/nuv.c b/libavcodec/nuv.c
index 6eb6de3101..f31be59d8d 100644
--- a/libavcodec/nuv.c
+++ b/libavcodec/nuv.c
@@ -63,11 +63,11 @@ static const uint8_t fallback_cquant[] = {
};
/**
- * \brief copy frame data from buffer to AVFrame, handling stride.
- * \param f destination AVFrame
- * \param src source buffer, does not use any line-stride
- * \param width width of the video frame
- * \param height height of the video frame
+ * @brief copy frame data from buffer to AVFrame, handling stride.
+ * @param f destination AVFrame
+ * @param src source buffer, does not use any line-stride
+ * @param width width of the video frame
+ * @param height height of the video frame
*/
static void copy_frame(AVFrame *f, const uint8_t *src,
int width, int height) {
@@ -77,7 +77,7 @@ static void copy_frame(AVFrame *f, const uint8_t *src,
}
/**
- * \brief extract quantization tables from codec data into our context
+ * @brief extract quantization tables from codec data into our context
*/
static int get_quant(AVCodecContext *avctx, NuvContext *c,
const uint8_t *buf, int size) {
@@ -94,7 +94,7 @@ static int get_quant(AVCodecContext *avctx, NuvContext *c,
}
/**
- * \brief set quantization tables from a quality value
+ * @brief set quantization tables from a quality value
*/
static void get_quant_quality(NuvContext *c, int quality) {
int i;
diff --git a/libavcodec/qcelpdata.h b/libavcodec/qcelpdata.h
index d79cea9f6c..82cc61d7ea 100644
--- a/libavcodec/qcelpdata.h
+++ b/libavcodec/qcelpdata.h
@@ -74,9 +74,9 @@ typedef struct {
static const float qcelp_hammsinc_table[4] = { -0.006822, 0.041249, -0.143459, 0.588863};
typedef struct {
- uint8_t index; /*!< index into the QCELPContext structure */
- uint8_t bitpos; /*!< position of the lowest bit in the value's byte */
- uint8_t bitlen; /*!< number of bits to read */
+ uint8_t index; /**< index into the QCELPContext structure */
+ uint8_t bitpos; /**< position of the lowest bit in the value's byte */
+ uint8_t bitlen; /**< number of bits to read */
} QCELPBitmap;
#define QCELP_OF(variable, bit, len) {offsetof(QCELPFrame, variable), bit, len}
diff --git a/libavcodec/qcelpdec.c b/libavcodec/qcelpdec.c
index 3ed821c81e..d565003a9e 100644
--- a/libavcodec/qcelpdec.c
+++ b/libavcodec/qcelpdec.c
@@ -46,7 +46,7 @@
typedef enum
{
- I_F_Q = -1, /*!< insufficient frame quality */
+ I_F_Q = -1, /**< insufficient frame quality */
SILENCE,
RATE_OCTAVE,
RATE_QUARTER,
@@ -58,12 +58,12 @@ typedef struct
{
GetBitContext gb;
qcelp_packet_rate bitrate;
- QCELPFrame frame; /*!< unpacked data frame */
+ QCELPFrame frame; /**< unpacked data frame */
uint8_t erasure_count;
- uint8_t octave_count; /*!< count the consecutive RATE_OCTAVE frames */
+ uint8_t octave_count; /**< count the consecutive RATE_OCTAVE frames */
float prev_lspf[10];
- float predictor_lspf[10];/*!< LSP predictor for RATE_OCTAVE and I_F_Q */
+ float predictor_lspf[10];/**< LSP predictor for RATE_OCTAVE and I_F_Q */
float pitch_synthesis_filter_mem[303];
float pitch_pre_filter_mem[303];
float rnd_fir_filter_mem[180];
diff --git a/libavcodec/rtjpeg.c b/libavcodec/rtjpeg.c
index 4c48f25b2c..303183f230 100644
--- a/libavcodec/rtjpeg.c
+++ b/libavcodec/rtjpeg.c
@@ -33,12 +33,12 @@
if (n) {skip_bits(gb, n);}
/**
- * \brief read one block from stream
- * \param gb contains stream data
- * \param block where data is written to
- * \param scan array containing the mapping stream address -> block position
- * \param quant quantization factors
- * \return 0 means the block is not coded, < 0 means an error occurred.
+ * @brief read one block from stream
+ * @param gb contains stream data
+ * @param block where data is written to
+ * @param scan array containing the mapping stream address -> block position
+ * @param quant quantization factors
+ * @return 0 means the block is not coded, < 0 means an error occurred.
*
* Note: GetBitContext is used to make the code simpler, since all data is
* aligned this could be done faster in a different way, e.g. as it is done
@@ -96,13 +96,13 @@ static inline int get_block(GetBitContext *gb, DCTELEM *block, const uint8_t *sc
}
/**
- * \brief decode one rtjpeg YUV420 frame
- * \param c context, must be initialized via rtjpeg_decode_init
- * \param f AVFrame to place decoded frame into. If parts of the frame
+ * @brief decode one rtjpeg YUV420 frame
+ * @param c context, must be initialized via rtjpeg_decode_init
+ * @param f AVFrame to place decoded frame into. If parts of the frame
* are not coded they are left unchanged, so consider initializing it
- * \param buf buffer containing input data
- * \param buf_size length of input data in bytes
- * \return number of bytes consumed from the input buffer
+ * @param buf buffer containing input data
+ * @param buf_size length of input data in bytes
+ * @return number of bytes consumed from the input buffer
*/
int rtjpeg_decode_frame_yuv420(RTJpegContext *c, AVFrame *f,
const uint8_t *buf, int buf_size) {
@@ -143,15 +143,15 @@ int rtjpeg_decode_frame_yuv420(RTJpegContext *c, AVFrame *f,
}
/**
- * \brief initialize an RTJpegContext, may be called multiple times
- * \param c context to initialize
- * \param dsp specifies the idct to use for decoding
- * \param width width of image, will be rounded down to the nearest multiple
+ * @brief initialize an RTJpegContext, may be called multiple times
+ * @param c context to initialize
+ * @param dsp specifies the idct to use for decoding
+ * @param width width of image, will be rounded down to the nearest multiple
* of 16 for decoding
- * \param height height of image, will be rounded down to the nearest multiple
+ * @param height height of image, will be rounded down to the nearest multiple
* of 16 for decoding
- * \param lquant luma quantization table to use
- * \param cquant chroma quantization table to use
+ * @param lquant luma quantization table to use
+ * @param cquant chroma quantization table to use
*/
void rtjpeg_decode_init(RTJpegContext *c, DSPContext *dsp,
int width, int height,
diff --git a/libavcodec/tableprint.h b/libavcodec/tableprint.h
index d81b9a387b..d3e4dd956f 100644
--- a/libavcodec/tableprint.h
+++ b/libavcodec/tableprint.h
@@ -56,7 +56,7 @@ void write_##type##_2d_array(const void *arg, int len, int len2)\
}
/**
- * \defgroup printfuncs Predefined functions for printing tables
+ * @defgroup printfuncs Predefined functions for printing tables
*
* \{
*/
diff --git a/libavcodec/twinvq.c b/libavcodec/twinvq.c
index f8e75bb933..e7aceebd5b 100644
--- a/libavcodec/twinvq.c
+++ b/libavcodec/twinvq.c
@@ -411,7 +411,7 @@ static inline float mulawinv(float y, float clip, float mu)
* a*b == 200 and the nearest integer is ill-defined, use a table to emulate
* the following broken float-based implementation used by the binary decoder:
*
- * \code
+ * @code
* static int very_broken_op(int a, int b)
* {
* static float test; // Ugh, force gcc to do the division first...
@@ -419,7 +419,7 @@ static inline float mulawinv(float y, float clip, float mu)
* test = a/400.;
* return b * test + 0.5;
* }
- * \endcode
+ * @endcode
*
* @note if this function is replaced by just ROUNDED_DIV(a*b,400.), the stddev
* between the original file (before encoding with Yamaha encoder) and the
@@ -938,14 +938,14 @@ static void permutate_in_line(int16_t *tab, int num_vect, int num_blocks,
/**
* Interpret the input data as in the following table:
*
- * \verbatim
+ * @verbatim
*
* abcdefgh
* ijklmnop
* qrstuvw
* x123456
*
- * \endverbatim
+ * @endverbatim
*
* and transpose it, giving the output
* aiqxbjr1cks2dlt3emu4fvn5gow6hp
diff --git a/libavcodec/vaapi.c b/libavcodec/vaapi.c
index de028a0a7e..774fde840f 100644
--- a/libavcodec/vaapi.c
+++ b/libavcodec/vaapi.c
@@ -24,7 +24,7 @@
#include "vaapi_internal.h"
/**
- * \addtogroup VAAPI_Decoding
+ * @addtogroup VAAPI_Decoding
*
* @{
*/
diff --git a/libavcodec/vaapi.h b/libavcodec/vaapi.h
index 07568a47fc..4c3bb9bb52 100644
--- a/libavcodec/vaapi.h
+++ b/libavcodec/vaapi.h
@@ -27,8 +27,8 @@
#include <stdint.h>
/**
- * \defgroup VAAPI_Decoding VA API Decoding
- * \ingroup Decoder
+ * @defgroup VAAPI_Decoding VA API Decoding
+ * @ingroup Decoder
* @{
*/
diff --git a/libavcodec/vaapi_internal.h b/libavcodec/vaapi_internal.h
index 2c0fdf945e..43fa889d15 100644
--- a/libavcodec/vaapi_internal.h
+++ b/libavcodec/vaapi_internal.h
@@ -30,7 +30,7 @@
#include "mpegvideo.h"
/**
- * \addtogroup VAAPI_Decoding
+ * @addtogroup VAAPI_Decoding
*
* @{
*/
diff --git a/libavcodec/vdpau.c b/libavcodec/vdpau.c
index 19bd96bc15..9fbcbf9a3f 100644
--- a/libavcodec/vdpau.c
+++ b/libavcodec/vdpau.c
@@ -33,7 +33,7 @@
#include "vdpau_internal.h"
/**
- * \addtogroup VDPAU_Decoding
+ * @addtogroup VDPAU_Decoding
*
* @{
*/
diff --git a/libavcodec/vdpau.h b/libavcodec/vdpau.h
index 0dc6fb850b..f3a547184d 100644
--- a/libavcodec/vdpau.h
+++ b/libavcodec/vdpau.h
@@ -25,7 +25,7 @@
#define AVCODEC_VDPAU_H
/**
- * \defgroup Decoder VDPAU Decoder and Renderer
+ * @defgroup Decoder VDPAU Decoder and Renderer
*
* VDPAU hardware acceleration has two modules
* - VDPAU decoding
@@ -38,25 +38,25 @@
* and rendering (API calls) are done as part of the VDPAU
* presentation (vo_vdpau.c) module.
*
- * \defgroup VDPAU_Decoding VDPAU Decoding
- * \ingroup Decoder
+ * @defgroup VDPAU_Decoding VDPAU Decoding
+ * @ingroup Decoder
* @{
*/
#include <vdpau/vdpau.h>
#include <vdpau/vdpau_x11.h>
-/** \brief The videoSurface is used for rendering. */
+/** @brief The videoSurface is used for rendering. */
#define FF_VDPAU_STATE_USED_FOR_RENDER 1
/**
- * \brief The videoSurface is needed for reference/prediction.
+ * @brief The videoSurface is needed for reference/prediction.
* The codec manipulates this.
*/
#define FF_VDPAU_STATE_USED_FOR_REFERENCE 2
/**
- * \brief This structure is used as a callback between the FFmpeg
+ * @brief This structure is used as a callback between the FFmpeg
* decoder (vd_) and presentation (vo_) module.
* This is used for defining a video frame containing surface,
* picture parameter, bitstream information etc which are passed
diff --git a/libavcodec/x86/idct_sse2_xvid.c b/libavcodec/x86/idct_sse2_xvid.c
index 5185d61e54..fc75a57519 100644
--- a/libavcodec/x86/idct_sse2_xvid.c
+++ b/libavcodec/x86/idct_sse2_xvid.c
@@ -43,7 +43,7 @@
#include "idct_xvid.h"
#include "dsputil_mmx.h"
-/*!
+/**
* @file
* @brief SSE2 idct compatible with xvidmmx
*/
diff --git a/libavcodec/x86/idct_xvid.h b/libavcodec/x86/idct_xvid.h
index 5fdc20d3ea..be91d1c68a 100644
--- a/libavcodec/x86/idct_xvid.h
+++ b/libavcodec/x86/idct_xvid.h
@@ -18,7 +18,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-/*!
+/**
* @file
* header for Xvid IDCT functions
*/
diff --git a/libavcodec/xsubenc.c b/libavcodec/xsubenc.c
index a7e3a891d4..0e950d1856 100644
--- a/libavcodec/xsubenc.c
+++ b/libavcodec/xsubenc.c
@@ -36,8 +36,8 @@
/**
* Encode a single color run. At most 16 bits will be used.
- * \param len length of the run, values > 255 mean "until end of line", may not be < 0.
- * \param color color to encode, only the lowest two bits are used and all others must be 0.
+ * @param len length of the run, values > 255 mean "until end of line", may not be < 0.
+ * @param color color to encode, only the lowest two bits are used and all others must be 0.
*/
static void put_xsub_rle(PutBitContext *pb, int len, int color)
{