diff options
Diffstat (limited to 'libavfilter/libmpcodecs/vf_fspp.c')
-rw-r--r-- | libavfilter/libmpcodecs/vf_fspp.c | 57 |
1 files changed, 32 insertions, 25 deletions
diff --git a/libavfilter/libmpcodecs/vf_fspp.c b/libavfilter/libmpcodecs/vf_fspp.c index a8a33e2644..b786917313 100644 --- a/libavfilter/libmpcodecs/vf_fspp.c +++ b/libavfilter/libmpcodecs/vf_fspp.c @@ -75,7 +75,7 @@ static const short custom_threshold[64]= 20, 27, 26, 23, 20, 15, 11, 5 }; -static const uint8_t __attribute__((aligned(32))) dither[8][8]={ +DECLARE_ASM_CONST(32, uint8_t, dither)[8][8]={ { 0, 48, 12, 60, 3, 51, 15, 63, }, { 32, 16, 44, 28, 35, 19, 47, 31, }, { 8, 56, 4, 52, 11, 59, 7, 55, }, @@ -215,11 +215,11 @@ static void store_slice_mmx(uint8_t *dst, int16_t *src, long dst_stride, long sr "psraw %%mm5, %%mm3 \n\t" "psraw %%mm5, %%mm4 \n\t" "1: \n\t" - "movq %%mm7, (%%"REG_S",%%"REG_a",) \n\t" + "movq %%mm7, (%%"REG_S",%%"REG_a") \n\t" "movq (%%"REG_S"), %%mm0 \n\t" "movq 8(%%"REG_S"), %%mm1 \n\t" - "movq %%mm7, 8(%%"REG_S",%%"REG_a",) \n\t" + "movq %%mm7, 8(%%"REG_S",%%"REG_a") \n\t" "paddw %%mm3, %%mm0 \n\t" "paddw %%mm4, %%mm1 \n\t" @@ -286,15 +286,15 @@ static void store_slice2_mmx(uint8_t *dst, int16_t *src, long dst_stride, long s "movq 8(%%"REG_S"), %%mm1 \n\t" "paddw %%mm3, %%mm0 \n\t" - "paddw (%%"REG_S",%%"REG_a",), %%mm0 \n\t" + "paddw (%%"REG_S",%%"REG_a"), %%mm0 \n\t" "paddw %%mm4, %%mm1 \n\t" - "movq 8(%%"REG_S",%%"REG_a",), %%mm6 \n\t" + "movq 8(%%"REG_S",%%"REG_a"), %%mm6 \n\t" - "movq %%mm7, (%%"REG_S",%%"REG_a",) \n\t" + "movq %%mm7, (%%"REG_S",%%"REG_a") \n\t" "psraw %%mm2, %%mm0 \n\t" "paddw %%mm6, %%mm1 \n\t" - "movq %%mm7, 8(%%"REG_S",%%"REG_a",) \n\t" + "movq %%mm7, 8(%%"REG_S",%%"REG_a") \n\t" "psraw %%mm2, %%mm1 \n\t" "packuswb %%mm1, %%mm0 \n\t" @@ -416,7 +416,7 @@ static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, const int stride= is_luma ? p->temp_stride : (width+16);//((width+16+15)&(~15)) const int step=6-p->log2_count; const int qps= 3 + is_luma; - int32_t __attribute__((aligned(32))) block_align[4*8*BLOCKSZ+ 4*8*BLOCKSZ]; + DECLARE_ALIGNED(32, int32_t, block_align)[4*8*BLOCKSZ+ 4*8*BLOCKSZ]; int16_t *block= (int16_t *)block_align; int16_t *block3=(int16_t *)(block_align+4*8*BLOCKSZ); @@ -873,7 +873,7 @@ static void column_fidct_c(int16_t* thr_adr, int16_t *data, int16_t *output, int static void column_fidct_mmx(int16_t* thr_adr, int16_t *data, int16_t *output, int cnt) { - uint64_t __attribute__((aligned(8))) temps[4]; + DECLARE_ALIGNED(8, uint64_t, temps)[4]; __asm__ volatile( ASMALIGN(4) "1: \n\t" @@ -1598,6 +1598,10 @@ static void column_fidct_mmx(int16_t* thr_adr, int16_t *data, int16_t *output, : "+S"(data), "+D"(output), "+c"(cnt), "=o"(temps) : "d"(thr_adr) + NAMED_CONSTRAINTS_ADD(ff_MM_FIX_0_707106781,MM_2,MM_FIX_1_414213562_A,MM_FIX_1_414213562,MM_FIX_0_382683433, + ff_MM_FIX_0_541196100,MM_FIX_1_306562965,MM_FIX_0_847759065) + NAMED_CONSTRAINTS_ADD(MM_FIX_0_566454497,MM_FIX_0_198912367,MM_FIX_2_613125930,MM_FIX_1_847759065, + MM_FIX_1_082392200,ff_MM_FIX_0_541196100,MM_FIX_1_306562965) : "%"REG_a ); } @@ -1674,7 +1678,7 @@ static void row_idct_c(int16_t* workspace, static void row_idct_mmx (int16_t* workspace, int16_t* output_adr, int output_stride, int cnt) { - uint64_t __attribute__((aligned(8))) temps[4]; + DECLARE_ALIGNED(8, uint64_t, temps)[4]; __asm__ volatile( "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t" "1: \n\t" @@ -1816,7 +1820,7 @@ static void row_idct_mmx (int16_t* workspace, "paddw (%%"REG_D"), %%mm5 \n\t" "psraw $3, %%mm7 \n\t" - "paddw (%%"REG_D",%%"REG_a",), %%mm1 \n\t" + "paddw (%%"REG_D",%%"REG_a"), %%mm1 \n\t" "paddw %%mm2, %%mm0 \n\t" "paddw (%%"REG_D",%%"REG_a",2), %%mm7 \n\t" @@ -1825,7 +1829,7 @@ static void row_idct_mmx (int16_t* workspace, "movq %%mm5, (%%"REG_D") \n\t" "paddw %%mm2, %%mm6 \n\t" - "movq %%mm1, (%%"REG_D",%%"REG_a",) \n\t" + "movq %%mm1, (%%"REG_D",%%"REG_a") \n\t" "psraw $3, %%mm0 \n\t" "movq %%mm7, (%%"REG_D",%%"REG_a",2) \n\t" @@ -1837,7 +1841,7 @@ static void row_idct_mmx (int16_t* workspace, "paddw (%%"REG_D",%%"REG_a",2), %%mm0 \n\t" "psubw %%mm4, %%mm5 \n\t" //d3 - "paddw (%%"REG_D",%%"REG_d",), %%mm3 \n\t" + "paddw (%%"REG_D",%%"REG_d"), %%mm3 \n\t" "psraw $3, %%mm6 \n\t" "paddw 1*8+%3, %%mm4 \n\t" //d4 @@ -1852,13 +1856,13 @@ static void row_idct_mmx (int16_t* workspace, "paddw (%%"REG_D"), %%mm5 \n\t" "psraw $3, %%mm4 \n\t" - "paddw (%%"REG_D",%%"REG_a",), %%mm4 \n\t" + "paddw (%%"REG_D",%%"REG_a"), %%mm4 \n\t" "add $"DCTSIZE_S"*2*4, %%"REG_S" \n\t" //4 rows - "movq %%mm3, (%%"REG_D",%%"REG_d",) \n\t" + "movq %%mm3, (%%"REG_D",%%"REG_d") \n\t" "movq %%mm6, (%%"REG_D",%%"REG_a",4) \n\t" "movq %%mm5, (%%"REG_D") \n\t" - "movq %%mm4, (%%"REG_D",%%"REG_a",) \n\t" + "movq %%mm4, (%%"REG_D",%%"REG_a") \n\t" "sub %%"REG_d", %%"REG_D" \n\t" "add $8, %%"REG_D" \n\t" @@ -1867,6 +1871,8 @@ static void row_idct_mmx (int16_t* workspace, : "+S"(workspace), "+D"(output_adr), "+c"(cnt), "=o"(temps) : "a"(output_stride*sizeof(short)) + NAMED_CONSTRAINTS_ADD(MM_FIX_1_414213562_A,MM_FIX_2_613125930,MM_FIX_1_847759065,MM_FIX_1_082392200, + MM_FIX_1_414213562,MM_DESCALE_RND) : "%"REG_d ); } @@ -1940,14 +1946,14 @@ static void row_fdct_c(int16_t *data, const uint8_t *pixels, int line_size, int static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, int cnt) { - uint64_t __attribute__((aligned(8))) temps[4]; + DECLARE_ALIGNED(8, uint64_t, temps)[4]; __asm__ volatile( "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t" "6: \n\t" "movd (%%"REG_S"), %%mm0 \n\t" "pxor %%mm7, %%mm7 \n\t" - "movd (%%"REG_S",%%"REG_a",), %%mm1 \n\t" + "movd (%%"REG_S",%%"REG_a"), %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "movd (%%"REG_S",%%"REG_a",2), %%mm2 \n\t" @@ -1962,7 +1968,7 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, "movd (%%"REG_S",%%"REG_a",4), %%mm3 \n\t" //7 ;prefetch! "movq %%mm1, %%mm6 \n\t" - "movd (%%"REG_S",%%"REG_d",), %%mm4 \n\t" //6 + "movd (%%"REG_S",%%"REG_d"), %%mm4 \n\t" //6 "punpcklbw %%mm7, %%mm3 \n\t" "psubw %%mm3, %%mm5 \n\t" @@ -1974,16 +1980,16 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, "movd (%%"REG_S",%%"REG_a",2), %%mm3 \n\t" //5 "paddw %%mm4, %%mm1 \n\t" - "movq %%mm5, 0*8+%3 \n\t" //t7 + "movq %%mm5, %3 \n\t" //t7 "punpcklbw %%mm7, %%mm3 \n\t" - "movq %%mm6, 1*8+%3 \n\t" //t6 + "movq %%mm6, %4 \n\t" //t6 "movq %%mm2, %%mm4 \n\t" "movd (%%"REG_S"), %%mm5 \n\t" //3 "paddw %%mm3, %%mm2 \n\t" - "movd (%%"REG_S",%%"REG_a",), %%mm6 \n\t" //4 + "movd (%%"REG_S",%%"REG_a"), %%mm6 \n\t" //4 "punpcklbw %%mm7, %%mm5 \n\t" "psubw %%mm3, %%mm4 \n\t" @@ -2023,7 +2029,7 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, "psubw %%mm1, %%mm5 \n\t" //d1 "movq %%mm0, %%mm6 \n\t" - "movq 1*8+%3, %%mm1 \n\t" + "movq %4, %%mm1 \n\t" "punpcklwd %%mm5, %%mm0 \n\t" "punpckhwd %%mm5, %%mm6 \n\t" @@ -2047,7 +2053,7 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, "movq %%mm7, "DCTSIZE_S"*3*2(%%"REG_D") \n\t" "psllw $2, %%mm3 \n\t" //t10 - "movq 0*8+%3, %%mm2 \n\t" + "movq %3, %%mm2 \n\t" "psllw $2, %%mm4 \n\t" //t11 "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm4 \n\t" //z3 @@ -2110,8 +2116,9 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, "dec %%"REG_c" \n\t" "jnz 6b \n\t" - : "+S"(pixels), "+D"(data), "+c"(cnt), "=o"(temps) + : "+S"(pixels), "+D"(data), "+c"(cnt), "=o"(temps), "=o"(temps[1]) : "a"(line_size) + NAMED_CONSTRAINTS_ADD(ff_MM_FIX_0_707106781,ff_MM_FIX_0_541196100,MM_FIX_0_382683433,MM_FIX_1_306562965) : "%"REG_d); } |