From 40d0e665d09aca5918c0b70b7045f32fae71f3eb Mon Sep 17 00:00:00 2001
From: Ramiro Polla
Date: Thu, 8 May 2008 21:11:24 +0000
Subject: Do not misuse long as the size of a register in x86.

typedef x86_reg as the appropriate size and use it instead.

Originally committed as revision 13081 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
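Note (added for context, not part of the original commit): "long" is only
register-sized by accident. It matches the register width on 32-bit x86 and
on LP64 Unix systems, but on LLP64 targets such as 64-bit Windows it stays
32 bits wide while the general-purpose registers are 64 bits, so casting a
stride or pointer to long for an "r" inline-asm constraint hands the asm
template a half-width operand. The sketch below illustrates the failure
mode and the fix; copy_row is a hypothetical helper written for this note,
not code from the patch, and only the typedef mirrors what the patch
actually adds to libavutil/x86_cpu.h:

    #include <stdint.h>

    /* Same idea as the typedef this patch adds to libavutil/x86_cpu.h:
     * an integer type that is always exactly as wide as a general-purpose
     * register on the target x86 ABI. */
    #ifdef ARCH_X86_64
    typedef int64_t x86_reg;  /* registers are 64-bit even where long is not */
    #else
    typedef int32_t x86_reg;
    #endif

    /* Hypothetical example. With "r"((long)stride) on an LLP64 system,
     * gcc allocates a 32-bit register for the operand; used as the index
     * of a 64-bit addressing mode like (%1,%2), that either fails to
     * assemble or leaves the upper half of the index undefined. Casting
     * to x86_reg makes the operand register-sized on every ABI. */
    static void copy_row(uint8_t *dst, const uint8_t *src, int stride)
    {
        __asm__ volatile(
            "movq  (%1),    %%mm0    \n\t"  /* load 8 bytes from src          */
            "movq  %%mm0,   (%0)     \n\t"  /* store them to dst              */
            "movq  (%1,%2), %%mm0    \n\t"  /* load 8 bytes from src + stride */
            "movq  %%mm0,   (%0,%2)  \n\t"  /* store them to dst + stride     */
            "emms                    \n\t"  /* leave the MMX state clean      */
            :: "r"(dst), "r"(src), "r"((x86_reg)stride)  /* not (long)stride */
            : "memory");
    }

On ILP32 and LP64 systems the new casts compile to exactly the same code as
before, so the change below is purely a portability fix; none of the asm
templates themselves are touched.
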
 libavcodec/i386/cavsdsp_mmx.c                 |  7 +--
 libavcodec/i386/cpuid.c                       |  2 +-
 libavcodec/i386/dsputil_h264_template_mmx.c   |  4 +-
 libavcodec/i386/dsputil_h264_template_ssse3.c |  8 ++--
 libavcodec/i386/dsputil_mmx.c                 | 64 +++++++++++++--------------
 libavcodec/i386/dsputil_mmx_avg.h             | 44 +++++++++---------
 libavcodec/i386/dsputil_mmx_qns.h             |  4 +-
 libavcodec/i386/dsputil_mmx_rnd.h             | 16 +++----
 libavcodec/i386/dsputilenc_mmx.c              | 40 ++++++++---------
 libavcodec/i386/fft_3dn.c                     |  4 +-
 libavcodec/i386/fft_3dn2.c                    |  7 ++-
 libavcodec/i386/fft_sse.c                     |  7 ++-
 libavcodec/i386/flacdsp_mmx.c                 |  9 ++--
 libavcodec/i386/h264_i386.h                   |  2 +-
 libavcodec/i386/h264dsp_mmx.c                 | 56 +++++++++++------------
 libavcodec/i386/motion_est_mmx.c              | 22 ++++-----
 libavcodec/i386/mpegvideo_mmx.c               | 12 ++---
 libavcodec/i386/mpegvideo_mmx_template.c      |  2 +-
 libavcodec/i386/snowdsp_mmx.c                 | 32 +++++++-------
 libavcodec/i386/vc1dsp_mmx.c                  | 18 ++++----
 libavutil/x86_cpu.h                           |  2 +
 21 files changed, 187 insertions(+), 175 deletions(-)

diff --git a/libavcodec/i386/cavsdsp_mmx.c b/libavcodec/i386/cavsdsp_mmx.c
index 141382fb07..618c60dce6 100644
--- a/libavcodec/i386/cavsdsp_mmx.c
+++ b/libavcodec/i386/cavsdsp_mmx.c
@@ -25,6 +25,7 @@
 #include "dsputil.h"
 #include "dsputil_mmx.h"
 #include "common.h"
+#include "x86_cpu.h"
 
 /*****************************************************************************
  *
@@ -301,7 +302,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
         VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
 \
         : "+a"(src), "+c"(dst)\
-        : "S"((long)srcStride), "D"((long)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
+        : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
         : "memory"\
     );\
     if(h==16){\
@@ -316,7 +317,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
         VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
 \
         : "+a"(src), "+c"(dst)\
-        : "S"((long)srcStride), "D"((long)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
+        : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
         : "memory"\
     );\
     }\
@@ -367,7 +368,7 @@ static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstSt
         "decl %2                \n\t"\
         " jnz 1b                \n\t"\
         : "+a"(src), "+c"(dst), "+m"(h)\
-        : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
+        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
         : "memory"\
     );\
 }\
diff --git a/libavcodec/i386/cpuid.c b/libavcodec/i386/cpuid.c
index 879d36cd38..a07749d1d3 100644
--- a/libavcodec/i386/cpuid.c
+++ b/libavcodec/i386/cpuid.c
@@ -42,7 +42,7 @@ int mm_support(void)
     int rval = 0;
     int eax, ebx, ecx, edx;
     int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
-    long a, c;
+    x86_reg a, c;
 
     asm volatile (
         /* See if CPUID instruction is supported ... */
diff --git a/libavcodec/i386/dsputil_h264_template_mmx.c b/libavcodec/i386/dsputil_h264_template_mmx.c
index 6aff9ff50d..a2daa0ba10 100644
--- a/libavcodec/i386/dsputil_h264_template_mmx.c
+++ b/libavcodec/i386/dsputil_h264_template_mmx.c
@@ -249,7 +249,7 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*
         "sub $2, %2                     \n\t"
         "jnz 1b                         \n\t"
         : "+r"(dst), "+r"(src), "+r"(h)
-        : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y)
+        : "r"((x86_reg)stride), "m"(ff_pw_32), "m"(x), "m"(y)
     );
 }
 
@@ -300,7 +300,7 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*
         "sub $1, %2\n\t"
         "jnz 1b\n\t"
         : "+r" (dst), "+r"(src), "+r"(h)
-        : "m" (ff_pw_32), "r"((long)stride)
+        : "m" (ff_pw_32), "r"((x86_reg)stride)
         : "%esi");
 }
diff --git a/libavcodec/i386/dsputil_h264_template_ssse3.c b/libavcodec/i386/dsputil_h264_template_ssse3.c
index 2ff685d76d..5345ccc1d8 100644
--- a/libavcodec/i386/dsputil_h264_template_ssse3.c
+++ b/libavcodec/i386/dsputil_h264_template_ssse3.c
@@ -72,7 +72,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
             "lea (%0,%3,2), %0          \n\t"
             "jg 1b                      \n\t"
             :"+r"(dst), "+r"(src), "+r"(h)
-            :"r"((long)stride)
+            :"r"((x86_reg)stride)
         );
     } else {
         asm volatile(
@@ -100,7 +100,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
             "lea (%0,%3,2), %0          \n\t"
             "jg 1b                      \n\t"
             :"+r"(dst), "+r"(src), "+r"(h)
-            :"r"((long)stride)
+            :"r"((x86_reg)stride)
         );
     }
     return;
@@ -154,7 +154,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
         "lea (%0,%3,2), %0          \n\t"
         "jg 1b                      \n\t"
         :"+r"(dst), "+r"(src), "+r"(h)
-        :"r"((long)stride)
+        :"r"((x86_reg)stride)
     );
 }
 
@@ -202,7 +202,7 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*
         "lea (%0,%3,2), %0          \n\t"
         "jg 1b                      \n\t"
         :"+r"(dst), "+r"(src), "+r"(h)
-        :"r"((long)stride)
+        :"r"((x86_reg)stride)
     );
 }
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 620b9a2478..b8beddded9 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -240,7 +240,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
             "movq %%mm2, (%0, %1)       \n\t"
             "movq %%mm4, (%0, %1, 2)    \n\t"
             "movq %%mm6, (%0, %2)       \n\t"
-            ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "m"(*p)
+            ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "m"(*p)
             :"memory");
         pix += line_size*4;
         p += 32;
@@ -265,7 +265,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
         "movq %%mm2, (%0, %1)       \n\t"
         "movq %%mm4, (%0, %1, 2)    \n\t"
         "movq %%mm6, (%0, %2)       \n\t"
-        ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "r"(p)
+        ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "r"(p)
         :"memory");
 }
 
@@ -349,7 +349,7 @@ static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size
         "subl $4, %0                    \n\t"
         "jnz 1b                         \n\t"
         : "+g"(h), "+r" (pixels), "+r" (block)
-        : "r"((long)line_size)
+        : "r"((x86_reg)line_size)
         : "%"REG_a, "memory"
         );
 }
 
@@ -375,7 +375,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size
         "subl $4, %0                    \n\t"
         "jnz 1b                         \n\t"
         : "+g"(h), "+r" (pixels), "+r" (block)
-        : "r"((long)line_size)
+        : "r"((x86_reg)line_size)
         : "%"REG_a, "memory"
         );
 }
 
@@ -409,7 +409,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz
         "subl $4, %0                    \n\t"
         "jnz 1b                         \n\t"
         : "+g"(h), "+r" (pixels), "+r" (block)
-        : "r"((long)line_size)
+        : "r"((x86_reg)line_size)
"r"((x86_reg)line_size) : "%"REG_a, "memory" ); } @@ -431,7 +431,7 @@ static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si "lea (%2,%3,4), %2 \n\t" "jnz 1b \n\t" : "+g"(h), "+r" (pixels), "+r" (block) - : "r"((long)line_size), "r"(3L*line_size) + : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size) : "memory" ); } @@ -457,7 +457,7 @@ static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si "lea (%2,%3,4), %2 \n\t" "jnz 1b \n\t" : "+g"(h), "+r" (pixels), "+r" (block) - : "r"((long)line_size), "r"(3L*line_size) + : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size) : "memory" ); } @@ -480,7 +480,7 @@ static void clear_blocks_mmx(DCTELEM *blocks) } static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ - long i=0; + x86_reg i=0; asm volatile( "1: \n\t" "movq (%1, %0), %%mm0 \n\t" @@ -495,14 +495,14 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ "cmp %3, %0 \n\t" " jb 1b \n\t" : "+r" (i) - : "r"(src), "r"(dst), "r"((long)w-15) + : "r"(src), "r"(dst), "r"((x86_reg)w-15) ); for(; inbits; - long i, j; + long j; + x86_reg i; long nblocks, nloops; FFTComplex *p, *cptr; diff --git a/libavcodec/i386/fft_3dn2.c b/libavcodec/i386/fft_3dn2.c index 6d063321d7..c5337b8c2a 100644 --- a/libavcodec/i386/fft_3dn2.c +++ b/libavcodec/i386/fft_3dn2.c @@ -20,6 +20,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "dsputil.h" +#include "x86_cpu.h" static const int p1m1[2] __attribute__((aligned(8))) = { 0, 1 << 31 }; @@ -30,7 +31,8 @@ static const int m1p1[2] __attribute__((aligned(8))) = void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z) { int ln = s->nbits; - long i, j; + long j; + x86_reg i; long nblocks, nloops; FFTComplex *p, *cptr; @@ -124,7 +126,8 @@ void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z) void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input, FFTSample *tmp) { - long k, n8, n4, n2, n; + long n8, n4, n2, n; + x86_reg k; const uint16_t *revtab = s->fft.revtab; const FFTSample *tcos = s->tcos; const FFTSample *tsin = s->tsin; diff --git a/libavcodec/i386/fft_sse.c b/libavcodec/i386/fft_sse.c index 39e64c7008..3a0f2d7427 100644 --- a/libavcodec/i386/fft_sse.c +++ b/libavcodec/i386/fft_sse.c @@ -19,6 +19,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "dsputil.h" +#include "x86_cpu.h" static const int p1p1p1m1[4] __attribute__((aligned(16))) = { 0, 0, 0, 1 << 31 }; @@ -48,7 +49,8 @@ static void print_v4sf(const char *str, __m128 a) void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) { int ln = s->nbits; - long i, j; + x86_reg i; + long j; long nblocks, nloops; FFTComplex *p, *cptr; @@ -142,7 +144,8 @@ void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input, FFTSample *tmp) { - long k, n8, n4, n2, n; + x86_reg k; + long n8, n4, n2, n; const uint16_t *revtab = s->fft.revtab; const FFTSample *tcos = s->tcos; const FFTSample *tsin = s->tsin; diff --git a/libavcodec/i386/flacdsp_mmx.c b/libavcodec/i386/flacdsp_mmx.c index e799ce421d..2096e3305b 100644 --- a/libavcodec/i386/flacdsp_mmx.c +++ b/libavcodec/i386/flacdsp_mmx.c @@ -20,13 +20,14 @@ */ #include "dsputil_mmx.h" +#include "x86_cpu.h" static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data) { double c = 2.0 / (len-1.0); int n2 = len>>1; - long i = -n2*sizeof(int32_t); - long j = n2*sizeof(int32_t); + x86_reg i = -n2*sizeof(int32_t); + x86_reg j = 
     asm volatile(
         "movsd   %0,     %%xmm7 \n\t"
         "movapd  %1,     %%xmm6 \n\t"
@@ -71,7 +72,7 @@ void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag,
     double *data1 = tmp + lag;
     int j;
 
-    if((long)data1 & 15)
+    if((x86_reg)data1 & 15)
         data1++;
 
     apply_welch_window_sse2(data, len, data1);
@@ -81,7 +82,7 @@ void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag,
     data1[len] = 0.0;
 
     for(j=0; j<lag; j+=2){
diff --git a/libavcodec/i386/mpegvideo_mmx.c b/libavcodec/i386/mpegvideo_mmx.c
--- a/libavcodec/i386/mpegvideo_mmx.c
+++ b/libavcodec/i386/mpegvideo_mmx.c
     assert(s->block_last_index[n]>=0);
@@ -335,7 +335,7 @@ asm volatile(
 static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
                                      DCTELEM *block, int n, int qscale)
 {
-    long nCoeffs;
+    x86_reg nCoeffs;
     const uint16_t *quant_matrix;
     int block0;
 
@@ -401,7 +401,7 @@ asm volatile(
 static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
                                      DCTELEM *block, int n, int qscale)
 {
-    long nCoeffs;
+    x86_reg nCoeffs;
     const uint16_t *quant_matrix;
 
     assert(s->block_last_index[n]>=0);
diff --git a/libavcodec/i386/mpegvideo_mmx_template.c b/libavcodec/i386/mpegvideo_mmx_template.c
index 7d8d278c9f..c6b989420a 100644
--- a/libavcodec/i386/mpegvideo_mmx_template.c
+++ b/libavcodec/i386/mpegvideo_mmx_template.c
@@ -95,7 +95,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
                             DCTELEM *block, int n,
                             int qscale, int *overflow)
 {
-    long last_non_zero_p1;
+    x86_reg last_non_zero_p1;
     int level=0, q; //=0 is because gcc says uninitialized ...
     const uint16_t *qmat, *bias;
     DECLARE_ALIGNED_16(int16_t, temp_block[64]);
diff --git a/libavcodec/i386/snowdsp_mmx.c b/libavcodec/i386/snowdsp_mmx.c
index e43f7e9de8..c9671f0c1e 100644
--- a/libavcodec/i386/snowdsp_mmx.c
+++ b/libavcodec/i386/snowdsp_mmx.c
@@ -73,7 +73,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
         IDWTELEM * const dst = b+w2;
 
         i = 0;
-        for(; (((long)&dst[i]) & 0x1F) && i<w_l; i++){
+        for(; (((x86_reg)&dst[i]) & 0x1F) && i<w_l; i++){
 
 #define snow_inner_add_yblock_sse2_header \
     IDWTELEM * * dst_array = sb->line + src_y;\
-    long tmp;\
+    x86_reg tmp;\
     asm volatile(\
              "mov  %7, %%"REG_c"             \n\t"\
              "mov  %6, %2                    \n\t"\
@@ -667,7 +667,7 @@
          "jnz 1b                             \n\t"\
          :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
          :\
-         "rm"((long)(src_x<<1)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\
+         "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"((x86_reg)b_h),"m"((x86_reg)src_stride):\
          "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
 
 #define snow_inner_add_yblock_sse2_end_8\
@@ -684,8 +684,8 @@
          "dec %2                             \n\t"\
 snow_inner_add_yblock_sse2_end_common2
 
-static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
-                     int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
+                     int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
 snow_inner_add_yblock_sse2_header
 snow_inner_add_yblock_sse2_start_8("xmm1", "xmm5", "3", "0")
 snow_inner_add_yblock_sse2_accum_8("2", "8")
@@ -732,8 +732,8 @@ snow_inner_add_yblock_sse2_accum_8("0", "136")
 snow_inner_add_yblock_sse2_end_8
 }
 
-static void inner_add_yblock_bw_16_obmc_32_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
-                     int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+static void inner_add_yblock_bw_16_obmc_32_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
+                     int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
 snow_inner_add_yblock_sse2_header
 snow_inner_add_yblock_sse2_start_16("xmm1", "xmm5", "3", "0")
 snow_inner_add_yblock_sse2_accum_16("2", "16")
@@ -758,7 +758,7 @@ snow_inner_add_yblock_sse2_end_16
 
 #define snow_inner_add_yblock_mmx_header \
     IDWTELEM * * dst_array = sb->line + src_y;\
-    long tmp;\
+    x86_reg tmp;\
     asm volatile(\
         "mov  %7, %%"REG_c"             \n\t"\
         "mov  %6, %2                    \n\t"\
@@ -815,11 +815,11 @@ snow_inner_add_yblock_sse2_end_16
         "jnz 1b                         \n\t"\
         :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
         :\
-        "rm"((long)(src_x<<1)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\
+        "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"((x86_reg)b_h),"m"((x86_reg)src_stride):\
         "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
 
-static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
-                     int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
+                     int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
 snow_inner_add_yblock_mmx_header
 snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0")
 snow_inner_add_yblock_mmx_accum("2", "8", "0")
@@ -829,8 +829,8 @@ snow_inner_add_yblock_mmx_mix("0", "0")
 snow_inner_add_yblock_mmx_end("16")
 }
 
-static void inner_add_yblock_bw_16_obmc_32_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
-                     int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+static void inner_add_yblock_bw_16_obmc_32_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
+                     int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
 snow_inner_add_yblock_mmx_header
 snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0")
 snow_inner_add_yblock_mmx_accum("2", "16", "0")
diff --git a/libavcodec/i386/vc1dsp_mmx.c b/libavcodec/i386/vc1dsp_mmx.c
index 16fabd3e28..0e543e4fde 100644
--- a/libavcodec/i386/vc1dsp_mmx.c
+++ b/libavcodec/i386/vc1dsp_mmx.c
@@ -71,7 +71,7 @@ DECLARE_ALIGNED_16(const uint64_t, ff_pw_9) = 0x0009000900090009ULL;
 
 /** Sacrifying mm6 allows to pipeline loads from src */
 static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
-                                       const uint8_t *src, long int stride,
+                                       const uint8_t *src, x86_reg stride,
                                        int rnd, int64_t shift)
 {
     asm volatile(
@@ -107,7 +107,7 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
  * Data is already unpacked, so some operations can directly be made from
  * memory.
  */
-static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, long int stride,
+static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,
                                        const int16_t *src, int rnd)
 {
     int h = 8;
@@ -152,7 +152,7 @@ static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, long int stride,
  * Sacrify mm6 for *9 factor.
  */
 static void vc1_put_shift2_mmx(uint8_t *dst, const uint8_t *src,
-                               long int stride, int rnd, long int offset)
+                               x86_reg stride, int rnd, x86_reg offset)
 {
     rnd = 8-rnd;
     asm volatile(
@@ -259,7 +259,7 @@ DECLARE_ALIGNED_16(const uint64_t, ff_pw_18) = 0x0012001200120012ULL;
 #define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4) \
 static void \
 vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \
-                                 long int src_stride, \
+                                 x86_reg src_stride, \
                                  int rnd, int64_t shift) \
 { \
     int h = 8; \
@@ -314,7 +314,7 @@ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \
  */
 #define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4) \
 static void \
-vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, long int stride, \
+vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \
                                  const int16_t *src, int rnd) \
 { \
     int h = 8; \
@@ -353,7 +353,7 @@ vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, long int stride, \
 #define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4) \
 static void \
 vc1_put_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \
-                        long int stride, int rnd, long int offset) \
+                        x86_reg stride, int rnd, x86_reg offset) \
 { \
     int h = 8; \
     src -= offset; \
@@ -387,9 +387,9 @@ MSPEL_FILTER13_8B (shift3, "0(%1 )", "0(%1,%3 )", "0(%1,%3,2)", "0(%1,%
 MSPEL_FILTER13_VER_16B(shift3, "0(%1 )", "0(%1,%3 )", "0(%1,%3,2)", "0(%1,%4 )")
 MSPEL_FILTER13_HOR_16B(shift3, "2*0(%1)", "2*1(%1)", "2*2(%1)", "2*3(%1)")
 
-typedef void (*vc1_mspel_mc_filter_ver_16bits)(int16_t *dst, const uint8_t *src, long int src_stride, int rnd, int64_t shift);
-typedef void (*vc1_mspel_mc_filter_hor_16bits)(uint8_t *dst, long int dst_stride, const int16_t *src, int rnd);
-typedef void (*vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, long int stride, int rnd, long int offset);
+typedef void (*vc1_mspel_mc_filter_ver_16bits)(int16_t *dst, const uint8_t *src, x86_reg src_stride, int rnd, int64_t shift);
+typedef void (*vc1_mspel_mc_filter_hor_16bits)(uint8_t *dst, x86_reg dst_stride, const int16_t *src, int rnd);
+typedef void (*vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, x86_reg stride, int rnd, x86_reg offset);
 
 /**
  * Interpolates fractional pel values by applying proper vertical then
diff --git a/libavutil/x86_cpu.h b/libavutil/x86_cpu.h
index 750e061c21..43619ad79d 100644
--- a/libavutil/x86_cpu.h
+++ b/libavutil/x86_cpu.h
@@ -31,6 +31,7 @@
 #    define REG_D "rdi"
 #    define REG_S "rsi"
 #    define PTR_SIZE "8"
+typedef int64_t x86_reg;
 
 #    define REG_SP "rsp"
 #    define REG_BP "rbp"
@@ -50,6 +51,7 @@
 #    define REG_D "edi"
 #    define REG_S "esi"
 #    define PTR_SIZE "4"
+typedef int32_t x86_reg;
 
 #    define REG_SP "esp"
 #    define REG_BP "ebp"
-- 
cgit v1.2.3