summary | refs | log | tree | commit | diff
path: root/libavcodec/ppc/dsputil_altivec.c
diff options
context:
space:
mode:
author: Romain Dolbeau <dolbeau@irisa.fr> 2003-01-19 19:00:45 +0000
committer: Michael Niedermayer <michaelni@gmx.at> 2003-01-19 19:00:45 +0000
commit: 35e5fb067e0099fde6c3429131f11433eb92f22b (patch)
tree: 50480aa9498173e6b2f12f977d6e948b8f90fa53 /libavcodec/ppc/dsputil_altivec.c
parent: 8c55915b14897fd9bdaae9241f2dd54c00a869ef (diff)
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
Originally committed as revision 1476 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/ppc/dsputil_altivec.c')
-rw-r--r-- libavcodec/ppc/dsputil_altivec.c 254
1 files changed, 198 insertions, 56 deletions
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c
index e7f34ee16c..fcae7c1180 100644
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -24,22 +24,6 @@
#include <sys/sysctl.h>
#endif
-#ifdef ALTIVEC_TBL_PERFORMANCE_REPORT
-unsigned long long perfdata[altivec_perf_total][altivec_data_total];
-/* list below must match enum in dsputil_altivec.h */
-static unsigned char* perfname[] = {
- "fft_calc",
- "gmc1",
- "dct_unquantize_h263",
- "idct_add",
- "idct_put",
- "put_pixels_clamped",
- "put_pixels16",
- "avg_pixels16"
-};
-#include <stdio.h>
-#endif
-
int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
int i;
@@ -648,12 +632,12 @@ extern UINT8 cropTbl[];
void put_pixels_clamped_altivec(const DCTELEM *block, UINT8 *restrict pixels,
int line_size)
{
-ALTIVEC_TBL_DECLARE(altivec_put_pixels_clamped_num, 1);
+POWERPC_TBL_DECLARE(altivec_put_pixels_clamped_num, 1);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
int i;
UINT8 *cm = cropTbl + MAX_NEG_CROP;
-ALTIVEC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1);
+POWERPC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1);
/* read the pixels */
for(i=0;i<8;i++) {
@@ -670,7 +654,7 @@ ALTIVEC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1);
block += 8;
}
-ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1);
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
register const vector short vczero = (const vector short)(0);
@@ -681,7 +665,7 @@ ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1);
pixelsv0, pixelsv1, pixelsv2, pixelsv3, pixelsv4,
pixelsv0old, pixelsv4old;
-ALTIVEC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1);
+POWERPC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1);
blockv0 = vec_ld(0, block);
blockv1 = vec_ld(16, block);
@@ -720,17 +704,17 @@ ALTIVEC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1);
vec_st(pixelsv3, 48, pixels);
}
-ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1);
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
-ALTIVEC_TBL_DECLARE(altivec_put_pixels16_num, 1);
+POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
int i;
-ALTIVEC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
+POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
for(i=0; i<h; i++) {
*((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l);
@@ -741,25 +725,24 @@ ALTIVEC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
block +=line_size;
}
-ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
-
- register vector unsigned char perm = vec_lvsl(0, pixels);
register vector unsigned char pixelsv1, pixelsv2;
int i;
-ALTIVEC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
+POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
for(i=0; i<h; i++) {
pixelsv1 = vec_ld(0, (unsigned char*)pixels);
pixelsv2 = vec_ld(16, (unsigned char*)pixels);
- vec_st(vec_perm(pixelsv1, pixelsv2, perm), 0, (unsigned char*)block);
+ vec_st(vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)),
+ 0, (unsigned char*)block);
pixels+=line_size;
block +=line_size;
}
-ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
@@ -767,11 +750,11 @@ ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
#define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
-ALTIVEC_TBL_DECLARE(altivec_avg_pixels16_num, 1);
+POWERPC_TBL_DECLARE(altivec_avg_pixels16_num, 1);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
int i;
-ALTIVEC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
for(i=0; i<h; i++) {
op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l));
@@ -782,32 +765,210 @@ ALTIVEC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
block +=line_size;
}
-ALTIVEC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
-
- register vector unsigned char perm = vec_lvsl(0, pixels);
register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
int i;
-ALTIVEC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
for(i=0; i<h; i++) {
pixelsv1 = vec_ld(0, (unsigned char*)pixels);
pixelsv2 = vec_ld(16, (unsigned char*)pixels);
blockv = vec_ld(0, block);
- pixelsv = vec_perm(pixelsv1, pixelsv2, perm);
+ pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels));
blockv = vec_avg(blockv,pixelsv);
vec_st(blockv, 0, (unsigned char*)block);
pixels+=line_size;
block +=line_size;
}
-ALTIVEC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
+void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels,
+ int line_size, int h)
+{ /* For each of h rows: block[0..7] = bytewise average (rounded up) of block[0..7] and pixels[0..7]; both pointers advance by line_size per row. */
+POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1); /* perf-counter macro, defined outside this view */
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+ int i;
+POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
+ for (i = 0; i < h; i++) {
+ *((uint32_t *) (block)) = /* bytes 0..3: (a|b) - (((a^b) & 0xFEFEFEFE) >> 1), i.e. four byte lanes averaged inside one 32-bit word */
+ (((*((uint32_t *) (block))) |
+ ((((const struct unaligned_32 *) (pixels))->l))) -
+ ((((*((uint32_t *) (block))) ^
+ ((((const struct unaligned_32 *) (pixels))->
+ l))) & 0xFEFEFEFEUL) >> 1)); /* the mask clears each byte's low bit so the shift cannot leak into the neighbouring byte */
+ *((uint32_t *) (block + 4)) = /* bytes 4..7: same formula on the second word */
+ (((*((uint32_t *) (block + 4))) |
+ ((((const struct unaligned_32 *) (pixels + 4))->l))) -
+ ((((*((uint32_t *) (block + 4))) ^
+ ((((const struct unaligned_32 *) (pixels +
+ 4))->
+ l))) & 0xFEFEFEFEUL) >> 1));
+ pixels += line_size; /* next source row */
+ block += line_size; /* next destination row */
+ }
+POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
+
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+ register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
+ int i;
+ /* AltiVec path: one 16-byte load/modify/store per row; only the 8 bytes belonging to block are changed. */
+POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
+
+ for (i = 0; i < h; i++) {
+ /*
+ block is 8 bytes-aligned, so we're either in the
+ left block (16 bytes-aligned) or in the right block (not)
+ */
+ int rightside = ((unsigned long)block & 0x0000000F); /* nonzero when block is not 16-byte aligned */
+
+ blockv = vec_ld(0, block); /* vec_ld ignores the low 4 address bits: loads the aligned 16-byte line containing block */
+ pixelsv1 = vec_ld(0, (unsigned char*)pixels);
+ pixelsv2 = vec_ld(16, (unsigned char*)pixels);
+ pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)); /* unaligned 16-byte read at pixels, built from the two aligned loads */
+
+ if (rightside)
+ {
+ pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1)); /* presumably words 0-1 from blockv, words 2-3 = first 8 source bytes (vcprm is defined outside this view) */
+ }
+ else
+ {
+ pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3)); /* presumably words 0-1 = first 8 source bytes, words 2-3 from blockv */
+ }
+
+ blockv = vec_avg(blockv, pixelsv); /* lanes that hold blockv on both sides average to themselves, so the neighbouring 8 bytes are stored back unchanged */
+
+ vec_st(blockv, 0, block);
+
+ pixels += line_size;
+ block += line_size;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
+
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
+
+void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{ /* Writes an 8-byte-wide block where each output byte is (p[x] + p[x+1] + p[x+line_size] + p[x+1+line_size] + 2) >> 2, one row per line_size step. */
+POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1); /* perf-counter macro, defined outside this view */
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+ int j;
+POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
+ for (j = 0; j < 2; j++) { /* two passes, each covering one 4-byte-wide column of the 8-wide block */
+ int i;
+ const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
+ const uint32_t b =
+ (((const struct unaligned_32 *) (pixels + 1))->l); /* same row, shifted one byte right */
+ uint32_t l0 =
+ (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; /* low 2 bits of each byte, plus the rounding constant 2 */
+ uint32_t h0 =
+ ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); /* high 6 bits of each byte, pre-divided by 4 so byte lanes cannot overflow */
+ uint32_t l1, h1;
+ pixels += line_size;
+ for (i = 0; i < h; i += 2) { /* two output rows per iteration; NOTE(review): an odd h would write h+1 rows - confirm callers pass even h */
+ uint32_t a = (((const struct unaligned_32 *) (pixels))->l); /* shadows the outer a/b on purpose: these are the next row's values */
+ uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
+ l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
+ h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+ *((uint32_t *) block) =
+ h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); /* the mask drops bits shifted in from the neighbouring byte lane */
+ pixels += line_size;
+ block += line_size;
+ a = (((const struct unaligned_32 *) (pixels))->l);
+ b = (((const struct unaligned_32 *) (pixels + 1))->l);
+ l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; /* recomputed with rounding; reused as the upper row of the next iteration */
+ h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+ *((uint32_t *) block) =
+ h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
+ pixels += line_size;
+ block += line_size;
+ } pixels += 4 - line_size * (h + 1); /* rewind to the top row and step 4 bytes right; the arithmetic matches the rows consumed only when h is even */
+ block += 4 - line_size * h;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
+
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+ register int i;
+ register vector unsigned char
+ pixelsv1, pixelsv2,
+ pixelsavg;
+ register vector unsigned char
+ blockv, temp1, temp2;
+ register vector unsigned short
+ pixelssum1, pixelssum2, temp3;
+ register const vector unsigned char vczero = (const vector unsigned char)(0);
+ register const vector unsigned short vctwo = (const vector unsigned short)(2);
+ /* Prologue: build the horizontal pair sums (plus rounding) for the first source row. */
+ temp1 = vec_ld(0, pixels);
+ temp2 = vec_ld(16, pixels);
+ pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); /* unaligned 16 bytes starting at pixels */
+ if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) /* pixels + 1 is then 16-byte aligned, so the bytes wanted are exactly temp2 */
+ {
+ pixelsv2 = temp2;
+ }
+ else
+ {
+ pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); /* unaligned 16 bytes starting at pixels + 1 */
+ }
+ pixelsv1 = vec_mergeh(vczero, pixelsv1); /* interleave zero bytes with the first 8 pixels; read as 8 unsigned shorts below (big-endian element layout assumed) */
+ pixelsv2 = vec_mergeh(vczero, pixelsv2);
+ pixelssum1 = vec_add((vector unsigned short)pixelsv1,
+ (vector unsigned short)pixelsv2);
+ pixelssum1 = vec_add(pixelssum1, vctwo); /* fold the rounding constant into the carried row sum */
+
+POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); /* NOTE(review): counting starts after the first-row setup above, so that work is presumably not measured */
+ for (i = 0; i < h ; i++) {
+ int rightside = ((unsigned long)block & 0x0000000F); /* nonzero when block is not 16-byte aligned */
+ blockv = vec_ld(0, block); /* aligned 16-byte line containing block; half of it must be preserved */
+
+ temp1 = vec_ld(line_size, pixels); /* next source row, at pixels + line_size */
+ temp2 = vec_ld(line_size + 16, pixels);
+ pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
+ if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) /* same offset-15 special case as in the prologue */
+ {
+ pixelsv2 = temp2;
+ }
+ else
+ {
+ pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
+ }
+
+ pixelsv1 = vec_mergeh(vczero, pixelsv1);
+ pixelsv2 = vec_mergeh(vczero, pixelsv2);
+ pixelssum2 = vec_add((vector unsigned short)pixelsv1,
+ (vector unsigned short)pixelsv2); /* horizontal pair sums of the lower row, without rounding */
+ temp3 = vec_add(pixelssum1, pixelssum2); /* four-pixel sum + 2 */
+ temp3 = vec_sra(temp3, vctwo); /* divide by 4 */
+ pixelssum1 = vec_add(pixelssum2, vctwo); /* lower row becomes the upper row of the next iteration */
+ pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); /* back to bytes: 8 results in the first half, zeros in the second */
+
+ if (rightside)
+ {
+ blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); /* presumably keeps words 0-1 of blockv and places the 8 results in words 2-3 (vcprm is defined outside this view) */
+ }
+ else
+ {
+ blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); /* presumably places the 8 results in words 0-1 and keeps words 2-3 of blockv */
+ }
+
+ vec_st(blockv, 0, block);
+
+ block += line_size;
+ pixels += line_size;
+ }
+
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
+
int has_altivec(void)
{
#if CONFIG_DARWIN
@@ -822,22 +983,3 @@ int has_altivec(void)
#endif
return 0;
}
-
-#ifdef ALTIVEC_TBL_PERFORMANCE_REPORT
-void altivec_display_perf_report(void)
-{
- int i;
- fprintf(stderr, "AltiVec performance report\n Values are from the Time Base register, and represent 4 bus cycles.\n");
- for(i = 0 ; i < altivec_perf_total ; i++)
- {
- if (perfdata[i][altivec_data_num] != (unsigned long long)0)
- fprintf(stderr, " Function \"%s\":\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
- perfname[i],
- perfdata[i][altivec_data_min],
- perfdata[i][altivec_data_max],
- (double)perfdata[i][altivec_data_sum] /
- (double)perfdata[i][altivec_data_num],
- perfdata[i][altivec_data_num]);
- }
-}
-#endif /* ALTIVEC_TBL_PERFORMANCE_REPORT */