summary | refs | log | tree | commit | diff
path: root/libavcodec
diff options
context:
space:
mode:
author: Mike Melanson <mike@multimedia.cx> 2004-04-26 00:20:29 +0000
committer: Mike Melanson <mike@multimedia.cx> 2004-04-26 00:20:29 +0000
commit: 116824d0aa1c416c3fb0f2c39d339fc00ae251f3 (patch)
tree: 0d062e4045aee066c0003234e1d4b716ad1b2c83 /libavcodec
parent: 4ea4b274697767abddda3c425ba4bb43dfdee52f (diff)
reorganize and simplify the VP3 IDCT stuff
Originally committed as revision 3071 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/dsputil.c 3
-rw-r--r-- libavcodec/dsputil.h 47
-rw-r--r-- libavcodec/i386/dsputil_mmx.c 8
-rw-r--r-- libavcodec/i386/vp3dsp_mmx.c 80
-rw-r--r-- libavcodec/i386/vp3dsp_sse2.c 61
-rw-r--r-- libavcodec/vp3.c 34
-rw-r--r-- libavcodec/vp3dsp.c 80
7 files changed, 57 insertions, 256 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index fce0b81634..7b554b1fd0 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -3126,8 +3126,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
/* VP3 DSP support */
c->vp3_dsp_init = vp3_dsp_init_c;
- c->vp3_idct_put = vp3_idct_put_c;
- c->vp3_idct_add = vp3_idct_add_c;
+ c->vp3_idct = vp3_idct_c;
c->get_pixels = get_pixels_c;
c->diff_pixels = diff_pixels_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index b5468724f1..0307dbd6ab 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -62,23 +62,16 @@ extern uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
/* VP3 DSP functions */
void vp3_dsp_init_c(void);
-void vp3_idct_put_c(int16_t *input_data, int16_t *dequant_matrix,
- int coeff_count, uint8_t *dest, int stride);
-void vp3_idct_add_c(int16_t *input_data, int16_t *dequant_matrix,
- int coeff_count, uint8_t *dest, int stride);
+void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix,
+ int coeff_count, DCTELEM *output_data);
void vp3_dsp_init_mmx(void);
-void vp3_idct_put_mmx(int16_t *input_data, int16_t *dequant_matrix,
- int coeff_count, uint8_t *dest, int stride);
-void vp3_idct_add_mmx(int16_t *input_data, int16_t *dequant_matrix,
- int coeff_count, uint8_t *dest, int stride);
+void vp3_idct_mmx(int16_t *input_data, int16_t *dequant_matrix,
+ int coeff_count, DCTELEM *output_data);
void vp3_dsp_init_sse2(void);
-void vp3_idct_put_sse2(int16_t *input_data, int16_t *dequant_matrix,
- int coeff_count, uint8_t *dest, int stride);
-void vp3_idct_add_sse2(int16_t *input_data, int16_t *dequant_matrix,
- int coeff_count, uint8_t *dest, int stride);
-
+void vp3_idct_sse2(int16_t *input_data, int16_t *dequant_matrix,
+ int coeff_count, DCTELEM *output_data);
/* minimum alignment rules ;)
if u notice errors in the align stuff, need more alignment for some asm code for some cpu
@@ -318,32 +311,16 @@ typedef struct DSPContext {
/**
* This function is responsible for taking a block of zigzag'd,
- * quantized DCT coefficients, reconstructing the original block of
- * samples, and placing it into the output.
- * @param input_data 64 zigzag'd, quantized DCT coefficients
- * @param dequant_matrix 64 zigzag'd quantizer coefficients
- * @param coeff_count index of the last coefficient
- * @param dest the final output location where the transformed samples
- * are to be placed
- * @param stride the width in 8-bit samples of a line on this plane
- */
- void (*vp3_idct_put)(int16_t *input_data, int16_t *dequant_matrix,
- int coeff_count, uint8_t *dest, int stride);
-
- /**
- * This function is responsible for taking a block of zigzag'd,
- * quantized DCT coefficients, reconstructing the original block of
- * samples, and adding the transformed samples to an existing block of
- * samples in the output.
+ * quantized DCT coefficients and reconstructing the original block of
+ * samples.
* @param input_data 64 zigzag'd, quantized DCT coefficients
* @param dequant_matrix 64 zigzag'd quantizer coefficients
* @param coeff_count index of the last coefficient
- * @param dest the final output location where the transformed samples
- * are to be placed
- * @param stride the width in 8-bit samples of a line on this plane
+ * @param output_samples space for 64 DCTELEMs where the transformed
+ * samples will be stored
*/
- void (*vp3_idct_add)(int16_t *input_data, int16_t *dequant_matrix,
- int coeff_count, uint8_t *dest, int stride);
+ void (*vp3_idct)(int16_t *input_data, int16_t *dequant_matrix,
+ int coeff_count, DCTELEM *output_samples);
} DSPContext;
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 772c9c1f03..61bfc89ac5 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -2149,14 +2149,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
/* VP3 optimized DSP functions */
if (mm_flags & MM_SSE2) {
c->vp3_dsp_init = vp3_dsp_init_sse2;
- c->vp3_idct_put = vp3_idct_put_sse2;
- c->vp3_idct_add = vp3_idct_add_sse2;
+ c->vp3_idct = vp3_idct_sse2;
} else {
c->vp3_dsp_init = vp3_dsp_init_mmx;
- c->vp3_idct_put = vp3_idct_put_mmx;
- c->vp3_idct_add = vp3_idct_add_mmx;
+ c->vp3_idct = vp3_idct_mmx;
}
-
+
#ifdef CONFIG_ENCODERS
c->get_pixels = get_pixels_mmx;
c->diff_pixels = diff_pixels_mmx;
diff --git a/libavcodec/i386/vp3dsp_mmx.c b/libavcodec/i386/vp3dsp_mmx.c
index 76007a1d16..319e57f1bb 100644
--- a/libavcodec/i386/vp3dsp_mmx.c
+++ b/libavcodec/i386/vp3dsp_mmx.c
@@ -279,8 +279,8 @@ void vp3_dsp_init_mmx(void)
idct_constants[46] = idct_constants[47] = IdctAdjustBeforeShift;
}
-static void vp3_idct_mmx(int16_t *input_data, int16_t *dequant_matrix,
- int16_t *output_data)
+void vp3_idct_mmx(int16_t *input_data, int16_t *dequant_matrix,
+ int coeff_count, int16_t *output_data)
{
/* eax = quantized input
* ebx = dequantizer matrix
@@ -563,79 +563,3 @@ static void vp3_idct_mmx(int16_t *input_data, int16_t *dequant_matrix,
#undef J
}
-
-void vp3_idct_put_mmx(int16_t *input_data, int16_t *dequant_matrix,
- int coeff_count, uint8_t *dest, int stride)
-{
- int16_t transformed_data[64];
- int16_t *op;
- int i, j;
- uint8_t vector128[8] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
-
- vp3_idct_mmx(input_data, dequant_matrix, transformed_data);
-
- /* place in final output */
- op = transformed_data;
- movq_m2r(*vector128, mm0);
- for (i = 0; i < 8; i++) {
-#if 1
- for (j = 0; j < 8; j++) {
- if (*op < -128)
- *dest = 0;
- else if (*op > 127)
- *dest = 255;
- else
- *dest = (uint8_t)(*op + 128);
- op++;
- dest++;
- }
- dest += (stride - 8);
-#else
-/* prototype optimization */
- pxor_r2r(mm1, mm1);
- packsswb_m2r(*(op + 4), mm1);
- movq_r2r(mm1, mm2);
- psrlq_i2r(32, mm2);
- packsswb_m2r(*(op + 0), mm1);
- op += 8;
- por_r2r(mm2, mm1);
- paddb_r2r(mm0, mm1);
- movq_r2m(mm1, *dest);
- dest += stride;
-#endif
- }
-
- /* be a good MMX citizen */
- emms();
-}
-
-void vp3_idct_add_mmx(int16_t *input_data, int16_t *dequant_matrix,
- int coeff_count, uint8_t *dest, int stride)
-{
- int16_t transformed_data[64];
- int16_t *op;
- int i, j;
- int16_t sample;
-
- vp3_idct_mmx(input_data, dequant_matrix, transformed_data);
-
- /* place in final output */
- op = transformed_data;
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- sample = *dest + *op;
- if (sample < 0)
- *dest = 0;
- else if (sample > 255)
- *dest = 255;
- else
- *dest = (uint8_t)(sample & 0xFF);
- op++;
- dest++;
- }
- dest += (stride - 8);
- }
-
- /* be a good MMX citizen */
- emms();
-}
diff --git a/libavcodec/i386/vp3dsp_sse2.c b/libavcodec/i386/vp3dsp_sse2.c
index c8f9158afb..6adfd2f9fa 100644
--- a/libavcodec/i386/vp3dsp_sse2.c
+++ b/libavcodec/i386/vp3dsp_sse2.c
@@ -799,11 +799,12 @@ static unsigned short __align16 SSE2_idct_data[7 * 8] =
void vp3_dsp_init_sse2(void)
{
/* nop */
+av_log(NULL, AV_LOG_INFO, "Hey! SSE2!\n");
}
-static void vp3_idct_sse2(int16_t *input_data, int16_t *dequant_matrix,
- int16_t *output_data)
+void vp3_idct_sse2(int16_t *input_data, int16_t *dequant_matrix,
+ int coeff_count, int16_t *output_data)
{
unsigned char *input_bytes = (unsigned char *)input_data;
unsigned char *dequant_matrix_bytes = (unsigned char *)dequant_matrix;
@@ -832,59 +833,3 @@ static void vp3_idct_sse2(int16_t *input_data, int16_t *dequant_matrix,
SSE2_Column_IDCT();
}
-
-
-void vp3_idct_put_sse2(int16_t *input_data, int16_t *dequant_matrix,
- int coeff_count, uint8_t *dest, int stride)
-{
- int16_t transformed_data[64];
- int16_t *op;
- int i, j;
-
- vp3_idct_sse2(input_data, dequant_matrix, transformed_data);
-
- /* place in final output */
- op = transformed_data;
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- if (*op < -128)
- *dest = 0;
- else if (*op > 127)
- *dest = 255;
- else
- *dest = (uint8_t)(*op + 128);
- op++;
- dest++;
- }
- dest += (stride - 8);
- }
-}
-
-
-void vp3_idct_add_sse2(int16_t *input_data, int16_t *dequant_matrix,
- int coeff_count, uint8_t *dest, int stride)
-{
- int16_t transformed_data[64];
- int16_t *op;
- int i, j;
- int16_t sample;
-
- vp3_idct_sse2(input_data, dequant_matrix, transformed_data);
-
- /* place in final output */
- op = transformed_data;
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- sample = *dest + *op;
- if (sample < 0)
- *dest = 0;
- else if (sample > 255)
- *dest = 255;
- else
- *dest = (uint8_t)(sample & 0xFF);
- op++;
- dest++;
- }
- dest += (stride - 8);
- }
-}
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index 0667d99eb8..cf22ee6ce0 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -2051,6 +2051,7 @@ static void render_fragments(Vp3DecodeContext *s,
int m, n;
int i = first_fragment;
int16_t *dequantizer;
+ DCTELEM __align16 output_samples[64];
unsigned char *output_plane;
unsigned char *last_plane;
unsigned char *golden_plane;
@@ -2060,6 +2061,10 @@ static void render_fragments(Vp3DecodeContext *s,
int motion_halfpel_index;
uint8_t *motion_source;
+ int16_t *op;
+ uint8_t *dest;
+ int j, k;
+
debug_vp3(" vp3: rendering final fragments for %s\n",
(plane == 0) ? "Y plane" : (plane == 1) ? "U plane" : "V plane");
@@ -2176,16 +2181,29 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x,
s->all_fragments[i].coeffs[0], dequantizer[0]);
/* invert DCT and place (or add) in final output */
+ s->dsp.vp3_idct(s->all_fragments[i].coeffs,
+ dequantizer,
+ s->all_fragments[i].coeff_count,
+ output_samples);
if (s->all_fragments[i].coding_method == MODE_INTRA) {
- s->dsp.vp3_idct_put(s->all_fragments[i].coeffs,
- dequantizer,
- s->all_fragments[i].coeff_count,
- output_plane + s->all_fragments[i].first_pixel,
- stride);
+ /* this really needs to be optimized sooner or later */
+ op = output_samples;
+ dest = output_plane + s->all_fragments[i].first_pixel;
+ for (j = 0; j < 8; j++) {
+ for (k = 0; k < 8; k++) {
+ if (*op < -128)
+ *dest = 0;
+ else if (*op > 127)
+ *dest = 255;
+ else
+ *dest = (uint8_t)(*op + 128);
+ op++;
+ dest++;
+ }
+ dest += (stride - 8);
+ }
} else {
- s->dsp.vp3_idct_add(s->all_fragments[i].coeffs,
- dequantizer,
- s->all_fragments[i].coeff_count,
+ s->dsp.add_pixels_clamped(output_samples,
output_plane + s->all_fragments[i].first_pixel,
stride);
}
diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c
index ec62d9456d..3ead732803 100644
--- a/libavcodec/vp3dsp.c
+++ b/libavcodec/vp3dsp.c
@@ -40,8 +40,10 @@ void vp3_dsp_init_c(void)
/* nop */
}
-static void vp3_idct_c(int32_t *dequantized_data, int16_t *output_data)
+void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix,
+ int coeff_count, int16_t *output_data)
{
+ int32_t dequantized_data[64];
int32_t *ip = dequantized_data;
int16_t *op = output_data;
@@ -49,7 +51,13 @@ static void vp3_idct_c(int32_t *dequantized_data, int16_t *output_data)
int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
int32_t t1, t2;
- int i;
+ int i, j;
+
+ /* de-zigzag and dequantize */
+ for (i = 0; i < coeff_count; i++) {
+ j = dezigzag_index[i];
+ dequantized_data[j] = dequant_matrix[i] * input_data[i];
+ }
/* Inverse DCT on the rows now */
for (i = 0; i < 8; i++) {
@@ -248,71 +256,3 @@ static void vp3_idct_c(int32_t *dequantized_data, int16_t *output_data)
op++;
}
}
-
-void vp3_idct_put_c(int16_t *input_data, int16_t *dequant_matrix,
- int coeff_count, uint8_t *dest, int stride)
-{
- int32_t dequantized_data[64];
- int16_t transformed_data[64];
- int16_t *op;
- int i, j;
-
- /* de-zigzag and dequantize */
- for (i = 0; i < coeff_count; i++) {
- j = dezigzag_index[i];
- dequantized_data[j] = dequant_matrix[i] * input_data[i];
- }
-
- vp3_idct_c(dequantized_data, transformed_data);
-
- /* place in final output */
- op = transformed_data;
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- if (*op < -128)
- *dest = 0;
- else if (*op > 127)
- *dest = 255;
- else
- *dest = (uint8_t)(*op + 128);
- op++;
- dest++;
- }
- dest += (stride - 8);
- }
-}
-
-void vp3_idct_add_c(int16_t *input_data, int16_t *dequant_matrix,
- int coeff_count, uint8_t *dest, int stride)
-{
- int32_t dequantized_data[64];
- int16_t transformed_data[64];
- int16_t *op;
- int i, j;
- int16_t sample;
-
- /* de-zigzag and dequantize */
- for (i = 0; i < coeff_count; i++) {
- j = dezigzag_index[i];
- dequantized_data[j] = dequant_matrix[i] * input_data[i];
- }
-
- vp3_idct_c(dequantized_data, transformed_data);
-
- /* place in final output */
- op = transformed_data;
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- sample = *dest + *op;
- if (sample < 0)
- *dest = 0;
- else if (sample > 255)
- *dest = 255;
- else
- *dest = (uint8_t)(sample & 0xFF);
- op++;
- dest++;
- }
- dest += (stride - 8);
- }
-}