summary refs log tree commit diff
diff options
context:
space:
mode:
author Jason Garrett-Glaser <jason@x264.com> 2011-02-05 21:19:52 -0800
committer Jason Garrett-Glaser <jason@x264.com> 2011-02-08 15:59:24 -0800
commit 62457f9052ea15123688455aad866cb070634f13 (patch)
tree 95919baa5118c5e0d1e69181f8b979e04279c4ae
parent 17cf7c68ed26a4cb3c7adf7488a38c2e19118918 (diff)
VP8: idct_mb optimizations
Currently uses AV_RL32 instead of AV_RL32A, as the latter doesn't exist yet.
-rw-r--r-- libavcodec/vp8.c 32
1 files changed, 16 insertions, 16 deletions
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index b10330af03..c078f2ec5c 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -1421,17 +1421,17 @@ static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblo
if (mb->mode != MODE_I4x4) {
uint8_t *y_dst = dst[0];
for (y = 0; y < 4; y++) {
- uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[y]);
+ uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
if (nnz4) {
if (nnz4&~0x01010101) {
for (x = 0; x < 4; x++) {
- int nnz = s->non_zero_count_cache[y][x];
- if (nnz) {
- if (nnz == 1)
- s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
- else
- s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
- }
+ if ((uint8_t)nnz4 == 1)
+ s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
+ else if((uint8_t)nnz4 > 1)
+ s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
+ nnz4 >>= 8;
+ if (!nnz4)
+ break;
}
} else {
s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
@@ -1442,19 +1442,19 @@ static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblo
}
for (ch = 0; ch < 2; ch++) {
- uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[4+ch]);
+ uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
if (nnz4) {
uint8_t *ch_dst = dst[1+ch];
if (nnz4&~0x01010101) {
for (y = 0; y < 2; y++) {
for (x = 0; x < 2; x++) {
- int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x];
- if (nnz) {
- if (nnz == 1)
- s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
- else
- s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
- }
+ if ((uint8_t)nnz4 == 1)
+ s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
+ else if((uint8_t)nnz4 > 1)
+ s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
+ nnz4 >>= 8;
+ if (!nnz4)
+ break;
}
ch_dst += 4*s->uvlinesize;
}