summaryrefslogtreecommitdiff
path: root/libavcodec/h264_mb_template.c
diff options
context:
space:
mode:
author: Ronald S. Bultje <rsbultje@gmail.com> 2013-02-17 14:52:24 -0800
committer: Martin Storsjö <martin@martin.st> 2013-02-19 22:34:14 +0200
commit: 7ebfb466aec2c4628fcd42a72b29034efcaba4bc (patch)
tree: de856f2702d6366c7d5b22993d10a1fff2dca823 /libavcodec/h264_mb_template.c
parent: 9918f57dcfc276cc8ecad3705875ca5877980c7a (diff)
h264: Don't store intra pcm samples in h->mb
Instead, keep them in the bitstream buffer until we read them verbatim. This saves a memcpy() and a subsequent clearing of the target buffer.

decode_cabac+decode_mb for a sample file (CAPM3_Sony_D.jsv) goes from 6121.4 to 6095.5 cycles, i.e. 26 cycles faster.

Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/h264_mb_template.c')
-rw-r--r-- libavcodec/h264_mb_template.c 29
1 files changed, 15 insertions, 14 deletions
diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c
index fdefed4304..1c3b32db17 100644
--- a/libavcodec/h264_mb_template.c
+++ b/libavcodec/h264_mb_template.c
@@ -102,7 +102,7 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
const int bit_depth = h->sps.bit_depth_luma;
int j;
GetBitContext gb;
- init_get_bits(&gb, (uint8_t *)h->mb,
+ init_get_bits(&gb, (uint8_t *)h->intra_pcm_ptr,
ff_h264_mb_sizes[h->sps.chroma_format_idc] * bit_depth);
for (i = 0; i < 16; i++) {
@@ -137,7 +137,7 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
}
} else {
for (i = 0; i < 16; i++)
- memcpy(dest_y + i * linesize, (uint8_t *)h->mb + i * 16, 16);
+ memcpy(dest_y + i * linesize, (uint8_t *)h->intra_pcm_ptr + i * 16, 16);
if (SIMPLE || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
if (!h->sps.chroma_format_idc) {
for (i = 0; i < block_h; i++) {
@@ -145,8 +145,8 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
memset(dest_cr + i * uvlinesize, 128, 8);
}
} else {
- uint8_t *src_cb = (uint8_t *)h->mb + 256;
- uint8_t *src_cr = (uint8_t *)h->mb + 256 + block_h * 8;
+ uint8_t *src_cb = (uint8_t *)h->intra_pcm_ptr + 256;
+ uint8_t *src_cr = (uint8_t *)h->intra_pcm_ptr + 256 + block_h * 8;
for (i = 0; i < block_h; i++) {
memcpy(dest_cb + i * uvlinesize, src_cb + i * 8, 8);
memcpy(dest_cr + i * uvlinesize, src_cr + i * 8, 8);
@@ -261,10 +261,10 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
}
}
}
- }
- if (h->cbp || IS_INTRA(mb_type)) {
- h->dsp.clear_blocks(h->mb);
- h->dsp.clear_blocks(h->mb + (24 * 16 << PIXEL_SHIFT));
+ if (h->cbp || IS_INTRA(mb_type)) {
+ h->dsp.clear_blocks(h->mb);
+ h->dsp.clear_blocks(h->mb + (24 * 16 << PIXEL_SHIFT));
+ }
}
}
@@ -328,7 +328,7 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h)
if (PIXEL_SHIFT) {
const int bit_depth = h->sps.bit_depth_luma;
GetBitContext gb;
- init_get_bits(&gb, (uint8_t *)h->mb, 768 * bit_depth);
+ init_get_bits(&gb, (uint8_t *)h->intra_pcm_ptr, 768 * bit_depth);
for (p = 0; p < plane_count; p++)
for (i = 0; i < 16; i++) {
@@ -340,7 +340,7 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h)
for (p = 0; p < plane_count; p++)
for (i = 0; i < 16; i++)
memcpy(dest[p] + i * linesize,
- (uint8_t *)h->mb + p * 256 + i * 16, 16);
+ (uint8_t *)h->intra_pcm_ptr + p * 256 + i * 16, 16);
}
} else {
if (IS_INTRA(mb_type)) {
@@ -368,10 +368,11 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h)
hl_decode_mb_idct_luma(h, mb_type, 1, SIMPLE, transform_bypass,
PIXEL_SHIFT, block_offset, linesize,
dest[p], p);
- }
- if (h->cbp || IS_INTRA(mb_type)) {
- h->dsp.clear_blocks(h->mb);
- h->dsp.clear_blocks(h->mb + (24 * 16 << PIXEL_SHIFT));
+
+ if (h->cbp || IS_INTRA(mb_type)) {
+ h->dsp.clear_blocks(h->mb);
+ h->dsp.clear_blocks(h->mb + (24 * 16 << PIXEL_SHIFT));
+ }
}
}