diff options
Diffstat (limited to 'libavcodec/x86/h264_deblock.asm')
-rw-r--r-- | libavcodec/x86/h264_deblock.asm | 89 |
1 file changed, 89 insertions, 0 deletions
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 509a0dbe0c..93caa67c85 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -377,10 +377,99 @@ cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
     RET
 %endmacro
+%macro DEBLOCK_H_LUMA_MBAFF 0 ; deblock_h_luma_mbaff_8(pix, stride, alpha, beta, tc0): luma deblock of an 8-row vertical edge (MBAFF)
+
+cglobal deblock_h_luma_mbaff_8, 5, 9, 10, 8*16, pix_, stride_, alpha_, beta_, tc0_, base3_, stride3_ ; 5 args, 9 GPRs, 10 XMM regs, 8*16B stack for the transposed block
+    movsxd stride_q, stride_d ; sign-extend the 32-bit stride argument to 64 bits
+    dec alpha_d ; thresholds are used in "> alpha-1" / "> beta-1" form below
+    dec beta_d
+    mov base3_q, pix_q
+    lea stride3_q, [3*stride_q]
+    add base3_q, stride3_q ; base3 = pix + 3*stride; rows 3..7 are addressed from here
+
+    movq m0, [pix_q - 4] ; load 8 bytes per row: 4 pixels on each side of the vertical edge
+    movq m1, [pix_q + stride_q - 4]
+    movq m2, [pix_q + 2*stride_q - 4]
+    movq m3, [base3_q - 4]
+    movq m4, [base3_q + stride_q - 4]
+    movq m5, [base3_q + 2*stride_q - 4]
+    movq m6, [base3_q + stride3_q - 4]
+    movq m7, [base3_q + 4*stride_q - 4]
+
+    TRANSPOSE_8X8B 0,1,2,3,4,5,6,7 ; 8x8 byte transpose: each register now holds one pixel column (p3..q3, per the table below)
+
+    %assign i 0
+    %rep 8 ; spill all eight transposed columns to 16-byte stack slots
+        movq [rsp + 16*i], m %+ i
+        %assign i i+1
+    %endrep
+
+    ; p2 = m1 [rsp + 16]
+    ; p1 = m2 [rsp + 32]
+    ; p0 = m3 [rsp + 48]
+    ; q0 = m4 [rsp + 64]
+    ; q1 = m5 [rsp + 80]
+    ; q2 = m6 [rsp + 96]
+
+    SWAP 0, 2 ; arrange m0..m3 = p1, p0, q0, q1 as expected by LOAD_MASK / DEBLOCK_P0_Q0
+    SWAP 1, 3
+    SWAP 2, 4
+    SWAP 3, 5
+
+    LOAD_MASK alpha_d, beta_d ; m7 = filter-enable mask from the alpha/beta edge tests (see LOAD_MASK)
+    movd m8, [tc0_q] ; four tc0 bytes, one per pair of rows
+    punpcklbw m8, m8 ; duplicate each tc0 byte so it covers both rows of its pair
+    pcmpeqb m9, m9 ; m9 = all-ones
+    pcmpeqb m9, m8 ; m9 = bytes where tc0 == -1 (filtering disabled)
+    pandn m9, m7 ; mask &= (tc0 != -1)
+    pand m8, m9 ; zero tc0 where filtering is off
+
+    movdqa m3, [rsp + 16] ; p2
+    DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1
+    pand m6, m9 ; restrict the p2 condition to the active mask
+    psubb m7, m8, m6 ; tc for p0/q0 = tc0 + 1 where p1 is filtered (m6 bytes are 0 or -1)
+    pand m6, m8
+    LUMA_Q1 m0, m3, [rsp + 16], [rsp + 32], m6, m4 ; filter p1
+
+    movdqa m4, [rsp + 96] ; q2
+    DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1
+    pand m6, m9
+    pand m8, m6
+    psubb m7, m6 ; tc += 1 again where q1 is filtered
+    mova m3, [rsp + 80]
+    LUMA_Q1 m3, m4, [rsp + 96], [rsp + 80], m8, m6 ; filter q1
+
+    DEBLOCK_P0_Q0 ; clip and update p0/q0 using the tc values in m7
+    SWAP 1, 3 ; move the filtered p0/q0 into the m3/m4 column slots for the store transpose
+    SWAP 2, 4
+    movq m0, [rsp] ; reload the remaining columns from the stack slots
+    movq m1, [rsp + 16]
+    movq m2, [rsp + 32]
+    movq m5, [rsp + 80]
+    movq m6, [rsp + 96]
+    movq m7, [rsp + 112]
+
+    TRANSPOSE_8X8B 0,1,2,3,4,5,6,7 ; transpose back to row order
+    movq [pix_q - 4], m0 ; store the 8 filtered rows back in place
+    movq [pix_q + stride_q - 4], m1
+    movq [pix_q + 2*stride_q - 4], m2
+    movq [base3_q - 4], m3
+    movq [base3_q + stride_q - 4], m4
+    movq [base3_q + 2*stride_q - 4], m5
+    movq [base3_q + stride3_q - 4], m6
+    movq [base3_q + 4*stride_q - 4], m7
+
+RET
+
+%endmacro
+
 INIT_XMM sse2
+DEBLOCK_H_LUMA_MBAFF
 DEBLOCK_LUMA
 
 %if HAVE_AVX_EXTERNAL
 INIT_XMM avx
+DEBLOCK_H_LUMA_MBAFF
 DEBLOCK_LUMA
 %endif
|