From fe66d06cddc60124eb7cd6ba251749432a47111c Mon Sep 17 00:00:00 2001
From: Anton Khirnov
Date: Wed, 9 Jan 2019 23:17:06 +0100
Subject: residual_calc.asm: implement writing partial blocks

Avoid overwriting anything over the specified line size.
---
 residual_calc.asm | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

(limited to 'residual_calc.asm')

diff --git a/residual_calc.asm b/residual_calc.asm
index 289c3fb..f6f34f3 100644
--- a/residual_calc.asm
+++ b/residual_calc.asm
@@ -208,10 +208,35 @@ SECTION .text
     RES_ADD_DIFF_MIXED stencil
 
     ; store the result
-    movu [dstq + offsetq], m0
     add offsetq, mmsize
-    js .loop
+    jg .store_partial
 
+    ; store full block; movu leaves the flags set by the add above intact for js
+    movu [dstq + offsetq - mmsize], m0
+    js .loop
+    jmp .finish
+
+.store_partial:
+    sub offsetq, ELEM_SIZE
+    jz .store3
+    sub offsetq, ELEM_SIZE
+    jz .store2
+
+.store1:
+    ; 1 element left; offsetq is now (mmsize - 2 * ELEM_SIZE) past the write position
+    movq [dstq + offsetq - mmsize + 2 * ELEM_SIZE], xm0
+    jmp .finish
+.store2:
+    ; 2 elements left; offsetq is now (mmsize - 2 * ELEM_SIZE) past the write position
+    movu [dstq + offsetq - mmsize + 2 * ELEM_SIZE], xm0
+    jmp .finish
+.store3:
+    ; 3 elements left; offsetq is now (mmsize - ELEM_SIZE) past the write position
+    movu [dstq + offsetq - mmsize + 1 * ELEM_SIZE], xm0
+    vextractf128 xm0, m0, 1
+    movq [dstq + offsetq - mmsize + 3 * ELEM_SIZE], xm0
+
+.finish:
     RET
 %endmacro
 
-- 
cgit v1.2.3