diff options
author | Anton Khirnov <anton@khirnov.net> | 2019-01-09 23:17:06 +0100 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2019-01-10 09:14:21 +0100 |
commit | fe66d06cddc60124eb7cd6ba251749432a47111c (patch) | |
tree | 0302112ee4d07eaead174b153f898c87ae70251b | |
parent | b7aa818e0fc9d61d9fd37d4d4bbdc3394eef4f29 (diff) |
residual_calc.asm: implement writing partial blocks
Avoid writing anything beyond the specified line size.
-rw-r--r-- | residual_calc.asm | 29 |
1 file changed, 27 insertions, 2 deletions
diff --git a/residual_calc.asm b/residual_calc.asm index 289c3fb..f6f34f3 100644 --- a/residual_calc.asm +++ b/residual_calc.asm @@ -208,10 +208,35 @@ SECTION .text RES_ADD_DIFF_MIXED stencil ; store the result - movu [dstq + offsetq], m0 add offsetq, mmsize - js .loop + jg .store_partial + ; store full block + movu [dstq + offsetq - mmsize], m0 + js .loop + jmp .finish + +.store_partial: + sub offsetq, ELEM_SIZE + jz .store1 + sub offsetq, ELEM_SIZE + jz .store2 + +.store3: + ; offsetq is now mmsize-2 after the write position + movu [dstq + offsetq - mmsize + 2 * ELEM_SIZE], xm0 + vextractf128 xm0, m0, 1 + movq [dstq + offsetq - mmsize + 4 * ELEM_SIZE], xm0 + jmp .finish +.store2: + ; offsetq is now mmsize-2 after the write position + movu [dstq + offsetq - mmsize + 2 * ELEM_SIZE], xm0 + jmp .finish +.store1: + ; offsetq is now mmsize-1 after the write position + movq [dstq + offsetq - mmsize + ELEM_SIZE], xm0 + +.finish: RET %endmacro |