aboutsummaryrefslogtreecommitdiff
path: root/residual_calc.asm
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-01-09 23:17:06 +0100
committerAnton Khirnov <anton@khirnov.net>2019-01-10 09:14:21 +0100
commitfe66d06cddc60124eb7cd6ba251749432a47111c (patch)
tree0302112ee4d07eaead174b153f898c87ae70251b /residual_calc.asm
parentb7aa818e0fc9d61d9fd37d4d4bbdc3394eef4f29 (diff)
residual_calc.asm: implement writing partial blocks
Avoid overwriting anything over the specified line size.
Diffstat (limited to 'residual_calc.asm')
-rw-r--r--residual_calc.asm29
1 files changed, 27 insertions, 2 deletions
diff --git a/residual_calc.asm b/residual_calc.asm
index 289c3fb..f6f34f3 100644
--- a/residual_calc.asm
+++ b/residual_calc.asm
@@ -208,10 +208,35 @@ SECTION .text
RES_ADD_DIFF_MIXED stencil
; store the result
- movu [dstq + offsetq], m0
add offsetq, mmsize
- js .loop
+ jg .store_partial
+ ; store full block
+ movu [dstq + offsetq - mmsize], m0
+ js .loop
+ jmp .finish
+
+.store_partial:
+ sub offsetq, ELEM_SIZE ; offsetq is positive here: the number of bytes this block overshoots the line end
+ jz .store3 ; overshoot of 1 element, so 3 elements are still valid (assumes mmsize == 4 * ELEM_SIZE)
+ sub offsetq, ELEM_SIZE
+ jz .store2 ; overshoot of 2 elements, so 2 elements are still valid
+
+.store1:
+ ; overshoot of 3 elements: offsetq is now (mmsize - 2 * ELEM_SIZE) past the write position; store only element 0
+ movq [dstq + offsetq - mmsize + 2 * ELEM_SIZE], xm0
+ jmp .finish
+.store2:
+ ; offsetq is now (mmsize - 2 * ELEM_SIZE) past the write position; store elements 0-1 (low 128 bits)
+ movu [dstq + offsetq - mmsize + 2 * ELEM_SIZE], xm0
+ jmp .finish
+.store3:
+ ; offsetq is now (mmsize - 1 * ELEM_SIZE) past the write position; store elements 0-1, then element 2
+ movu [dstq + offsetq - mmsize + 1 * ELEM_SIZE], xm0
+ vextractf128 xm0, m0, 1 ; bring the upper 128 bits down so that element 2 is the low quadword of xm0
+ movq [dstq + offsetq - mmsize + 3 * ELEM_SIZE], xm0
+
+.finish:
RET
%endmacro