about summary refs log tree commit diff
path: root/residual_calc.asm
diff options
context:
space:
mode:
Diffstat (limited to 'residual_calc.asm')
-rw-r--r-- residual_calc.asm 14
1 files changed, 7 insertions, 7 deletions
diff --git a/residual_calc.asm b/residual_calc.asm
index b6cc70e..47dda9b 100644
--- a/residual_calc.asm
+++ b/residual_calc.asm
@@ -217,23 +217,23 @@ SECTION .text
.store_partial:
sub offsetq, ELEM_SIZE
- jz .store1
+ jz .store3
sub offsetq, ELEM_SIZE
jz .store2
-.store3:
+.store1:
; offsetq is now mmsize-2 after the write position
- movu [dstq + offsetq - mmsize + 2 * ELEM_SIZE], xm0
- vextractf128 xm0, m0, 1
- movq [dstq + offsetq - mmsize + 4 * ELEM_SIZE], xm0
+ movq [dstq + offsetq - mmsize + 2 * ELEM_SIZE], xm0
jmp .finish
.store2:
; offsetq is now mmsize-2 after the write position
movu [dstq + offsetq - mmsize + 2 * ELEM_SIZE], xm0
jmp .finish
-.store1:
+.store3:
; offsetq is now mmsize-1 after the write position
- movq [dstq + offsetq - mmsize + ELEM_SIZE], xm0
+ movu [dstq + offsetq - mmsize + 1 * ELEM_SIZE], xm0
+ vextractf128 xm0, m0, 1
+ movq [dstq + offsetq - mmsize + 3 * ELEM_SIZE], xm0
.finish:
RET