summaryrefslogtreecommitdiff
path: root/readwrite.asm
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-04-15 18:22:14 +0200
committerAnton Khirnov <anton@khirnov.net>2019-04-15 18:32:52 +0200
commit34917b834143c180562fec66a53d1180251a4b7d (patch)
treee745ada4b544874302cbf816a744af255d44dc21 /readwrite.asm
Initial commit.
Diffstat (limited to 'readwrite.asm')
-rw-r--r--readwrite.asm67
1 files changed, 67 insertions, 0 deletions
diff --git a/readwrite.asm b/readwrite.asm
new file mode 100644
index 0000000..b498f63
--- /dev/null
+++ b/readwrite.asm
@@ -0,0 +1,67 @@
+;
+; Copyright 2019 Anton Khirnov <anton@khirnov.net>
+;
+; This program is free software: you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation, either version 3 of the License, or
+; (at your option) any later version.
+;
+; This program is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program. If not, see <http://www.gnu.org/licenses/>.
+;
+
+%include "config.asm"
+%include "x86inc.asm"
+
+SECTION .rodata align=64
+
+const1: times 8 dq 1.0 ; eight doubles of 1.0 (64 bytes); mem_write loads its fill pattern from here
+
+SECTION .text
+
+;
+; mem_write(dst, dstlen)
+;
+; Fills a buffer with the 1.0 pattern loaded from const1, using aligned
+; full-width vector stores. Each loop iteration stores one block of
+; mmsize * 8 bytes.
+;
+; dst    - start of the buffer. The stores use mova (the aligned move), so
+;          the address is expected to be mmsize-aligned.
+; dstlen - buffer size in bytes. Nothing is written when it is zero or
+;          negative. It is expected to be a multiple of mmsize * 8: the loop
+;          only ever stores whole blocks, so any other positive value makes
+;          the final block run past dst + dstlen.
+;
+; NOTE(review): the C prototype is not visible in this file; presumably
+; void mem_write(void *dst, ptrdiff_t dstlen) - confirm against the caller.
+;
+INIT_YMM avx
+cglobal mem_write, 2, 2, 1, dst, dstlen
+    ; dstq -> end of the buffer, dstlenq = -dstlen: the offset then counts
+    ; upwards and the loop ends when it stops being negative
+    add     dstq, dstlenq
+    neg     dstlenq
+    ; the loop below is bottom-tested and would store one block even for an
+    ; empty buffer; neg has just set SF from the negated length, so bail out
+    ; unless that length is strictly negative
+    jns     .done
+
+    mova    m0, [const1]
+
+.loop:
+    mova    [dstq + dstlenq + mmsize * 0], m0
+    mova    [dstq + dstlenq + mmsize * 1], m0
+    mova    [dstq + dstlenq + mmsize * 2], m0
+    mova    [dstq + dstlenq + mmsize * 3], m0
+    mova    [dstq + dstlenq + mmsize * 4], m0
+    mova    [dstq + dstlenq + mmsize * 5], m0
+    mova    [dstq + dstlenq + mmsize * 6], m0
+    mova    [dstq + dstlenq + mmsize * 7], m0
+
+    add     dstlenq, mmsize * 8
+    js      .loop                   ; offset still negative -> more blocks left
+
+.done:
+    RET
+
+;
+; mem_read(src, srclen)
+;
+; Reads a buffer front to back with aligned full-width vector loads. Every
+; load targets m0 and the loaded data is never used; only the memory reads
+; themselves matter. Each loop iteration loads one block of mmsize * 8 bytes.
+;
+; src    - start of the buffer. The loads use mova (the aligned move), so
+;          the address is expected to be mmsize-aligned.
+; srclen - buffer size in bytes. Nothing is read when it is zero or
+;          negative. It is expected to be a multiple of mmsize * 8: the loop
+;          only ever loads whole blocks, so any other positive value makes
+;          the final block run past src + srclen.
+;
+; NOTE(review): the C prototype is not visible in this file; presumably
+; void mem_read(const void *src, ptrdiff_t srclen) - confirm against the
+; caller.
+;
+INIT_YMM avx
+cglobal mem_read, 2, 2, 1, src, srclen
+    ; srcq -> end of the buffer, srclenq = -srclen: the offset then counts
+    ; upwards and the loop ends when it stops being negative
+    add     srcq, srclenq
+    neg     srclenq
+    ; the loop below is bottom-tested and would load one block even for an
+    ; empty buffer; neg has just set SF from the negated length, so bail out
+    ; unless that length is strictly negative
+    jns     .done
+
+.loop:
+    mova    m0, [srcq + srclenq + mmsize * 0]
+    mova    m0, [srcq + srclenq + mmsize * 1]
+    mova    m0, [srcq + srclenq + mmsize * 2]
+    mova    m0, [srcq + srclenq + mmsize * 3]
+    mova    m0, [srcq + srclenq + mmsize * 4]
+    mova    m0, [srcq + srclenq + mmsize * 5]
+    mova    m0, [srcq + srclenq + mmsize * 6]
+    mova    m0, [srcq + srclenq + mmsize * 7]
+
+    add     srclenq, mmsize * 8
+    js      .loop                   ; offset still negative -> more blocks left
+
+.done:
+    RET