aboutsummaryrefslogtreecommitdiff
path: root/util.asm
blob: 846ea3b87efe9c212ee76b6f535aeb849b311af9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
;
; Various ASM utility macros
; Copyright 2019 Anton Khirnov <anton@khirnov.net>
;
; This program is free software: you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation, either version 3 of the License, or
; (at your option) any later version.
;
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program.  If not, see <http://www.gnu.org/licenses/>.

; Immediate-operand builders for shuffle/permute instructions.
;
; _IMM8SPLAT2B(x) - 8-bit immediate consisting of four 2-bit fields, each set
;                   to x; x is not masked, so it must fit into 2 bits (0-3)
; _IMM4SPLAT1B(x) - 4-bit immediate consisting of four 1-bit fields, each set
;                   to x; x is not masked, so it must fit into 1 bit (0-1)
%define _IMM8SPLAT2B(x) ((x) | ((x) << 2) | ((x) << 4) | ((x) << 6))
%define _IMM4SPLAT1B(x) ((x) | ((x) << 1) | ((x) << 2) | ((x) << 3))

; SPLATPD - splat packed double: copy the selected double-precision element of
; the source register into all element positions of the destination register.
;
; %1 destination register
; %2 source register (may be same as dst)
; %3 index of the element in the source to splat; constant expression in the
;    range 0-3
;
; Only mmsize == 32 with AVX or AVX2 is handled; any other configuration
; triggers an assembly-time error.
%macro SPLATPD 3
    ; reject selectors that do not address one of the four doubles
    %if (%3) < 0 || (%3) > 3
        %error Invalid selector %3
    %endif

    %if mmsize == 32 && cpuflag(avx2)
        ; single cross-lane permute, the 2-bit index is repeated in all four
        ; selector fields of the immediate
        vpermpd %1, %2, _IMM8SPLAT2B(%3)
    %elif mmsize == 32 && cpuflag(avx)
        ; step 1: within each 128-bit lane, duplicate the element chosen by
        ; bit 0 of the index
        shufpd %1, %2, %2, _IMM4SPLAT1B((%3) & 1)
        ; step 2: copy the 128-bit lane chosen by bit 1 of the index into both
        ; lanes (imm 0x00 selects the low lane twice, 0x11 the high lane twice);
        ; both source operands are spelled out explicitly
        vperm2f128 %1, %1, %1, (((%3) & 2) >> 1) * 0x11
    %else
        %error %? not supported with cpuname
    %endif
%endmacro