/* Function sincos vectorized with AVX-512. Wrapper to AVX2 version.
   Copyright (C) 2014-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

        .text
ENTRY (_ZGVeN8vl8l8_sincos)
WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
END (_ZGVeN8vl8l8_sincos)
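
/* As a rough C-level model (illustrative sketch only; the names below
   are invented, and the real work is done by WRAPPER_IMPL_AVX512_fFF
   from svml_d_wrapper_impl.h, which spills %zmm0 and issues two 4-lane
   AVX2 calls on its halves):

       // Hypothetical scalar model of _ZGVeN8vl8l8_sincos.
       void
       sincos8_model (const double x[8], double sin_out[8], double cos_out[8])
       {
         // Lanes 0-3 go to the first AVX2 call, lanes 4-7 to the second.
         for (int i = 0; i < 8; i++)
           sincos (x[i], &sin_out[i], &cos_out[i]);
       }
*/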

/* AVX512 ISA version as wrapper to AVX2 ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
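
/* In this form the eight sine destinations arrive as a vector of
   pointers in %zmm1 and the eight cosine destinations as a vector of
   pointers in %zmm2, so the wrapper must scatter the results one by
   one after the AVX2 calls return.  A hedged C model (function and
   parameter names invented here):

       // Hypothetical model of _ZGVeN8vvv_sincos.
       void
       sincos8_vvv_model (const double x[8],
                          double *sin_ptr[8], double *cos_ptr[8])
       {
         for (int i = 0; i < 8; i++)
           sincos (x[i], sin_ptr[i], cos_ptr[i]);
       }
*/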
.macro WRAPPER_IMPL_AVX512_fFF_vvv callee
#ifndef __ILP32__
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        andq      $-64, %rsp
        subq      $320, %rsp
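        /* Frame layout used below (320 bytes, 64-byte aligned):
             0(%rsp):   sin results (lanes 0-3, then lanes 4-7)
             64(%rsp):  cos results (lanes 0-3, then lanes 4-7)
             128(%rsp): spilled %zmm1 (eight sin destination pointers)
             192(%rsp): spilled %zmm2 (eight cos destination pointers)
             256(%rsp): spilled %zmm0 (input vector).  */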
        /* Encoding for vmovups %zmm0, 256(%rsp).  */
        .byte 0x62
        .byte 0xf1
        .byte 0x7c
        .byte 0x48
        .byte 0x11
        .byte 0x44
        .byte 0x24
        .byte 0x04
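        /* The %zmm spills are emitted as raw bytes, presumably so the
           file assembles even with binutils that lack AVX-512 support.
           EVEX disp8 is scaled by the 64-byte vector length, so the
           final byte 0x04 encodes 4 * 64 = 256(%rsp).  */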
        lea       (%rsp), %rdi
        /* Encoding for vmovups %zmm1, 128(%rdi).  */
        .byte 0x62
        .byte 0xf1
        .byte 0x7c
        .byte 0x48
        .byte 0x11
        .byte 0x4f
        .byte 0x02
        /* Encoding for vmovups %zmm2, 192(%rdi).  */
        .byte 0x62
        .byte 0xf1
        .byte 0x7c
        .byte 0x48
        .byte 0x11
        .byte 0x57
        .byte 0x03
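        /* Same disp8 scaling as above: 0x02 encodes 128(%rdi) and
           0x03 encodes 192(%rdi).  */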
        lea       64(%rsp), %rsi
        call      HIDDEN_JUMPTARGET(\callee)
        vmovdqu   288(%rsp), %ymm0
        lea       32(%rsp), %rdi
        lea       96(%rsp), %rsi
        call      HIDDEN_JUMPTARGET(\callee)
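        /* Both 4-lane calls are done; all 16 results sit in the local
           frame.  Now store each sine through its destination pointer
           from the spilled %zmm1 (at 128(%rsp)) and each cosine through
           its pointer from the spilled %zmm2 (at 192(%rsp)).  */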
        movq      128(%rsp), %rdx
        movq      192(%rsp), %rsi
        movq      136(%rsp), %r8
        movq      200(%rsp), %r10
        movq      (%rsp), %rax
        movq      64(%rsp), %rcx
        movq      8(%rsp), %rdi
        movq      72(%rsp), %r9
        movq      %rax, (%rdx)
        movq      %rcx, (%rsi)
        movq      144(%rsp), %rax
        movq      208(%rsp), %rcx
        movq      %rdi, (%r8)
        movq      %r9, (%r10)
        movq      152(%rsp), %rdi
        movq      216(%rsp), %r9
        movq      16(%rsp), %r11
        movq      80(%rsp), %rdx
        movq      24(%rsp), %rsi
        movq      88(%rsp), %r8
        movq      %r11, (%rax)
        movq      %rdx, (%rcx)
        movq      160(%rsp), %r11
        movq      224(%rsp), %rdx
        movq      %rsi, (%rdi)
        movq      %r8, (%r9)
        movq      168(%rsp), %rsi
        movq      232(%rsp), %r8
        movq      32(%rsp), %r10
        movq      96(%rsp), %rax
        movq      40(%rsp), %rcx
        movq      104(%rsp), %rdi
        movq      %r10, (%r11)
        movq      %rax, (%rdx)
        movq      176(%rsp), %r10
        movq      240(%rsp), %rax
        movq      %rcx, (%rsi)
        movq      %rdi, (%r8)
        movq      184(%rsp), %rcx
        movq      248(%rsp), %rdi
        movq      48(%rsp), %r9
        movq      112(%rsp), %r11
        movq      56(%rsp), %rdx
        movq      120(%rsp), %rsi
        movq      %r9, (%r10)
        movq      %r11, (%rax)
        movq      %rdx, (%rcx)
        movq      %rsi, (%rdi)
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
#else
        leal      8(%rsp), %r10d
        .cfi_def_cfa 10, 0
        andl      $-64, %esp
        pushq     -8(%r10d)
        pushq     %rbp
        .cfi_escape 0x10,0x6,0x2,0x76,0
        movl      %esp, %ebp
        pushq     %r12
        leal      -112(%rbp), %esi
        pushq     %r10
        .cfi_escape 0xf,0x3,0x76,0x70,0x6
        .cfi_escape 0x10,0xc,0x2,0x76,0x78
        leal      -176(%rbp), %edi
        movq      %rsi, %r12
        pushq     %rbx
        .cfi_escape 0x10,0x3,0x2,0x76,0x68
        movq      %rdi, %rbx
        subl      $280, %esp
        vmovdqa   %ymm1, -208(%ebp)
        vmovdqa   %ymm2, -240(%ebp)
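        /* x32 frame layout relative to %rbp (pointers are 4 bytes wide,
           so eight of them fit in one %ymm register):
             -304(%ebp): spilled %zmm0 (input; high half at -272)
             -240(%ebp): spilled %ymm2 (eight cos destination pointers)
             -208(%ebp): spilled %ymm1 (eight sin destination pointers)
             -176(%ebp): sin results (lanes 4-7 at -144)
             -112(%ebp): cos results (lanes 4-7 at -80).  */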
        /* Encoding for vmovapd %zmm0, -304(%ebp).  */
        .byte 0x67
        .byte 0x62
        .byte 0xf1
        .byte 0xfd
        .byte 0x48
        .byte 0x29
        .byte 0x85
        .byte 0xd0
        .byte 0xfe
        .byte 0xff
        .byte 0xff
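        /* The leading 0x67 byte is the address-size override required
           for the 32-bit %ebp base under x32; modrm byte 0x85 selects a
           32-bit displacement, stored little-endian as 0xd0 0xfe 0xff
           0xff, i.e. -304.  */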
        call      HIDDEN_JUMPTARGET(\callee)
        leal      32(%r12), %esi
        vmovupd   -272(%ebp), %ymm0
        leal      32(%rbx), %edi
        call      HIDDEN_JUMPTARGET(\callee)
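        /* Scatter phase, x32 flavor: each destination pointer is only
           32 bits wide, so it is loaded with movl from the spilled
           %ymm1/%ymm2 and the 8-byte result is copied through it with
           vmovsd.  */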
        movl      -208(%ebp), %eax
        vmovsd    -176(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -204(%ebp), %eax
        vmovsd    -168(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -200(%ebp), %eax
        vmovsd    -160(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -196(%ebp), %eax
        vmovsd    -152(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -192(%ebp), %eax
        vmovsd    -144(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -188(%ebp), %eax
        vmovsd    -136(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -184(%ebp), %eax
        vmovsd    -128(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -180(%ebp), %eax
        vmovsd    -120(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -240(%ebp), %eax
        vmovsd    -112(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -236(%ebp), %eax
        vmovsd    -104(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -232(%ebp), %eax
        vmovsd    -96(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -228(%ebp), %eax
        vmovsd    -88(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -224(%ebp), %eax
        vmovsd    -80(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -220(%ebp), %eax
        vmovsd    -72(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -216(%ebp), %eax
        vmovsd    -64(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        movl      -212(%ebp), %eax
        vmovsd    -56(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        addl      $280, %esp
        popq      %rbx
        popq      %r10
        .cfi_def_cfa 10, 0
        popq      %r12
        popq      %rbp
        leal      -8(%r10), %esp
        .cfi_def_cfa 7, 8
        ret
#endif
.endm

ENTRY (_ZGVeN8vvv_sincos)
WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN4vl8l8_sincos
END (_ZGVeN8vvv_sincos)
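
/* For reference, the kind of call site these wrappers serve.  With a
   SIMD-enabled declaration as in the comment above the macro, a
   compiler targeting AVX-512 may vectorize the loop below into calls
   to _ZGVeN8vvv_sincos (sketch only; whether this happens depends on
   the compiler, headers, and optimization flags):

       #include <math.h>

       // Assumes math.h exposes sincos as an OpenMP SIMD function,
       // e.g. via #pragma omp declare simd notinbranch.
       void
       fill (int n, const double *x, double *s, double *c)
       {
         #pragma omp simd
         for (int i = 0; i < n; i++)
           sincos (x[i], &s[i], &c[i]);
       }
*/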