/* Function sincos vectorized with AVX2, wrapper version.
   Copyright (C) 2014-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

        .text
ENTRY (_ZGVdN4vl8l8_sincos)
WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
END (_ZGVdN4vl8l8_sincos)
libmvec_hidden_def (_ZGVdN4vl8l8_sincos)

/* AVX2 ISA version as wrapper to SSE ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
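/* Calling convention of _ZGVdN4vvv_sincos, as implied by the code below:
   %ymm0 carries the four double arguments, %ymm1 the four pointers for the
   sine results and %ymm2 the four pointers for the cosine results (32-bit
   pointers packed into %xmm1/%xmm2 on x32).  A minimal usage sketch,
   assuming GCC with -mavx2, -ffast-math and OpenMP SIMD enabled; the loop
   is illustrative only and the compiler may or may not pick this entry:

     #define _GNU_SOURCE
     #include <math.h>   // glibc declares sincos with
                         // #pragma omp declare simd notinbranch
     void f (double *x, double *s, double *c, int n)
     {
       #pragma omp simd
       for (int i = 0; i < n; i++)
         sincos (x[i], &s[i], &c[i]);
     }
*/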
.macro WRAPPER_IMPL_AVX2_fFF_vvv callee
#ifndef __ILP32__
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        andq      $-32, %rsp
        subq      $160, %rsp
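        /* Scratch frame layout (160 bytes, 32-byte aligned):
             0(%rsp)   ... 31(%rsp)   sine results of the two SSE calls
             32(%rsp)  ... 63(%rsp)   cosine results of the two SSE calls
             64(%rsp)  ... 95(%rsp)   the four sine result pointers (%ymm1)
             96(%rsp)  ... 127(%rsp)  the four cosine result pointers (%ymm2)
             128(%rsp) ... 159(%rsp)  the four double arguments (%ymm0).  */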
        vmovupd   %ymm0, 128(%rsp)
        lea       (%rsp), %rdi
        vmovdqu   %ymm1, 64(%rdi)
        vmovdqu   %ymm2, 96(%rdi)
        lea       32(%rsp), %rsi
        vzeroupper
        call      HIDDEN_JUMPTARGET(\callee)
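        /* Second SSE call handles the upper two arguments; its results go
           to 16(%rsp) and 48(%rsp).  */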
        vmovupd   144(%rsp), %xmm0
        lea       16(%rsp), %rdi
        lea       48(%rsp), %rsi
        call      HIDDEN_JUMPTARGET(\callee)
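        /* Scatter the eight results through the pointers that were passed
           in %ymm1 (sine) and %ymm2 (cosine).  */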
        movq      64(%rsp), %rdx
        movq      96(%rsp), %rsi
        movq      72(%rsp), %r8
        movq      104(%rsp), %r10
        movq      (%rsp), %rax
        movq      32(%rsp), %rcx
        movq      8(%rsp), %rdi
        movq      40(%rsp), %r9
        movq      %rax, (%rdx)
        movq      %rcx, (%rsi)
        movq      80(%rsp), %rax
        movq      112(%rsp), %rcx
        movq      %rdi, (%r8)
        movq      %r9, (%r10)
        movq      88(%rsp), %rdi
        movq      120(%rsp), %r9
        movq      16(%rsp), %r11
        movq      48(%rsp), %rdx
        movq      24(%rsp), %rsi
        movq      56(%rsp), %r8
        movq      %r11, (%rax)
        movq      %rdx, (%rcx)
        movq      %rsi, (%rdi)
        movq      %r8, (%r9)
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
#else
        leal      8(%rsp), %r10d
        .cfi_def_cfa 10, 0
        andl      $-32, %esp
        pushq     -8(%r10d)
        pushq     %rbp
        .cfi_escape 0x10,0x6,0x2,0x76,0
        movl      %esp, %ebp
        pushq     %r12
        leal      -80(%rbp), %esi
        pushq     %r10
        .cfi_escape 0xf,0x3,0x76,0x70,0x6
        .cfi_escape 0x10,0xc,0x2,0x76,0x78
        leal      -112(%rbp), %edi
        movq      %rsi, %r12
        pushq     %rbx
        .cfi_escape 0x10,0x3,0x2,0x76,0x68
        movq      %rdi, %rbx
        subl      $152, %esp
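        /* x32 frame layout relative to %rbp (pointers are 32 bits wide):
             -176 ... -145  the four double arguments (%ymm0)
             -144 ... -129  the four cosine result pointers (%xmm2)
             -128 ... -113  the four sine result pointers (%xmm1)
             -112 ... -81   sine results (%rbx holds -112(%rbp))
             -80  ... -49   cosine results (%r12 holds -80(%rbp)).  */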
        vmovaps   %xmm1, -128(%ebp)
        vmovaps   %xmm2, -144(%ebp)
        vmovapd   %ymm0, -176(%ebp)
        vzeroupper
        call      HIDDEN_JUMPTARGET(\callee)
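        /* Second SSE call handles the upper two arguments; its results go
           16 bytes past the start of each result buffer.  */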
        leal      16(%r12), %esi
        vmovapd   -160(%ebp), %xmm0
        leal      16(%rbx), %edi
        call      HIDDEN_JUMPTARGET(\callee)
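        /* Scatter the eight results through the 32-bit pointers that were
           passed in %xmm1 (sine) and %xmm2 (cosine).  */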
        movq      -128(%ebp), %rax
        vmovsd    -112(%ebp), %xmm0
        vmovdqa   -128(%ebp), %xmm5
        vmovdqa   -144(%ebp), %xmm1
        vmovsd    %xmm0, (%eax)
        vmovsd    -104(%ebp), %xmm0
        vpextrd   $1, %xmm5, %eax
        vmovsd    %xmm0, (%eax)
        movq      -120(%ebp), %rax
        vmovsd    -96(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        vmovsd    -88(%ebp), %xmm0
        vpextrd   $3, %xmm5, %eax
        vmovsd    %xmm0, (%eax)
        movq      -144(%ebp), %rax
        vmovsd    -80(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        vmovsd    -72(%ebp), %xmm0
        vpextrd   $1, %xmm1, %eax
        vmovsd    %xmm0, (%eax)
        movq      -136(%ebp), %rax
        vmovsd    -64(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        vmovsd    -56(%ebp), %xmm0
        vpextrd   $3, %xmm1, %eax
        vmovsd    %xmm0, (%eax)
        addl      $152, %esp
        popq      %rbx
        popq      %r10
        .cfi_def_cfa 10, 0
        popq      %r12
        popq      %rbp
        leal      -8(%r10), %esp
        .cfi_def_cfa 7, 8
        ret
#endif
.endm

ENTRY (_ZGVdN4vvv_sincos)
WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN2vl8l8_sincos
END (_ZGVdN4vvv_sincos)

#ifndef USE_MULTIARCH
libmvec_hidden_def (_ZGVdN4vvv_sincos)
#endif