/* Function sincos vectorized with SSE2.
   Copyright (C) 2014-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
18
#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

/* Two-lane SSE2 sincos entry point whose two result arguments are
   passed as plain pointers (the "l8l8" part of the mangled name).
   The whole body is emitted by WRAPPER_IMPL_SSE2_fFF, which is not
   defined in this file -- presumably it comes from
   svml_d_wrapper_impl.h and forwards each lane to scalar sincos.  */
	.text
ENTRY (_ZGVbN2vl8l8_sincos)
WRAPPER_IMPL_SSE2_fFF sincos
END (_ZGVbN2vl8l8_sincos)
/* Emit the libmvec hidden definition for this symbol.  */
libmvec_hidden_def (_ZGVbN2vl8l8_sincos)
27
/* SSE2 ISA version as wrapper to scalar (for vector
   function declared with #pragma omp declare simd notinbranch).

   WRAPPER_IMPL_SSE2_fFF_vvv callee

   Expands to the body of a two-lane vector function that calls the
   scalar \callee once per lane.

   In:    %xmm0 = two double inputs
          %xmm1 = vector of destination pointers for the value \callee
                  stores through its first pointer argument
          %xmm2 = vector of destination pointers for the value \callee
                  stores through its second pointer argument
          (pointers are 64 bits wide in the LP64 path and 32 bits wide,
          packed in the low 8 bytes, in the __ILP32__ path)
   Out:   four doubles stored through the pointers above.
   Stack: 88 bytes of locals; %rsp is 16-byte aligned at each call.

   Only SSE2 and base-ISA instructions may be used here: this is the
   baseline variant, so it must not depend on SSE4.1 or later.  */
.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
#ifndef __ILP32__
        /* Frame layout (offsets from %rsp):
             0 /  8  first-pointer results, lane 0 / lane 1
            16 / 24  second-pointer results, lane 0 / lane 1
            32       saved %xmm1 (two 64-bit destination pointers)
            48       saved %xmm2 (two 64-bit destination pointers)
            64       saved %xmm0 (two inputs)  */
        subq      $88, %rsp
        cfi_adjust_cfa_offset(88)
        movaps    %xmm0, 64(%rsp)
        lea       (%rsp), %rdi
        movdqa    %xmm1, 32(%rdi)
        lea       16(%rsp), %rsi
        movdqa    %xmm2, 32(%rsi)
        /* Lane 0: %xmm0 still holds input lane 0 in its low half.  */
        call      JUMPTARGET(\callee)
        /* Lane 1: reload the second input and advance both result
           slots by one double.  */
        movsd     72(%rsp), %xmm0
        lea       8(%rsp), %rdi
        lea       24(%rsp), %rsi
        call      JUMPTARGET(\callee)
        /* Fetch the four destination pointers ...  */
        movq      32(%rsp), %rdx
        movq      48(%rsp), %rsi
        movq      40(%rsp), %r8
        movq      56(%rsp), %r10
        /* ... and the four results, moved as raw 64-bit patterns.  */
        movq      (%rsp), %rax
        movq      16(%rsp), %rcx
        movq      8(%rsp), %rdi
        movq      24(%rsp), %r9
        /* Scatter the results to the caller-provided addresses.  */
        movq      %rax, (%rdx)
        movq      %rcx, (%rsi)
        movq      %rdi, (%r8)
        movq      %r9, (%r10)
        addq      $88, %rsp
        cfi_adjust_cfa_offset(-88)
        ret
#else
        /* Frame layout (offsets from %esp after the allocation):
             0       saved %xmm0 (two inputs)
            16       saved %xmm2 (32-bit destination pointers at 16, 20)
            32       saved %xmm1 (32-bit destination pointers at 32, 36)
            48 / 56  first-pointer results, lane 0 / lane 1
            64 / 72  second-pointer results, lane 0 / lane 1
           %rbx and %rbp are callee-saved and keep the two result-slot
           addresses alive across the first call.  */
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        pushq   %rbx
        .cfi_def_cfa_offset 24
        .cfi_offset 3, -24
        subl    $88, %esp
        .cfi_def_cfa_offset 112
        leal    64(%rsp), %esi
        movaps  %xmm1, 32(%esp)
        leal    48(%rsp), %edi
        movaps  %xmm2, 16(%esp)
        movq    %rsi, %rbp
        movq    %rdi, %rbx
        movaps  %xmm0, (%esp)
        /* Lane 0: %xmm0 still holds input lane 0 in its low half.  */
        call    JUMPTARGET(\callee)
        /* Lane 1: the low half of this unaligned load is input lane 1.  */
        movupd  8(%esp), %xmm0
        leal    8(%rbp), %esi
        leal    8(%rbx), %edi
        call    JUMPTARGET(\callee)
        /* Scatter the results.  Each 32-bit destination pointer is read
           with a plain movl from the spilled copy of %xmm1/%xmm2;
           pextrd must not be used here because it is an SSE4.1
           instruction and this is the SSE2 baseline code path.  */
        movl    32(%esp), %eax
        movsd   48(%esp), %xmm0
        movsd   %xmm0, (%eax)
        movl    36(%esp), %eax
        movsd   56(%esp), %xmm0
        movsd   %xmm0, (%eax)
        movl    16(%esp), %eax
        movsd   64(%esp), %xmm0
        movsd   %xmm0, (%eax)
        movl    20(%esp), %eax
        movsd   72(%esp), %xmm0
        movsd   %xmm0, (%eax)
        addl    $88, %esp
        .cfi_def_cfa_offset 24
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        ret
#endif
.endm
103
/* Two-lane SSE2 sincos entry point taking all three arguments as
   vectors; its body is the WRAPPER_IMPL_SSE2_fFF_vvv expansion with
   scalar sincos as the per-lane callee.  */
ENTRY (_ZGVbN2vvv_sincos)
WRAPPER_IMPL_SSE2_fFF_vvv sincos
END (_ZGVbN2vvv_sincos)

/* The hidden definition is emitted here only when USE_MULTIARCH is
   not defined.  */
#if !defined USE_MULTIARCH
 libmvec_hidden_def (_ZGVbN2vvv_sincos)
#endif
111