/* PLT trampolines.  x86-64 version.
   Copyright (C) 2009-2020 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

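/* Note: this header is an assembly template.  In glibc it is normally
   included several times (from dl-trampoline.S) with different macro
   settings -- USE_FXSAVE, USE_XSAVE or USE_XSAVEC for _dl_runtime_resolve,
   and different VEC_SIZE/VMOVA definitions for _dl_runtime_profile -- to
   generate the fxsave/xsave/xsavec and SSE/AVX/AVX-512 variants of the
   PLT trampolines.  */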
	.text
#ifdef _dl_runtime_resolve

# undef REGISTER_SAVE_AREA
# undef LOCAL_STORAGE_AREA
# undef BASE

# if (STATE_SAVE_ALIGNMENT % 16) != 0
#  error STATE_SAVE_ALIGNMENT must be a multiple of 16
# endif

# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
#  error STATE_SAVE_OFFSET must be a multiple of STATE_SAVE_ALIGNMENT
# endif

# if DL_RUNTIME_RESOLVE_REALIGN_STACK
/* Local stack area before jumping to function address: RBX.  */
#  define LOCAL_STORAGE_AREA	8
#  define BASE			rbx
#  ifdef USE_FXSAVE
/* Use fxsave to save XMM registers.  */
#   define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET)
#   if (REGISTER_SAVE_AREA % 16) != 0
#    error REGISTER_SAVE_AREA must be a multiple of 16
#   endif
#  endif
# else
#  ifndef USE_FXSAVE
#   error USE_FXSAVE must be defined
#  endif
/* Use fxsave to save XMM registers.  */
#  define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET + 8)
/* Local stack area before jumping to function address: All saved
   registers.  */
#  define LOCAL_STORAGE_AREA	REGISTER_SAVE_AREA
#  define BASE			rsp
#  if (REGISTER_SAVE_AREA % 16) != 8
#   error REGISTER_SAVE_AREA must be an odd multiple of 8
#  endif
# endif
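
/* Sizing note (not part of the original build logic): REGISTER_SAVE_AREA
   is 512 bytes for the fxsave image plus STATE_SAVE_OFFSET bytes below it
   for the saved integer registers.  When the stack is not realigned, the
   CPU enters the trampoline with %rsp == 8 (mod 16): the caller's call
   pushed the return address and the PLT pushed link_map and the reloc
   index.  The area must therefore be an odd multiple of 8 so that the
   subsequent "sub" leaves %rsp 16-byte aligned, as fxsave requires; hence
   the extra "+ 8" above.  */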

	.globl _dl_runtime_resolve
	.hidden _dl_runtime_resolve
	.type _dl_runtime_resolve, @function
	.align 16
	cfi_startproc
_dl_runtime_resolve:
	cfi_adjust_cfa_offset(16) # Incorporate PLT
	_CET_ENDBR
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
#  if LOCAL_STORAGE_AREA != 8
#   error LOCAL_STORAGE_AREA must be 8
#  endif
	pushq %rbx			# push subtracts 8 from %rsp.
	cfi_adjust_cfa_offset(8)
	cfi_rel_offset(%rbx, 0)
	mov %RSP_LP, %RBX_LP
	cfi_def_cfa_register(%rbx)
	and $-STATE_SAVE_ALIGNMENT, %RSP_LP
# endif
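	/* When DL_RUNTIME_RESOLVE_REALIGN_STACK is set, the stack is
	   realigned above because the xsave/xsavec state image must be
	   aligned to STATE_SAVE_ALIGNMENT (64 bytes for the XSAVE family),
	   which is stricter than the 16-byte ABI stack alignment.  %rbx
	   keeps the incoming stack pointer and is used to restore %rsp
	   before the final jump.  */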
# ifdef REGISTER_SAVE_AREA
	sub $REGISTER_SAVE_AREA, %RSP_LP
#  if !DL_RUNTIME_RESOLVE_REALIGN_STACK
	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
#  endif
# else
	# Allocate stack space of the required size to save the state.
#  if IS_IN (rtld)
	sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  else
	sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  endif
# endif
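	/* No compile-time REGISTER_SAVE_AREA in the xsave/xsavec case: the
	   size of the state image depends on which state components the
	   CPU and kernel have enabled, so it is read at run time from the
	   glibc cpu-features data (XSAVE_STATE_SIZE_OFFSET) instead.  */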
	# Preserve registers otherwise clobbered.
	movq %rax, REGISTER_SAVE_RAX(%rsp)
	movq %rcx, REGISTER_SAVE_RCX(%rsp)
	movq %rdx, REGISTER_SAVE_RDX(%rsp)
	movq %rsi, REGISTER_SAVE_RSI(%rsp)
	movq %rdi, REGISTER_SAVE_RDI(%rsp)
	movq %r8, REGISTER_SAVE_R8(%rsp)
	movq %r9, REGISTER_SAVE_R9(%rsp)
# ifdef USE_FXSAVE
	fxsave STATE_SAVE_OFFSET(%rsp)
# else
	movl $STATE_SAVE_MASK, %eax
	xorl %edx, %edx
	# Clear the XSAVE Header.
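	# The 64-byte XSAVE header at offset 512 of the save area sits in
	# uninitialized stack memory, and xrstor can fault on non-zero
	# reserved header bytes, so the header is cleared here.  With
	# xsavec the instruction writes XSTATE_BV/XCOMP_BV itself, which is
	# presumably why only the remaining header bytes are cleared on
	# that path.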
#  ifdef USE_XSAVE
	movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
#  endif
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
#  ifdef USE_XSAVE
	xsave STATE_SAVE_OFFSET(%rsp)
#  else
	xsavec STATE_SAVE_OFFSET(%rsp)
#  endif
# endif
	# Copy args pushed by PLT in register.
	# %rdi: link_map, %rsi: reloc_index
	mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
	mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
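	# _dl_fixup (link_map, reloc_index) resolves the symbol, updates
	# the GOT entry and returns the target address in %rax.  The
	# address is then parked in %r11: %rax itself must be restored
	# because it may carry the variadic-call vector-register count,
	# while %r11 is a scratch register that the psABI allows PLT code
	# to clobber.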
	call _dl_fixup		# Call resolver.
	mov %RAX_LP, %R11_LP	# Save return value
	# Get register content back.
# ifdef USE_FXSAVE
	fxrstor STATE_SAVE_OFFSET(%rsp)
# else
	movl $STATE_SAVE_MASK, %eax
	xorl %edx, %edx
	xrstor STATE_SAVE_OFFSET(%rsp)
# endif
	movq REGISTER_SAVE_R9(%rsp), %r9
	movq REGISTER_SAVE_R8(%rsp), %r8
	movq REGISTER_SAVE_RDI(%rsp), %rdi
	movq REGISTER_SAVE_RSI(%rsp), %rsi
	movq REGISTER_SAVE_RDX(%rsp), %rdx
	movq REGISTER_SAVE_RCX(%rsp), %rcx
	movq REGISTER_SAVE_RAX(%rsp), %rax
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
	mov %RBX_LP, %RSP_LP
	cfi_def_cfa_register(%rsp)
	movq (%rsp), %rbx
	cfi_restore(%rbx)
# endif
	# Adjust stack (PLT did 2 pushes).
	add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
	cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
	# Preserve bound registers.
	PRESERVE_BND_REGS_PREFIX
	jmp *%r11		# Jump to function address.
	cfi_endproc
	.size _dl_runtime_resolve, .-_dl_runtime_resolve
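
/* Roughly, _dl_runtime_resolve above behaves like the following pseudo-C
   (for exposition only; the real control transfer is a tail jump that
   leaves the caller's argument registers and stack intact):

     save integer argument registers and SSE/AVX state;
     addr = _dl_fixup (link_map, reloc_index);   // the two PLT pushes
     restore the saved state;
     pop the two PLT words and jump to addr;
*/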
#endif


#if !defined PROF && defined _dl_runtime_profile
# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
#  error LR_VECTOR_OFFSET must be a multiple of VEC_SIZE
# endif

	.globl _dl_runtime_profile
	.hidden _dl_runtime_profile
	.type _dl_runtime_profile, @function
	.align 16
_dl_runtime_profile:
	cfi_startproc
	cfi_adjust_cfa_offset(16) # Incorporate PLT
	_CET_ENDBR
	/* The La_x86_64_regs data structure pointed to by the
	   fourth parameter must be VEC_SIZE-byte aligned.  This must
	   be explicitly enforced.  We have to set up a dynamically
	   sized stack frame.  %rbx points to the top half which
	   has a fixed size and preserves the original stack pointer.  */

	sub $32, %RSP_LP	# Allocate the local storage.
	cfi_adjust_cfa_offset(32)
	movq %rbx, (%rsp)
	cfi_rel_offset(%rbx, 0)

	/* On the stack:
		56(%rbx)	parameter #1
		48(%rbx)	return address

		40(%rbx)	reloc index
		32(%rbx)	link_map

		24(%rbx)	La_x86_64_regs pointer
		16(%rbx)	framesize
		 8(%rbx)	rax
		  (%rbx)	rbx
	*/

	movq %rax, 8(%rsp)
	mov %RSP_LP, %RBX_LP
	cfi_def_cfa_register(%rbx)

	/* Actively align the La_x86_64_regs structure.  */
	and $-VEC_SIZE, %RSP_LP
	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
	   to detect if any xmm0-xmm7 registers are changed by the audit
	   module.  */
	sub $(LR_SIZE + XMM_SIZE*8), %RSP_LP
	movq %rsp, 24(%rbx)

	/* Fill the La_x86_64_regs structure.  */
	movq %rdx, LR_RDX_OFFSET(%rsp)
	movq %r8, LR_R8_OFFSET(%rsp)
	movq %r9, LR_R9_OFFSET(%rsp)
	movq %rcx, LR_RCX_OFFSET(%rsp)
	movq %rsi, LR_RSI_OFFSET(%rsp)
	movq %rdi, LR_RDI_OFFSET(%rsp)
	movq %rbp, LR_RBP_OFFSET(%rsp)

	lea 48(%rbx), %RAX_LP
	movq %rax, LR_RSP_OFFSET(%rsp)

	/* We always store the XMM registers even if AVX is available.
	   This is to provide backward binary compatibility for existing
	   audit modules.  */
	movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
	movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
	movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
	movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
	movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
	movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
	movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
	movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov %bnd0, (LR_BND_OFFSET)(%rsp)		 # Preserve bound
	bndmov %bnd1, (LR_BND_OFFSET + BND_SIZE)(%rsp)	 # registers.  Nops if
	bndmov %bnd2, (LR_BND_OFFSET + BND_SIZE*2)(%rsp) # MPX not available
	bndmov %bnd3, (LR_BND_OFFSET + BND_SIZE*3)(%rsp) # or disabled.
#  else
	.byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET)
	.byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
	.byte 0x66,0x0f,0x1b,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
	.byte 0x66,0x0f,0x1b,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
#  endif
# endif
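	# The .byte sequences above are hand-assembled encodings of the
	# same bndmov stores, for assemblers that do not know the MPX
	# instructions.  MPX encodings were taken from the NOP space, so
	# on hardware without MPX (or with it disabled) they execute as
	# no-ops.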

# ifdef RESTORE_AVX
	/* This is to support AVX audit modules.  */
	VMOVA %VEC(0), (LR_VECTOR_OFFSET)(%rsp)
	VMOVA %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
	VMOVA %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	VMOVA %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	VMOVA %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	VMOVA %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	VMOVA %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	VMOVA %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)

	/* Save xmm0-xmm7 registers to detect if any of them are
	   changed by the audit module.  */
	vmovdqa %xmm0, (LR_SIZE)(%rsp)
	vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp)
	vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
	vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
	vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
	vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
	vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
	vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
# endif

	mov %RSP_LP, %RCX_LP	# La_x86_64_regs pointer to %rcx.
	mov 48(%rbx), %RDX_LP	# Load return address if needed.
	mov 40(%rbx), %RSI_LP	# Copy args pushed by PLT in register.
	mov 32(%rbx), %RDI_LP	# %rdi: link_map, %rsi: reloc_index
	lea 16(%rbx), %R8_LP	# Address of framesize
	call _dl_profile_fixup	# Call resolver.
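	# _dl_profile_fixup (link_map, reloc_index, retaddr,
	# La_x86_64_regs, &framesize) runs the la_pltenter hooks, returns
	# the resolved target address and stores the required outgoing
	# stack size (or a negative value when no la_pltexit call will be
	# needed) through the framesize pointer.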

	mov %RAX_LP, %R11_LP	# Save return value.

	movq 8(%rbx), %rax	# Get back register content.
	movq LR_RDX_OFFSET(%rsp), %rdx
	movq LR_R8_OFFSET(%rsp), %r8
	movq LR_R9_OFFSET(%rsp), %r9

	movaps (LR_XMM_OFFSET)(%rsp), %xmm0
	movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
	movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
	movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
	movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
	movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
	movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
	movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7

# ifdef RESTORE_AVX
	/* Check if any xmm0-xmm7 registers are changed by the audit
	   module.  */
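	/* For each of xmm0-xmm7, the value just reloaded from
	   La_x86_64_regs is compared with the copy saved before calling
	   _dl_profile_fixup.  If they match, the audit module did not
	   modify that xmm slot and the full-width %VEC register is
	   reloaded from the lr_vector slot (and the lr_xmm slot re-synced
	   from it); if they differ, the audit module's new xmm value takes
	   precedence and is copied into the lr_vector slot instead.  */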
	vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
	vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
	vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
	vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
	vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
	vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
	vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
	vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
	vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

1:
# endif

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov (LR_BND_OFFSET)(%rsp), %bnd0		# Restore bound
	bndmov (LR_BND_OFFSET + BND_SIZE)(%rsp), %bnd1	# registers.
	bndmov (LR_BND_OFFSET + BND_SIZE*2)(%rsp), %bnd2
	bndmov (LR_BND_OFFSET + BND_SIZE*3)(%rsp), %bnd3
#  else
	.byte 0x66,0x0f,0x1a,0x84,0x24;.long (LR_BND_OFFSET)
	.byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
	.byte 0x66,0x0f,0x1a,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
	.byte 0x66,0x0f,0x1a,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
#  endif
# endif

	mov 16(%rbx), %R10_LP	# Anything in framesize?
	test %R10_LP, %R10_LP
	PRESERVE_BND_REGS_PREFIX
	jns 3f
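	# A negative framesize from _dl_profile_fixup means no auditor
	# wants la_pltexit: fall through to the fast path below and
	# tail-jump to the target.  Otherwise jns branches to 3:, which
	# copies the stack arguments, calls the target and then calls
	# _dl_call_pltexit with the captured return values.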

	/* The framesize is negative, so there will be no call to
	   _dl_call_pltexit.  */

	/* Get back registers content.  */
	movq LR_RCX_OFFSET(%rsp), %rcx
	movq LR_RSI_OFFSET(%rsp), %rsi
	movq LR_RDI_OFFSET(%rsp), %rdi

	mov %RBX_LP, %RSP_LP
	movq (%rsp), %rbx
	cfi_restore(%rbx)
	cfi_def_cfa_register(%rsp)

	add $48, %RSP_LP	# Adjust the stack so it points at the return
				# address (this drops the local storage and the
				# reloc index and link_map pushed by the PLT).
	cfi_adjust_cfa_offset(-48)
	PRESERVE_BND_REGS_PREFIX
	jmp *%r11		# Jump to function address.

3:
	cfi_adjust_cfa_offset(48)
	cfi_rel_offset(%rbx, 0)
	cfi_def_cfa_register(%rbx)

	/* At this point we need to prepare a new stack frame for the
	   function which has to be called.  We copy the original stack
	   arguments to a temporary buffer of the size specified by the
	   'framesize' returned from _dl_profile_fixup.  */

	lea LR_RSP_OFFSET(%rbx), %RSI_LP	# Source: the original stack parameters.
	add $8, %R10_LP
	and $-16, %R10_LP
	mov %R10_LP, %RCX_LP
	sub %R10_LP, %RSP_LP
	mov %RSP_LP, %RDI_LP
	shr $3, %RCX_LP
	rep
	movsq

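	# After rep movsq, %rdi points just past the copy, which is exactly
	# the La_x86_64_regs area (the old %rsp), so 24/32/40(%rdi) below
	# are lr_rcx/lr_rsi/lr_rdi.  The copy size was rounded to a
	# multiple of 16 above so that %rsp stays 16-byte aligned for the
	# call.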
	movq 24(%rdi), %rcx	# Get back register content.
	movq 32(%rdi), %rsi
	movq 40(%rdi), %rdi

	PRESERVE_BND_REGS_PREFIX
	call *%r11

	mov 24(%rbx), %RSP_LP	# Drop the copied stack content.

	/* Now we have to prepare the La_x86_64_retval structure for
	   _dl_call_pltexit.  %rsp now points to the La_x86_64_regs, so we
	   just need to allocate sizeof(La_x86_64_retval) space on the
	   stack, since the alignment has already been taken care of.  */
# ifdef RESTORE_AVX
	/* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
	   registers to detect if the xmm0/xmm1 registers are changed
	   by the audit module.  Since %rsp is aligned to VEC_SIZE, we
	   need to make sure that the address of La_x86_64_retval +
	   LRV_VECTOR0_OFFSET is aligned to VEC_SIZE.  */
#  define LRV_SPACE (LRV_SIZE + XMM_SIZE*2)
#  define LRV_MISALIGNED ((LRV_SIZE + LRV_VECTOR0_OFFSET) & (VEC_SIZE - 1))
#  if LRV_MISALIGNED == 0
	sub $LRV_SPACE, %RSP_LP
#  else
	sub $(LRV_SPACE + VEC_SIZE - LRV_MISALIGNED), %RSP_LP
#  endif
# else
	sub $LRV_SIZE, %RSP_LP	# sizeof(La_x86_64_retval)
# endif
	mov %RSP_LP, %RCX_LP	# La_x86_64_retval argument to %rcx.

	/* Fill in the La_x86_64_retval structure.  */
	movq %rax, LRV_RAX_OFFSET(%rcx)
	movq %rdx, LRV_RDX_OFFSET(%rcx)

	movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
	movaps %xmm1, LRV_XMM1_OFFSET(%rcx)

# ifdef RESTORE_AVX
	/* This is to support AVX audit modules.  */
	VMOVA %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
	VMOVA %VEC(1), LRV_VECTOR1_OFFSET(%rcx)

	/* Save xmm0/xmm1 registers to detect if they are changed
	   by the audit module.  */
	vmovdqa %xmm0, (LRV_SIZE)(%rcx)
	vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
# endif

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov %bnd0, LRV_BND0_OFFSET(%rcx)	# Preserve returned bounds.
	bndmov %bnd1, LRV_BND1_OFFSET(%rcx)
#  else
	.byte 0x66,0x0f,0x1b,0x81;.long (LRV_BND0_OFFSET)
	.byte 0x66,0x0f,0x1b,0x89;.long (LRV_BND1_OFFSET)
#  endif
# endif

	fstpt LRV_ST0_OFFSET(%rcx)
	fstpt LRV_ST1_OFFSET(%rcx)
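	# The two fstpt stores pop %st(0) and %st(1) so that long double
	# (and complex long double) return values can be passed to
	# la_pltexit; they are reloaded below in reverse order (st1 first,
	# then st0) to rebuild the original x87 stack before returning.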

	movq 24(%rbx), %rdx	# La_x86_64_regs argument to %rdx.
	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
	call _dl_call_pltexit

	/* Restore return registers.  */
	movq LRV_RAX_OFFSET(%rsp), %rax
	movq LRV_RDX_OFFSET(%rsp), %rdx

	movaps LRV_XMM0_OFFSET(%rsp), %xmm0
	movaps LRV_XMM1_OFFSET(%rsp), %xmm1

# ifdef RESTORE_AVX
	/* Check if the xmm0/xmm1 registers are changed by the audit
	   module.  */
	vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	VMOVA LRV_VECTOR0_OFFSET(%rsp), %VEC(0)

1:	vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	VMOVA LRV_VECTOR1_OFFSET(%rsp), %VEC(1)

1:
# endif

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov LRV_BND0_OFFSET(%rsp), %bnd0	# Restore bound registers.
	bndmov LRV_BND1_OFFSET(%rsp), %bnd1
#  else
	.byte 0x66,0x0f,0x1a,0x84,0x24;.long (LRV_BND0_OFFSET)
	.byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LRV_BND1_OFFSET)
#  endif
# endif

	fldt LRV_ST1_OFFSET(%rsp)
	fldt LRV_ST0_OFFSET(%rsp)

	mov %RBX_LP, %RSP_LP
	movq (%rsp), %rbx
	cfi_restore(%rbx)
	cfi_def_cfa_register(%rsp)

	add $48, %RSP_LP	# Adjust the stack so it points at the return
				# address (this drops the local storage and the
				# reloc index and link_map pushed by the PLT).
	cfi_adjust_cfa_offset(-48)
	PRESERVE_BND_REGS_PREFIX
	retq

	cfi_endproc
	.size _dl_runtime_profile, .-_dl_runtime_profile
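
/* A rough pseudo-C outline of _dl_runtime_profile (for exposition only;
   register and vector preservation details are handled above):

     regs = save argument registers into La_x86_64_regs;
     addr = _dl_profile_fixup (link_map, reloc_index, retaddr,
				regs, &framesize);
     if (framesize < 0)
       jump to addr;			// no la_pltexit needed
     copy framesize bytes of stack arguments into a fresh frame;
     retval = call addr;		// captured into La_x86_64_retval
     _dl_call_pltexit (link_map, reloc_index, regs, &retval);
     return retval;
*/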
#endif
