/* PLT trampolines.  x86-64 version.
   Copyright (C) 2009-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

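/* Note: this file reads as a template.  It is expected to be #included
   (for instance by dl-trampoline.S) with _dl_runtime_resolve,
   USE_FXSAVE/USE_XSAVE/USE_XSAVEC and the various offset macros already
   defined, so that several variants of the trampolines can be generated
   from the same source.  */
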
	.text
#ifdef _dl_runtime_resolve

# undef REGISTER_SAVE_AREA
# undef LOCAL_STORAGE_AREA
# undef BASE

# if (STATE_SAVE_ALIGNMENT % 16) != 0
#  error STATE_SAVE_ALIGNMENT must be a multiple of 16
# endif

# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
#  error STATE_SAVE_OFFSET must be a multiple of STATE_SAVE_ALIGNMENT
# endif
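
/* These checks matter because the hardware save instructions have
   alignment requirements: fxsave needs a 16-byte-aligned save area,
   while the xsave family needs 64-byte alignment.  STATE_SAVE_ALIGNMENT
   is therefore expected to be 16 or 64 depending on which mechanism the
   including file selects.  */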

# if DL_RUNTIME_RESOLVE_REALIGN_STACK
/* Local stack area before jumping to function address: RBX.  */
#  define LOCAL_STORAGE_AREA	8
#  define BASE			rbx
#  ifdef USE_FXSAVE
/* Use fxsave to save XMM registers.  */
#   define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET)
#   if (REGISTER_SAVE_AREA % 16) != 0
#    error REGISTER_SAVE_AREA must be a multiple of 16
#   endif
#  endif
# else
#  ifndef USE_FXSAVE
#   error USE_FXSAVE must be defined
#  endif
/* Use fxsave to save XMM registers.  */
#  define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET + 8)
/* Local stack area before jumping to function address: all saved
   registers.  */
#  define LOCAL_STORAGE_AREA	REGISTER_SAVE_AREA
#  define BASE			rsp
#  if (REGISTER_SAVE_AREA % 16) != 8
#   error REGISTER_SAVE_AREA must be an odd multiple of 8
#  endif
# endif

	.globl _dl_runtime_resolve
	.hidden _dl_runtime_resolve
	.type _dl_runtime_resolve, @function
	.align 16
	cfi_startproc
_dl_runtime_resolve:
	cfi_adjust_cfa_offset(16) # Incorporate PLT
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
#  if LOCAL_STORAGE_AREA != 8
#   error LOCAL_STORAGE_AREA must be 8
#  endif
	pushq %rbx			# push decrements %rsp by 8.
	cfi_adjust_cfa_offset(8)
	cfi_rel_offset(%rbx, 0)
	mov %RSP_LP, %RBX_LP
	cfi_def_cfa_register(%rbx)
	and $-STATE_SAVE_ALIGNMENT, %RSP_LP
# endif
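	/* The realignment above is needed because the ABI only guarantees
	   16-byte stack alignment here, while the state-save area may need
	   a larger alignment (STATE_SAVE_ALIGNMENT).  The incoming stack
	   pointer is kept in %rbx so the original frame can be found again
	   later.  */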
# ifdef REGISTER_SAVE_AREA
	sub $REGISTER_SAVE_AREA, %RSP_LP
#  if !DL_RUNTIME_RESOLVE_REALIGN_STACK
	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
#  endif
# else
	# Allocate stack space of the required size to save the state.
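	# When fxsave is not used, the save-area size is only known at run
	# time: it is the xsave state size computed at startup by the
	# CPU-features code and stored in _dl_x86_cpu_features (reached
	# through _rtld_local_ro when this code is built into ld.so
	# itself).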
#  if IS_IN (rtld)
	sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  else
	sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  endif
# endif
	# Preserve registers otherwise clobbered.
	movq %rax, REGISTER_SAVE_RAX(%rsp)
	movq %rcx, REGISTER_SAVE_RCX(%rsp)
	movq %rdx, REGISTER_SAVE_RDX(%rsp)
	movq %rsi, REGISTER_SAVE_RSI(%rsp)
	movq %rdi, REGISTER_SAVE_RDI(%rsp)
	movq %r8, REGISTER_SAVE_R8(%rsp)
	movq %r9, REGISTER_SAVE_R9(%rsp)
# ifdef USE_FXSAVE
	fxsave STATE_SAVE_OFFSET(%rsp)
# else
	movl $STATE_SAVE_MASK, %eax
	xorl %edx, %edx
	# Clear the XSAVE Header.
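	# The save area was just carved out of the stack, so the 64-byte
	# XSAVE header at offset 512 may contain garbage.  xsave/xsavec
	# only update parts of the header, and xrstor faults if reserved
	# header bits are set, so the header is cleared here first.  With
	# xsavec the first 16 bytes (XSTATE_BV/XCOMP_BV) are written by the
	# instruction itself, which is why they are only cleared for plain
	# xsave below.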
#  ifdef USE_XSAVE
	movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
#  endif
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
#  ifdef USE_XSAVE
	xsave STATE_SAVE_OFFSET(%rsp)
#  else
	xsavec STATE_SAVE_OFFSET(%rsp)
#  endif
# endif
	# Copy the args pushed by the PLT into registers.
	# %rdi: link_map, %rsi: reloc_index
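	# The PLT entry for a lazily bound symbol pushes its relocation
	# index and jumps to the first, special PLT entry, which pushes the
	# link_map taken from GOT[1] and jumps here.  Those two words sit
	# just above the local storage, so the link_map is read from
	# LOCAL_STORAGE_AREA(%BASE) and the relocation index from 8 bytes
	# above it.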
	mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
	mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
	call _dl_fixup		# Call resolver.
	mov %RAX_LP, %R11_LP	# Save return value.
	# Get register content back.
# ifdef USE_FXSAVE
	fxrstor STATE_SAVE_OFFSET(%rsp)
# else
	movl $STATE_SAVE_MASK, %eax
	xorl %edx, %edx
	xrstor STATE_SAVE_OFFSET(%rsp)
# endif
	movq REGISTER_SAVE_R9(%rsp), %r9
	movq REGISTER_SAVE_R8(%rsp), %r8
	movq REGISTER_SAVE_RDI(%rsp), %rdi
	movq REGISTER_SAVE_RSI(%rsp), %rsi
	movq REGISTER_SAVE_RDX(%rsp), %rdx
	movq REGISTER_SAVE_RCX(%rsp), %rcx
	movq REGISTER_SAVE_RAX(%rsp), %rax
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
	mov %RBX_LP, %RSP_LP
	cfi_def_cfa_register(%rsp)
	movq (%rsp), %rbx
	cfi_restore(%rbx)
# endif
	# Adjust stack (PLT did 2 pushes).
	add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
	cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
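	# %rsp now points at the original caller's return address again,
	# so the indirect jump below behaves like a direct call from the
	# caller to the freshly resolved function.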
	# Preserve bound registers.
	PRESERVE_BND_REGS_PREFIX
	jmp *%r11		# Jump to function address.
	cfi_endproc
	.size _dl_runtime_resolve, .-_dl_runtime_resolve
#endif


#if !defined PROF && defined _dl_runtime_profile
# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
#  error LR_VECTOR_OFFSET must be a multiple of VEC_SIZE
# endif

	.globl _dl_runtime_profile
	.hidden _dl_runtime_profile
	.type _dl_runtime_profile, @function
	.align 16
_dl_runtime_profile:
	cfi_startproc
	cfi_adjust_cfa_offset(16) # Incorporate PLT
	/* The La_x86_64_regs data structure pointed to by the
	   fourth parameter must be VEC_SIZE-byte aligned.  This must
	   be explicitly enforced.  We have to set up a dynamically
	   sized stack frame.  %rbx points to the top half, which
	   has a fixed size and preserves the original stack pointer.  */

	sub $32, %RSP_LP	# Allocate the local storage.
	cfi_adjust_cfa_offset(32)
	movq %rbx, (%rsp)
	cfi_rel_offset(%rbx, 0)

	/* On the stack:
		56(%rbx)	parameter #1
		48(%rbx)	return address

		40(%rbx)	reloc index
		32(%rbx)	link_map

		24(%rbx)	La_x86_64_regs pointer
		16(%rbx)	framesize
		 8(%rbx)	rax
		  (%rbx)	rbx
	*/

	movq %rax, 8(%rsp)
	mov %RSP_LP, %RBX_LP
	cfi_def_cfa_register(%rbx)

	/* Actively align the La_x86_64_regs structure.  */
	and $-VEC_SIZE, %RSP_LP
	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
	   to detect if any xmm0-xmm7 registers are changed by an audit
	   module.  */
	sub $(LR_SIZE + XMM_SIZE*8), %RSP_LP
	movq %rsp, 24(%rbx)

	/* Fill the La_x86_64_regs structure.  */
	movq %rdx, LR_RDX_OFFSET(%rsp)
	movq %r8, LR_R8_OFFSET(%rsp)
	movq %r9, LR_R9_OFFSET(%rsp)
	movq %rcx, LR_RCX_OFFSET(%rsp)
	movq %rsi, LR_RSI_OFFSET(%rsp)
	movq %rdi, LR_RDI_OFFSET(%rsp)
	movq %rbp, LR_RBP_OFFSET(%rsp)

	lea 48(%rbx), %RAX_LP
	movq %rax, LR_RSP_OFFSET(%rsp)

	/* We always store the XMM registers even if AVX is available.
	   This is to provide backward binary compatibility for existing
	   audit modules.  */
	movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
	movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
	movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
	movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
	movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
	movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
	movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
	movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov %bnd0, (LR_BND_OFFSET)(%rsp)		# Preserve bound
	bndmov %bnd1, (LR_BND_OFFSET + BND_SIZE)(%rsp)	# registers.  Nops if
	bndmov %bnd2, (LR_BND_OFFSET + BND_SIZE*2)(%rsp) # MPX not available
	bndmov %bnd3, (LR_BND_OFFSET + BND_SIZE*3)(%rsp) # or disabled.
#  else
	.byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET)
	.byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
	.byte 0x66,0x0f,0x1b,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
	.byte 0x66,0x0f,0x1b,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
#  endif
# endif
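	/* The .byte sequences above encode the same bndmov stores by hand
	   for assemblers that do not know MPX; like the mnemonic forms
	   they behave as nops on processors without MPX.  */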

# ifdef RESTORE_AVX
	/* This is to support AVX audit modules.  */
	VMOVA %VEC(0), (LR_VECTOR_OFFSET)(%rsp)
	VMOVA %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
	VMOVA %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	VMOVA %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	VMOVA %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	VMOVA %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	VMOVA %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	VMOVA %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)

	/* Save xmm0-xmm7 registers to detect if any of them are
	   changed by an audit module.  */
	vmovdqa %xmm0, (LR_SIZE)(%rsp)
	vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp)
	vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
	vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
	vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
	vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
	vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
	vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
# endif
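	/* VMOVA, %VEC(i) and VECTOR_SIZE appear to be macros supplied by
	   the file that includes this template, expanding to the widest
	   aligned vector move and register available in the configuration
	   being built (xmm, ymm or zmm).  */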

	mov %RSP_LP, %RCX_LP	# La_x86_64_regs pointer to %rcx.
	mov 48(%rbx), %RDX_LP	# Load return address if needed.
	mov 40(%rbx), %RSI_LP	# Copy args pushed by the PLT into registers.
	mov 32(%rbx), %RDI_LP	# %rdi: link_map, %rsi: reloc_index
	lea 16(%rbx), %R8_LP	# Address of framesize
	call _dl_profile_fixup	# Call resolver.
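
	# _dl_profile_fixup returns the resolved function address in %rax
	# and writes the frame size requested by the auditing code to
	# 16(%rbx); a negative value there signals that la_pltexit should
	# not be called (see the jns test below).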

	mov %RAX_LP, %R11_LP	# Save return value.

	movq 8(%rbx), %rax	# Get back register content.
	movq LR_RDX_OFFSET(%rsp), %rdx
	movq LR_R8_OFFSET(%rsp), %r8
	movq LR_R9_OFFSET(%rsp), %r9

	movaps (LR_XMM_OFFSET)(%rsp), %xmm0
	movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
	movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
	movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
	movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
	movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
	movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
	movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7

# ifdef RESTORE_AVX
	/* Check if any xmm0-xmm7 registers are changed by an audit
	   module.  */
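	/* For each register: if the audit module left the xmm value
	   untouched (the compare against the snapshot saved above gives an
	   all-ones mask), reload the full-width vector saved before the
	   call (label 2); otherwise keep the module's new xmm value and
	   store it into the corresponding vector slot instead (fall
	   through, then 1).  */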
	vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
	vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
	vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
	vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
	vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
	vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
	vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
	vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
	vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

1:
# endif

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov (LR_BND_OFFSET)(%rsp), %bnd0		# Restore bound
	bndmov (LR_BND_OFFSET + BND_SIZE)(%rsp), %bnd1	# registers.
	bndmov (LR_BND_OFFSET + BND_SIZE*2)(%rsp), %bnd2
	bndmov (LR_BND_OFFSET + BND_SIZE*3)(%rsp), %bnd3
#  else
	.byte 0x66,0x0f,0x1a,0x84,0x24;.long (LR_BND_OFFSET)
	.byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
	.byte 0x66,0x0f,0x1a,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
	.byte 0x66,0x0f,0x1a,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
#  endif
# endif

	mov 16(%rbx), %R10_LP	# Anything in framesize?
	test %R10_LP, %R10_LP
	PRESERVE_BND_REGS_PREFIX
	jns 3f

	/* The framesize is negative here, so there will be no call to
	   _dl_call_pltexit.  */

	/* Get back the register content.  */
	movq LR_RCX_OFFSET(%rsp), %rcx
	movq LR_RSI_OFFSET(%rsp), %rsi
	movq LR_RDI_OFFSET(%rsp), %rdi

	mov %RBX_LP, %RSP_LP
	movq (%rsp), %rbx
	cfi_restore(%rbx)
	cfi_def_cfa_register(%rsp)

	add $48, %RSP_LP	# Adjust the stack to the return address
				# (eats the reloc index and link_map).
	cfi_adjust_cfa_offset(-48)
	PRESERVE_BND_REGS_PREFIX
	jmp *%r11		# Jump to function address.

3:
	cfi_adjust_cfa_offset(48)
	cfi_rel_offset(%rbx, 0)
	cfi_def_cfa_register(%rbx)

	/* At this point we need to prepare a new stack frame for the
	   function that is about to be called.  We copy the original
	   stack area to a temporary buffer of the size specified by the
	   'framesize' value returned from _dl_profile_fixup.  */
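
	/* The rounding below (add $8, and $-16) brings the framesize to a
	   multiple of 16 so the copied frame keeps the ABI-mandated stack
	   alignment, and rep movsq then copies that many bytes, one
	   quadword at a time, from the caller's stack-argument area into
	   the new frame.  When the copy finishes, %rdi has advanced to the
	   old %rsp, i.e. to the La_x86_64_regs structure, which is why the
	   code below can reload %rcx/%rsi/%rdi through 24/32/40(%rdi).  */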

	lea LR_RSP_OFFSET(%rbx), %RSI_LP	# stack
	add $8, %R10_LP
	and $-16, %R10_LP
	mov %R10_LP, %RCX_LP
	sub %R10_LP, %RSP_LP
	mov %RSP_LP, %RDI_LP
	shr $3, %RCX_LP
	rep
	movsq

	movq 24(%rdi), %rcx	# Get back register content.
	movq 32(%rdi), %rsi
	movq 40(%rdi), %rdi

	PRESERVE_BND_REGS_PREFIX
	call *%r11

	mov 24(%rbx), %RSP_LP	# Drop the copied stack content.

	/* Now we have to prepare the La_x86_64_retval structure for
	   _dl_call_pltexit.  %rsp now points to the La_x86_64_regs
	   structure, so we just need to allocate sizeof(La_x86_64_retval)
	   bytes on the stack, since the alignment has already been taken
	   care of.  */
# ifdef RESTORE_AVX
	/* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
	   registers to detect if the xmm0/xmm1 registers are changed
	   by an audit module.  Since rsp is aligned to VEC_SIZE, we
	   need to make sure that the address of La_x86_64_retval +
	   LRV_VECTOR0_OFFSET is aligned to VEC_SIZE.  */
#  define LRV_SPACE (LRV_SIZE + XMM_SIZE*2)
#  define LRV_MISALIGNED ((LRV_SIZE + LRV_VECTOR0_OFFSET) & (VEC_SIZE - 1))
#  if LRV_MISALIGNED == 0
	sub $LRV_SPACE, %RSP_LP
#  else
	sub $(LRV_SPACE + VEC_SIZE - LRV_MISALIGNED), %RSP_LP
#  endif
# else
	sub $LRV_SIZE, %RSP_LP	# sizeof(La_x86_64_retval)
# endif
	mov %RSP_LP, %RCX_LP	# La_x86_64_retval argument to %rcx.

	/* Fill in the La_x86_64_retval structure.  */
	movq %rax, LRV_RAX_OFFSET(%rcx)
	movq %rdx, LRV_RDX_OFFSET(%rcx)

	movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
	movaps %xmm1, LRV_XMM1_OFFSET(%rcx)

# ifdef RESTORE_AVX
	/* This is to support AVX audit modules.  */
	VMOVA %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
	VMOVA %VEC(1), LRV_VECTOR1_OFFSET(%rcx)

	/* Save xmm0/xmm1 registers to detect if they are changed
	   by an audit module.  */
	vmovdqa %xmm0, (LRV_SIZE)(%rcx)
	vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
# endif

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov %bnd0, LRV_BND0_OFFSET(%rcx)	# Preserve returned bounds.
	bndmov %bnd1, LRV_BND1_OFFSET(%rcx)
#  else
	.byte 0x66,0x0f,0x1b,0x81;.long (LRV_BND0_OFFSET)
	.byte 0x66,0x0f,0x1b,0x89;.long (LRV_BND1_OFFSET)
#  endif
# endif

	fstpt LRV_ST0_OFFSET(%rcx)
	fstpt LRV_ST1_OFFSET(%rcx)
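
	/* st(0)/st(1) hold a long double (or complex long double) return
	   value in the x86-64 ABI; fstpt spills and pops them here, and
	   the matching fldt pair below pushes them back in reverse order
	   before returning to the caller.  */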

	movq 24(%rbx), %rdx	# La_x86_64_regs argument to %rdx.
	movq 40(%rbx), %rsi	# Copy args pushed by the PLT into registers.
	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
	call _dl_call_pltexit

	/* Restore return registers.  */
	movq LRV_RAX_OFFSET(%rsp), %rax
	movq LRV_RDX_OFFSET(%rsp), %rdx

	movaps LRV_XMM0_OFFSET(%rsp), %xmm0
	movaps LRV_XMM1_OFFSET(%rsp), %xmm1

# ifdef RESTORE_AVX
	/* Check if the xmm0/xmm1 registers are changed by an audit
	   module.  */
	vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	VMOVA LRV_VECTOR0_OFFSET(%rsp), %VEC(0)

1:	vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	VMOVA LRV_VECTOR1_OFFSET(%rsp), %VEC(1)

1:
# endif

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov LRV_BND0_OFFSET(%rsp), %bnd0	# Restore bound registers.
	bndmov LRV_BND1_OFFSET(%rsp), %bnd1
#  else
	.byte 0x66,0x0f,0x1a,0x84,0x24;.long (LRV_BND0_OFFSET)
	.byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LRV_BND1_OFFSET)
#  endif
# endif

	fldt LRV_ST1_OFFSET(%rsp)
	fldt LRV_ST0_OFFSET(%rsp)

	mov %RBX_LP, %RSP_LP
	movq (%rsp), %rbx
	cfi_restore(%rbx)
	cfi_def_cfa_register(%rsp)

	add $48, %RSP_LP	# Adjust the stack to the return address
				# (eats the reloc index and link_map).
	cfi_adjust_cfa_offset(-48)
	PRESERVE_BND_REGS_PREFIX
	retq

	cfi_endproc
	.size _dl_runtime_profile, .-_dl_runtime_profile
#endif