1/* Thread-local storage handling in the ELF dynamic linker. x86_64 version.
2 Copyright (C) 2004-2020 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <sysdep.h>
20#include <tls.h>
21#include "tlsdesc.h"
22
23 .text
24
25 /* This function is used to compute the TP offset for symbols in
26 Static TLS, i.e., whose TP offset is the same for all
27 threads.
28
29 The incoming %rax points to the TLS descriptor, such that
30 0(%rax) points to _dl_tlsdesc_return itself, and 8(%rax) holds
31 the TP offset of the symbol corresponding to the object
32 denoted by the argument. */
33
34 .hidden _dl_tlsdesc_return
35 .global _dl_tlsdesc_return
36 .type _dl_tlsdesc_return,@function
37 cfi_startproc
38 .align 16
39_dl_tlsdesc_return:
40 _CET_ENDBR
41 movq 8(%rax), %rax
42 ret
43 cfi_endproc
44 .size _dl_tlsdesc_return, .-_dl_tlsdesc_return
45
46 /* This function is used for undefined weak TLS symbols, for
47 which the base address (i.e., disregarding any addend) should
48 resolve to NULL.
49
50 %rax points to the TLS descriptor, such that 0(%rax) points to
51 _dl_tlsdesc_undefweak itself, and 8(%rax) holds the addend.
52 We return the addend minus the TP, such that, when the caller
53 adds TP, it gets the addend back. If that's zero, as usual,
54 that's most likely a NULL pointer. */
55
56 .hidden _dl_tlsdesc_undefweak
57 .global _dl_tlsdesc_undefweak
58 .type _dl_tlsdesc_undefweak,@function
59 cfi_startproc
60 .align 16
61_dl_tlsdesc_undefweak:
62 _CET_ENDBR
63 movq 8(%rax), %rax
64 subq %fs:0, %rax
65 ret
66 cfi_endproc
67 .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
68
69#ifdef SHARED
70 .hidden _dl_tlsdesc_dynamic
71 .global _dl_tlsdesc_dynamic
72 .type _dl_tlsdesc_dynamic,@function
73
74 /* %rax points to the TLS descriptor, such that 0(%rax) points to
75 _dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct
76 tlsdesc_dynamic_arg object. It must return in %rax the offset
77 between the thread pointer and the object denoted by the
78 argument, without clobbering any registers.
79
80 The assembly code that follows is a rendition of the following
81 C code, hand-optimized a little bit.
82
83ptrdiff_t
84_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
85{
86 struct tlsdesc_dynamic_arg *td = tdp->arg;
87 dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
88 if (__builtin_expect (td->gen_count <= dtv[0].counter
89 && (dtv[td->tlsinfo.ti_module].pointer.val
90 != TLS_DTV_UNALLOCATED),
91 1))
92 return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
93 - __thread_pointer;
94
95 return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer;
96}
97*/
98 cfi_startproc
99 .align 16
100_dl_tlsdesc_dynamic:
101 _CET_ENDBR
102 /* Preserve call-clobbered registers that we modify.
103 We need two scratch regs anyway. */
104 movq %rsi, -16(%rsp)
105 movq %fs:DTV_OFFSET, %rsi
106 movq %rdi, -8(%rsp)
107 movq TLSDESC_ARG(%rax), %rdi
108 movq (%rsi), %rax
109 cmpq %rax, TLSDESC_GEN_COUNT(%rdi)
110 ja .Lslow
111 movq TLSDESC_MODID(%rdi), %rax
112 salq $4, %rax
113 movq (%rax,%rsi), %rax
114 cmpq $-1, %rax
115 je .Lslow
116 addq TLSDESC_MODOFF(%rdi), %rax
117.Lret:
118 movq -16(%rsp), %rsi
119 subq %fs:0, %rax
120 movq -8(%rsp), %rdi
121 ret
122.Lslow:
123 /* Besides rdi and rsi, saved above, save rdx, rcx, r8, r9,
124 r10 and r11. Also, align the stack, that's off by 8 bytes. */
125 subq $72, %rsp
126 cfi_adjust_cfa_offset (72)
127 movq %rdx, 8(%rsp)
128 movq %rcx, 16(%rsp)
129 movq %r8, 24(%rsp)
130 movq %r9, 32(%rsp)
131 movq %r10, 40(%rsp)
132 movq %r11, 48(%rsp)
133 /* %rdi already points to the tlsinfo data structure. */
134#ifdef NO_RTLD_HIDDEN
135 call JUMPTARGET (__tls_get_addr)
136#else
137 call HIDDEN_JUMPTARGET (__tls_get_addr)
138#endif
139 movq 8(%rsp), %rdx
140 movq 16(%rsp), %rcx
141 movq 24(%rsp), %r8
142 movq 32(%rsp), %r9
143 movq 40(%rsp), %r10
144 movq 48(%rsp), %r11
145 addq $72, %rsp
146 cfi_adjust_cfa_offset (-72)
147 jmp .Lret
148 cfi_endproc
149 .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
150#endif /* SHARED */
151
152 /* This function is a wrapper for a lazy resolver for TLS_DESC
153 RELA relocations. The incoming 0(%rsp) points to the caller's
154 link map, pushed by the dynamic object's internal lazy TLS
155 resolver front-end before tail-calling us. We need to pop it
156 ourselves. %rax points to a TLS descriptor, such that 0(%rax)
157 holds the address of the internal resolver front-end (unless
158 some other thread beat us to resolving it) and 8(%rax) holds a
159 pointer to the relocation.
160
161 When the actual resolver returns, it will have adjusted the
162 TLS descriptor such that we can tail-call it for it to return
163 the TP offset of the symbol. */
164
165 .hidden _dl_tlsdesc_resolve_rela
166 .global _dl_tlsdesc_resolve_rela
167 .type _dl_tlsdesc_resolve_rela,@function
168 cfi_startproc
169 .align 16
170 /* The PLT entry will have pushed the link_map pointer. */
171_dl_tlsdesc_resolve_rela:
172 _CET_ENDBR
173 cfi_adjust_cfa_offset (8)
174 /* Save all call-clobbered registers. Add 8 bytes for push in
175 the PLT entry to align the stack. */
176 subq $80, %rsp
177 cfi_adjust_cfa_offset (80)
178 movq %rax, (%rsp)
179 movq %rdi, 8(%rsp)
180 movq %rax, %rdi /* Pass tlsdesc* in %rdi. */
181 movq %rsi, 16(%rsp)
182 movq 80(%rsp), %rsi /* Pass link_map* in %rsi. */
183 movq %r8, 24(%rsp)
184 movq %r9, 32(%rsp)
185 movq %r10, 40(%rsp)
186 movq %r11, 48(%rsp)
187 movq %rdx, 56(%rsp)
188 movq %rcx, 64(%rsp)
189 call _dl_tlsdesc_resolve_rela_fixup
190 movq (%rsp), %rax
191 movq 8(%rsp), %rdi
192 movq 16(%rsp), %rsi
193 movq 24(%rsp), %r8
194 movq 32(%rsp), %r9
195 movq 40(%rsp), %r10
196 movq 48(%rsp), %r11
197 movq 56(%rsp), %rdx
198 movq 64(%rsp), %rcx
199 addq $88, %rsp
200 cfi_adjust_cfa_offset (-88)
201 jmp *(%rax)
202 cfi_endproc
203 .size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
204
205 /* This function is a placeholder for lazy resolving of TLS
206 relocations. Once some thread starts resolving a TLS
207 relocation, it sets up the TLS descriptor to use this
208 resolver, such that other threads that would attempt to
209 resolve it concurrently may skip the call to the original lazy
210 resolver and go straight to a condition wait.
211
212 When the actual resolver returns, it will have adjusted the
213 TLS descriptor such that we can tail-call it for it to return
214 the TP offset of the symbol. */
215
216 .hidden _dl_tlsdesc_resolve_hold
217 .global _dl_tlsdesc_resolve_hold
218 .type _dl_tlsdesc_resolve_hold,@function
219 cfi_startproc
220 .align 16
221_dl_tlsdesc_resolve_hold:
2220:
223 _CET_ENDBR
224 /* Save all call-clobbered registers. */
225 subq $72, %rsp
226 cfi_adjust_cfa_offset (72)
227 movq %rax, (%rsp)
228 movq %rdi, 8(%rsp)
229 movq %rax, %rdi /* Pass tlsdesc* in %rdi. */
230 movq %rsi, 16(%rsp)
231 /* Pass _dl_tlsdesc_resolve_hold's address in %rsi. */
232 leaq . - _dl_tlsdesc_resolve_hold(%rip), %rsi
233 movq %r8, 24(%rsp)
234 movq %r9, 32(%rsp)
235 movq %r10, 40(%rsp)
236 movq %r11, 48(%rsp)
237 movq %rdx, 56(%rsp)
238 movq %rcx, 64(%rsp)
239 call _dl_tlsdesc_resolve_hold_fixup
2401:
241 movq (%rsp), %rax
242 movq 8(%rsp), %rdi
243 movq 16(%rsp), %rsi
244 movq 24(%rsp), %r8
245 movq 32(%rsp), %r9
246 movq 40(%rsp), %r10
247 movq 48(%rsp), %r11
248 movq 56(%rsp), %rdx
249 movq 64(%rsp), %rcx
250 addq $72, %rsp
251 cfi_adjust_cfa_offset (-72)
252 jmp *(%rax)
253 cfi_endproc
254 .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
255