/* Thread-local storage handling in the ELF dynamic linker.  x86_64 version.
   Copyright (C) 2004-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <tls.h>
#include "tlsdesc.h"
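     /* For reference, the TLS descriptor layout this file relies on
	(0(%rax) is the entry point, 8(%rax) its argument, with the
	TLSDESC_* macros naming the field offsets) corresponds to a
	structure along these lines.  This is only an illustrative
	sketch; the authoritative definitions live in the TLS
	descriptor headers included above.

struct tlsdesc
{
  ptrdiff_t (*entry) (struct tlsdesc *);	// 0(%rax): resolver entry point
  void *arg;					// 8(%rax): per-descriptor argument
};
*/
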
	.text

     /* This function is used to compute the TP offset for symbols in
	Static TLS, i.e., whose TP offset is the same for all
	threads.

	The incoming %rax points to the TLS descriptor, such that
	0(%rax) points to _dl_tlsdesc_return itself, and 8(%rax) holds
	the TP offset of the symbol corresponding to the object
	denoted by the argument.  */
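
     /* In the style of the C rendition given for _dl_tlsdesc_dynamic
	below, this amounts to roughly the following (an illustrative
	sketch, assuming the struct tlsdesc layout sketched above):

ptrdiff_t
_dl_tlsdesc_return (register struct tlsdesc *tdp asm ("%rax"))
{
  return (ptrdiff_t) tdp->arg;
}
*/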

	.hidden _dl_tlsdesc_return
	.global	_dl_tlsdesc_return
	.type	_dl_tlsdesc_return,@function
	cfi_startproc
	.align 16
_dl_tlsdesc_return:
	movq	8(%rax), %rax
	ret
	cfi_endproc
	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return

     /* This function is used for undefined weak TLS symbols, for
	which the base address (i.e., disregarding any addend) should
	resolve to NULL.

	%rax points to the TLS descriptor, such that 0(%rax) points to
	_dl_tlsdesc_undefweak itself, and 8(%rax) holds the addend.
	We return the addend minus the thread pointer (TP), so that
	when the caller adds the TP back in, it recovers the addend.
	If that is zero, as usual, it is most likely a NULL pointer.  */
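
     /* Again in the style of the C rendition below (a sketch;
	__thread_pointer stands for the thread pointer read from
	%fs:0, as in the _dl_tlsdesc_dynamic rendition):

ptrdiff_t
_dl_tlsdesc_undefweak (register struct tlsdesc *tdp asm ("%rax"))
{
  return (ptrdiff_t) tdp->arg - __thread_pointer;
}
*/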

	.hidden _dl_tlsdesc_undefweak
	.global	_dl_tlsdesc_undefweak
	.type	_dl_tlsdesc_undefweak,@function
	cfi_startproc
	.align 16
_dl_tlsdesc_undefweak:
	movq	8(%rax), %rax
	subq	%fs:0, %rax
	ret
	cfi_endproc
	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak

#ifdef SHARED
	.hidden _dl_tlsdesc_dynamic
	.global	_dl_tlsdesc_dynamic
	.type	_dl_tlsdesc_dynamic,@function

     /* %rax points to the TLS descriptor, such that 0(%rax) points to
	_dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct
	tlsdesc_dynamic_arg object.  It must return in %rax the offset
	between the thread pointer and the object denoted by the
	argument, without clobbering any registers.

	The assembly code that follows is a rendition of the following
	C code, hand-optimized a little bit.

ptrdiff_t
_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
{
  struct tlsdesc_dynamic_arg *td = tdp->arg;
  dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
  if (__builtin_expect (td->gen_count <= dtv[0].counter
			&& (dtv[td->tlsinfo.ti_module].pointer.val
			    != TLS_DTV_UNALLOCATED),
			1))
    return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
      - __thread_pointer;

  return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer;
}
*/
	cfi_startproc
	.align 16
_dl_tlsdesc_dynamic:
	/* Preserve call-clobbered registers that we modify.
	   We need two scratch regs anyway.  */
	movq	%rsi, -16(%rsp)
	movq	%fs:DTV_OFFSET, %rsi
	movq	%rdi, -8(%rsp)
	movq	TLSDESC_ARG(%rax), %rdi
	movq	(%rsi), %rax
	cmpq	%rax, TLSDESC_GEN_COUNT(%rdi)
	ja	.Lslow
	movq	TLSDESC_MODID(%rdi), %rax
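	/* Scale the module ID into a byte offset into the DTV; each
	   dtv_t entry is 16 bytes, matching dtv[ti_module] in the C
	   rendition above.  */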
	salq	$4, %rax
	movq	(%rax,%rsi), %rax
	cmpq	$-1, %rax
	je	.Lslow
	addq	TLSDESC_MODOFF(%rdi), %rax
.Lret:
	movq	-16(%rsp), %rsi
	subq	%fs:0, %rax
	movq	-8(%rsp), %rdi
	ret
.Lslow:
	/* Besides rdi and rsi, saved above, save rdx, rcx, r8, r9,
	   r10 and r11.  Also align the stack, which is off by 8 bytes.  */
	subq	$72, %rsp
	cfi_adjust_cfa_offset (72)
	movq	%rdx, 8(%rsp)
	movq	%rcx, 16(%rsp)
	movq	%r8, 24(%rsp)
	movq	%r9, 32(%rsp)
	movq	%r10, 40(%rsp)
	movq	%r11, 48(%rsp)
	/* %rdi already points to the tlsinfo data structure.  */
	call	HIDDEN_JUMPTARGET (__tls_get_addr)
	movq	8(%rsp), %rdx
	movq	16(%rsp), %rcx
	movq	24(%rsp), %r8
	movq	32(%rsp), %r9
	movq	40(%rsp), %r10
	movq	48(%rsp), %r11
	addq	$72, %rsp
	cfi_adjust_cfa_offset (-72)
	jmp	.Lret
	cfi_endproc
	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
#endif /* SHARED */

     /* This function is a wrapper for a lazy resolver for TLS_DESC
	RELA relocations.  The incoming 0(%rsp) points to the caller's
	link map, pushed by the dynamic object's internal lazy TLS
	resolver front-end before tail-calling us.  We need to pop it
	ourselves.  %rax points to a TLS descriptor, such that 0(%rax)
	holds the address of the internal resolver front-end (unless
	some other thread beat us to resolving it) and 8(%rax) holds a
	pointer to the relocation.

	When the actual resolver returns, it will have adjusted the
	TLS descriptor such that we can tail-call it for it to return
	the TP offset of the symbol.  */
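
     /* In C-like pseudo-code, the control flow below is roughly the
	following (a sketch; the exact prototype of the fixup routine,
	implemented elsewhere in the dynamic linker, is assumed):

void
_dl_tlsdesc_resolve_rela (struct tlsdesc *tdp,	// %rax
			  struct link_map *l)	// popped from 0(%rsp)
{
  // All call-clobbered registers are preserved around this call.
  _dl_tlsdesc_resolve_rela_fixup (tdp, l);
  // The fixup has rewritten tdp->entry (and tdp->arg); tail-call the
  // new entry point so it returns the TP offset in %rax.
  tdp->entry (tdp);
}
*/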

	.hidden _dl_tlsdesc_resolve_rela
	.global	_dl_tlsdesc_resolve_rela
	.type	_dl_tlsdesc_resolve_rela,@function
	cfi_startproc
	.align 16
	/* The PLT entry will have pushed the link_map pointer.  */
_dl_tlsdesc_resolve_rela:
	cfi_adjust_cfa_offset (8)
	/* Save all call-clobbered registers.  Add 8 bytes for the push
	   in the PLT entry to align the stack.  */
	subq	$80, %rsp
	cfi_adjust_cfa_offset (80)
	movq	%rax, (%rsp)
	movq	%rdi, 8(%rsp)
	movq	%rax, %rdi	/* Pass tlsdesc* in %rdi.  */
	movq	%rsi, 16(%rsp)
	movq	80(%rsp), %rsi	/* Pass link_map* in %rsi.  */
	movq	%r8, 24(%rsp)
	movq	%r9, 32(%rsp)
	movq	%r10, 40(%rsp)
	movq	%r11, 48(%rsp)
	movq	%rdx, 56(%rsp)
	movq	%rcx, 64(%rsp)
	call	_dl_tlsdesc_resolve_rela_fixup
	movq	(%rsp), %rax
	movq	8(%rsp), %rdi
	movq	16(%rsp), %rsi
	movq	24(%rsp), %r8
	movq	32(%rsp), %r9
	movq	40(%rsp), %r10
	movq	48(%rsp), %r11
	movq	56(%rsp), %rdx
	movq	64(%rsp), %rcx
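	/* Pop our 80-byte frame plus the link_map pointer (8 bytes)
	   pushed by the PLT entry, hence 88.  */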
	addq	$88, %rsp
	cfi_adjust_cfa_offset (-88)
	jmp	*(%rax)
	cfi_endproc
	.size	_dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela

     /* This function is a placeholder for lazy resolving of TLS
	relocations.  Once some thread starts resolving a TLS
	relocation, it sets up the TLS descriptor to use this
	resolver, such that other threads that would attempt to
	resolve it concurrently may skip the call to the original lazy
	resolver and go straight to a condition wait.

	When the actual resolver returns, it will have adjusted the
	TLS descriptor such that we can tail-call it for it to return
	the TP offset of the symbol.  */
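
     /* Sketched in the same C-like style as above (again, the fixup
	routine's prototype, implemented elsewhere in the dynamic
	linker, is assumed):

void
_dl_tlsdesc_resolve_hold (struct tlsdesc *tdp)	// %rax
{
  // Wait for the thread doing the actual resolution to finish, then
  // tail-call the final entry point it installed in the descriptor.
  _dl_tlsdesc_resolve_hold_fixup (tdp, &_dl_tlsdesc_resolve_hold);
  tdp->entry (tdp);
}
*/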

	.hidden _dl_tlsdesc_resolve_hold
	.global	_dl_tlsdesc_resolve_hold
	.type	_dl_tlsdesc_resolve_hold,@function
	cfi_startproc
	.align 16
_dl_tlsdesc_resolve_hold:
0:
	/* Save all call-clobbered registers.  */
	subq	$72, %rsp
	cfi_adjust_cfa_offset (72)
	movq	%rax, (%rsp)
	movq	%rdi, 8(%rsp)
	movq	%rax, %rdi	/* Pass tlsdesc* in %rdi.  */
	movq	%rsi, 16(%rsp)
	/* Pass _dl_tlsdesc_resolve_hold's address in %rsi.  */
	leaq	. - _dl_tlsdesc_resolve_hold(%rip), %rsi
	movq	%r8, 24(%rsp)
	movq	%r9, 32(%rsp)
	movq	%r10, 40(%rsp)
	movq	%r11, 48(%rsp)
	movq	%rdx, 56(%rsp)
	movq	%rcx, 64(%rsp)
	call	_dl_tlsdesc_resolve_hold_fixup
1:
	movq	(%rsp), %rax
	movq	8(%rsp), %rdi
	movq	16(%rsp), %rsi
	movq	24(%rsp), %r8
	movq	32(%rsp), %r9
	movq	40(%rsp), %r10
	movq	48(%rsp), %r11
	movq	56(%rsp), %rdx
	movq	64(%rsp), %rcx
	addq	$72, %rsp
	cfi_adjust_cfa_offset (-72)
	jmp	*(%rax)
	cfi_endproc
	.size	_dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold