/* Optimized wcslen for x86-64 with SSE2.
   Copyright (C) 2011-2020 Free Software Foundation, Inc.
   Contributed by Intel Corporation.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
#include <sysdep.h>

	/* Everything that follows is emitted into the code section.  */
	.text
/* size_t __wcslen (const wchar_t *s)

   Count the 4-byte elements that precede the first all-zero 4-byte
   element of the array whose address is in %rdi; the count is
   returned in %rax.  Syntax is AT&T (source, destination).

   In:       %rdi = s.  The vector paths use 16-byte aligned loads and
	     decode the match per 4-byte lane, so they assume s is
	     4-byte aligned (lanes coincide with elements) -- TODO
	     confirm this is guaranteed by all callers.
   Out:      %rax = number of elements before the terminator.
   Written:  %rax, %rcx, %rdx, %xmm0-%xmm3, %xmm6, flags.
   Stack:    not used.

   Outline:
     1. Elements 0..7 (the first 32 bytes) are tested one at a time.
     2. Twelve unrolled 16-byte vector tests follow, starting at
	s + 32 rounded down to a 16-byte boundary.
     3. The pointer is rounded down to a 64-byte boundary and the rest
	is scanned 64 bytes per iteration.

   Invariants the code relies on:
     - Whenever L(exit) is reached, %rax - %rcx is the byte offset
       from s of the 16-byte vector that contains the terminator, and
       %edx holds that vector's (nonzero) pmovmskb mask.
     - pcmpeqd leaves its destination all-zero when no lane matched,
       so a register whose mask was just found to be zero can be
       reused as the zero operand of a later compare.  %xmm3 is zero
       each time L(aligned_64_loop) is entered.
     - lea does not modify flags, which is why it can sit between a
       test and the conditional jump that consumes the result.  */
ENTRY (__wcslen)
	/* Scalar test of the first eight elements.  */
	cmpl	$0, (%rdi)
	jz	L(exit_tail0)
	cmpl	$0, 4(%rdi)
	jz	L(exit_tail1)
	cmpl	$0, 8(%rdi)
	jz	L(exit_tail2)
	cmpl	$0, 12(%rdi)
	jz	L(exit_tail3)
	cmpl	$0, 16(%rdi)
	jz	L(exit_tail4)
	cmpl	$0, 20(%rdi)
	jz	L(exit_tail5)
	cmpl	$0, 24(%rdi)
	jz	L(exit_tail6)
	cmpl	$0, 28(%rdi)
	jz	L(exit_tail7)

	pxor	%xmm0, %xmm0

	/* %rax = s + 32 rounded down to 16 bytes, i.e. somewhere in
	   (s + 16, s + 32].  Any bytes below s + 32 that get re-read were
	   already found to be nonzero elements above.
	   %rcx = s + 16 is the bias L(exit) subtracts: %rax is always
	   advanced by 16 before the branch to L(exit) is taken.  */
	lea	32(%rdi), %rax
	lea	16(%rdi), %rcx
	and	$-16, %rax

	/* Vector 1.  Each step compares 16 bytes against zero, collects
	   the per-byte result in %edx, then steps %rax to the next
	   vector.  The first three steps also zero the register the
	   following step will compare into.  */
	pcmpeqd	(%rax), %xmm0
	pmovmskb	%xmm0, %edx
	pxor	%xmm1, %xmm1
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	/* Vector 2.  */
	pcmpeqd	(%rax), %xmm1
	pmovmskb	%xmm1, %edx
	pxor	%xmm2, %xmm2
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	/* Vector 3.  */
	pcmpeqd	(%rax), %xmm2
	pmovmskb	%xmm2, %edx
	pxor	%xmm3, %xmm3
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	/* Vector 4.  */
	pcmpeqd	(%rax), %xmm3
	pmovmskb	%xmm3, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	/* Vectors 5-8.  %xmm0-%xmm3 are all-zero again here because
	   their previous compares found no match.  */
	pcmpeqd	(%rax), %xmm0
	pmovmskb	%xmm0, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm1
	pmovmskb	%xmm1, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm2
	pmovmskb	%xmm2, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm3
	pmovmskb	%xmm3, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	/* Vectors 9-12.  */
	pcmpeqd	(%rax), %xmm0
	pmovmskb	%xmm0, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm1
	pmovmskb	%xmm1, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm2
	pmovmskb	%xmm2, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm3
	pmovmskb	%xmm3, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	/* Round down to a 64-byte boundary for the main loop.  This can
	   move %rax back by up to 48 bytes; those bytes were covered by
	   the vector tests above and contain no zero element.  */
	and	$-0x40, %rax

	.p2align 4
L(aligned_64_loop):
	movaps	(%rax), %xmm0
	movaps	16(%rax), %xmm1
	movaps	32(%rax), %xmm2
	movaps	48(%rax), %xmm6

	/* Fold the four vectors into %xmm2 with an unsigned byte-wise
	   minimum and compare the result against zero (%xmm3).  A zero
	   element in any vector forces a zero lane in the minimum, so
	   nothing is missed.  The converse does not hold: zero bytes
	   coming from different vectors can combine into a zero lane,
	   so a hit here is only a candidate and is verified below.
	   %xmm0 and %xmm2 are overwritten; %xmm1 and %xmm6 still hold
	   the second and fourth vectors.  */
	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqd	%xmm3, %xmm2
	pmovmskb	%xmm2, %edx
	test	%edx, %edx
	lea	64(%rax), %rax
	jz	L(aligned_64_loop)

	/* Candidate hit in the block at %rax - 64.  Test its four
	   vectors in order.  %rax stays at block + 64, so %rcx is moved
	   to s + 64, s + 48, s + 32, s + 16 in turn to keep
	   %rax - %rcx equal to the tested vector's offset from s.  */

	/* First vector: reloaded from memory, %xmm0 was clobbered.  */
	pcmpeqd	-64(%rax), %xmm3
	pmovmskb	%xmm3, %edx
	test	%edx, %edx
	lea	48(%rcx), %rcx
	jnz	L(exit)

	/* Second vector, still in %xmm1.  */
	pcmpeqd	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	test	%edx, %edx
	lea	-16(%rcx), %rcx
	jnz	L(exit)

	/* Third vector: reloaded from memory, %xmm2 was clobbered.  */
	pcmpeqd	-32(%rax), %xmm3
	pmovmskb	%xmm3, %edx
	test	%edx, %edx
	lea	-16(%rcx), %rcx
	jnz	L(exit)

	/* Fourth vector, still in %xmm6.  */
	pcmpeqd	%xmm6, %xmm3
	pmovmskb	%xmm3, %edx
	test	%edx, %edx
	lea	-16(%rcx), %rcx
	jnz	L(exit)

	/* False positive.  %xmm3 is all-zero and %rcx is back at s + 16,
	   which is the state the loop expects.  */
	jmp	L(aligned_64_loop)

	/* Turn (vector address, mask) into an element count.
	   pmovmskb yields one bit per byte, so a matching 4-byte lane
	   sets a whole nibble: lane 0 -> bits 0-3 of %dl, lane 1 -> bits
	   4-7 of %dl, lane 2 -> bits 0-3 of %dh, lane 3 -> bits 4-7 of
	   %dh.  The lowest nonzero nibble selects the first zero
	   element.  */
	.p2align 4
L(exit):
	sub	%rcx, %rax		/* Byte offset of the vector from s.  */
	shr	$2, %rax		/* Bytes -> 4-byte elements.  */
	test	%dl, %dl
	jz	L(exit_high)		/* Lanes 0 and 1 are clear.  */

	test	$15, %dl
	jz	L(exit_1)		/* Lane 0 is clear, so it is lane 1.  */
	ret				/* Lane 0.  */

	.p2align 4
L(exit_high):
	test	$15, %dh
	jz	L(exit_3)		/* Lane 2 is clear, so it is lane 3.  */
	add	$2, %rax		/* Lane 2.  */
	ret

	.p2align 4
L(exit_1):
	add	$1, %rax
	ret

	.p2align 4
L(exit_3):
	add	$3, %rax
	ret

	/* Results for a terminator among the first eight elements.
	   Writing the 32-bit register clears the upper half of %rax, so
	   the 64-bit return value is the same as with a 64-bit move.  */
	.p2align 4
L(exit_tail0):
	xor	%eax, %eax
	ret

	.p2align 4
L(exit_tail1):
	mov	$1, %eax
	ret

	.p2align 4
L(exit_tail2):
	mov	$2, %eax
	ret

	.p2align 4
L(exit_tail3):
	mov	$3, %eax
	ret

	.p2align 4
L(exit_tail4):
	mov	$4, %eax
	ret

	.p2align 4
L(exit_tail5):
	mov	$5, %eax
	ret

	.p2align 4
L(exit_tail6):
	mov	$6, %eax
	ret

	.p2align 4
L(exit_tail7):
	mov	$7, %eax
	ret

END (__wcslen)

weak_alias(__wcslen, wcslen)
239