1/* strchr with SSE2 without bsf
2 Copyright (C) 2011-2020 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#if IS_IN (libc)
21
22# include <sysdep.h>
23# include "asm-syntax.h"
24
25 atom_text_section
/* char *__strchr_sse2_no_bsf (const char *s, int c)

   In:    %rdi = s, %esi = c (only the low byte of c is used).
   Out:   %rax = address of the first byte of S equal to (char) c, or 0
	  when the terminating NUL is reached first.  When (char) c is
	  itself 0 the address of the terminator is returned, because at
	  every position the match bit is tested before the NUL bit.
   Clobb: %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm3, flags.  No stack use.

   The string is read in 16-byte aligned chunks.  For each chunk two
   16-bit masks are built with pcmpeqb/pmovmskb:
     %eax  bit i set  <=>  byte i of the chunk equals (char) c
     %edx  bit i set  <=>  byte i of the chunk is NUL
   The lowest set bit is then located with a chain of bit tests rather
   than with bsf.  */
ENTRY (__strchr_sse2_no_bsf)
	movd	%esi, %xmm1
	movq	%rdi, %rcx		/* Keep the unaligned S.  */
	punpcklbw %xmm1, %xmm1
	andq	$~15, %rdi		/* %rdi = S rounded down to 16.  */
	pxor	%xmm2, %xmm2		/* %xmm2 = sixteen zero bytes.  */
	punpcklbw %xmm1, %xmm1
	orl	$0xffffffff, %esi	/* All-ones seed for the lead mask.  */
	movdqa	(%rdi), %xmm0		/* First chunk; may start before S.  */
	pshufd	$0, %xmm1, %xmm1	/* %xmm1 = sixteen copies of (char) c.  */
	subq	%rdi, %rcx		/* %cl = S & 15.  */
	movdqa	%xmm0, %xmm3
	leaq	16(%rdi), %rdi
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm3
	shll	%cl, %esi		/* Clear the bits of bytes before S.  */
	pmovmskb %xmm0, %eax
	pmovmskb %xmm3, %edx
	andl	%esi, %eax		/* Ignore matches in front of S.  */
	andl	%esi, %edx		/* Ignore NULs in front of S.  */
	testl	%eax, %eax
	jnz	L(matches)
	testl	%edx, %edx
	jnz	L(return_null)

	/* Main loop: %rdi always points one chunk past the one whose
	   masks are in %eax/%edx.  */
L(loop):
	movdqa	(%rdi), %xmm0
	leaq	16(%rdi), %rdi
	movdqa	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm3
	pmovmskb %xmm0, %eax
	pmovmskb %xmm3, %edx
	orl	%eax, %edx		/* Neither a match nor a NUL?  */
	jz	L(loop)

	/* The OR above overwrote the NUL mask; rebuild it.  */
	pmovmskb %xmm3, %edx
	testl	%eax, %eax
	jnz	L(matches)

	/* Only a NUL was seen: return NULL.  */
	.p2align 4
L(return_null):
	/* The 32-bit form zero-extends into %rax and encodes shorter
	   than the 64-bit one.  */
	xorl	%eax, %eax
	ret

L(matches):
	/* %eax has at least one match bit.  Step %rdi back to the chunk
	   the masks describe, then split on whether that chunk also
	   contains a NUL.  */
	leaq	-16(%rdi), %rdi
	testl	%edx, %edx
	jz	L(match_no_nul)

	/* Both a match and a NUL are in this chunk: whichever has the
	   lower bit decides; on a tie the match wins.  */
	.p2align 4
L(match_with_nul):
	testb	%al, %al
	jz	L(match_with_nul_high)

	movb	%al, %cl
	andb	$15, %cl		/* Any match in bytes 0-3?  */
	jnz	L(match_with_nul_0_3)

	movb	%dl, %ch
	andb	$15, %ch		/* No match there; a NUL in 0-3 ends it.  */
	jnz	L(return_null)

	testb	$0x10, %al
	jnz	L(ret_off4)
	testb	$0x10, %dl
	jnz	L(return_null)
	testb	$0x20, %al
	jnz	L(ret_off5)
	testb	$0x20, %dl
	jnz	L(return_null)
	testb	$0x40, %al
	jnz	L(ret_off6)
	testb	$0x40, %dl
	jnz	L(return_null)
	/* %al is non-zero with bits 0-6 clear, so the match is byte 7.  */
	leaq	7(%rdi), %rax
	ret

	.p2align 4
L(match_with_nul_0_3):
	testb	$0x01, %al
	jnz	L(ret_off0)
	testb	$0x01, %dl
	jnz	L(return_null)
	testb	$0x02, %al
	jnz	L(ret_off1)
	testb	$0x02, %dl
	jnz	L(return_null)
	testb	$0x04, %al
	jnz	L(ret_off2)
	testb	$0x04, %dl
	jnz	L(return_null)
	/* The low nibble of %al is non-zero with bits 0-2 clear.  */
	leaq	3(%rdi), %rax
	ret

	.p2align 4
L(match_with_nul_high):
	/* No match in bytes 0-7, so a NUL there comes first.  */
	testb	%dl, %dl
	jnz	L(return_null)

	movb	%ah, %cl
	andb	$15, %cl		/* Any match in bytes 8-11?  */
	jnz	L(match_with_nul_8_11)

	movb	%dh, %ch
	andb	$15, %ch		/* No match there; a NUL in 8-11 ends it.  */
	jnz	L(return_null)

	testb	$0x10, %ah
	jnz	L(ret_off12)
	testb	$0x10, %dh
	jnz	L(return_null)
	testb	$0x20, %ah
	jnz	L(ret_off13)
	testb	$0x20, %dh
	jnz	L(return_null)
	testb	$0x40, %ah
	jnz	L(ret_off14)
	testb	$0x40, %dh
	jnz	L(return_null)
	/* %ah is non-zero with bits 0-6 clear, so the match is byte 15.  */
	leaq	15(%rdi), %rax
	ret

	.p2align 4
L(match_with_nul_8_11):
	testb	$0x01, %ah
	jnz	L(ret_off8)
	testb	$0x01, %dh
	jnz	L(return_null)
	testb	$0x02, %ah
	jnz	L(ret_off9)
	testb	$0x02, %dh
	jnz	L(return_null)
	testb	$0x04, %ah
	jnz	L(ret_off10)
	testb	$0x04, %dh
	jnz	L(return_null)
	/* The low nibble of %ah is non-zero with bits 0-2 clear.  */
	leaq	11(%rdi), %rax
	ret

	/* A match and no NUL in this chunk: return the lowest match bit.  */
	.p2align 4
L(match_no_nul):
	testb	%al, %al
	jz	L(match_no_nul_high)

	testb	$0x01, %al
	jnz	L(ret_off0)
	testb	$0x02, %al
	jnz	L(ret_off1)
	testb	$0x04, %al
	jnz	L(ret_off2)
	testb	$0x08, %al
	jnz	L(ret_off3)
	testb	$0x10, %al
	jnz	L(ret_off4)
	testb	$0x20, %al
	jnz	L(ret_off5)
	testb	$0x40, %al
	jnz	L(ret_off6)
	/* Only bit 7 of %al can be set here.  */
	leaq	7(%rdi), %rax
	ret

	.p2align 4
L(match_no_nul_high):
	testb	$0x01, %ah
	jnz	L(ret_off8)
	testb	$0x02, %ah
	jnz	L(ret_off9)
	testb	$0x04, %ah
	jnz	L(ret_off10)
	testb	$0x08, %ah
	jnz	L(ret_off11)
	testb	$0x10, %ah
	jnz	L(ret_off12)
	testb	$0x20, %ah
	jnz	L(ret_off13)
	testb	$0x40, %ah
	jnz	L(ret_off14)
	/* Only bit 7 of %ah can be set here.  */
	leaq	15(%rdi), %rax
	ret

	/* Shared return stubs: L(ret_offN) returns chunk base + N.
	   Offsets 7 and 15 are always returned inline above, so they
	   have no stub.  */
	.p2align 4
L(ret_off0):
	movq	%rdi, %rax
	ret

	.p2align 4
L(ret_off1):
	leaq	1(%rdi), %rax
	ret

	.p2align 4
L(ret_off2):
	leaq	2(%rdi), %rax
	ret

	.p2align 4
L(ret_off3):
	leaq	3(%rdi), %rax
	ret

	.p2align 4
L(ret_off4):
	leaq	4(%rdi), %rax
	ret

	.p2align 4
L(ret_off5):
	leaq	5(%rdi), %rax
	ret

	.p2align 4
L(ret_off6):
	leaq	6(%rdi), %rax
	ret

	.p2align 4
L(ret_off8):
	leaq	8(%rdi), %rax
	ret

	.p2align 4
L(ret_off9):
	leaq	9(%rdi), %rax
	ret

	.p2align 4
L(ret_off10):
	leaq	10(%rdi), %rax
	ret

	.p2align 4
L(ret_off11):
	leaq	11(%rdi), %rax
	ret

	.p2align 4
L(ret_off12):
	leaq	12(%rdi), %rax
	ret

	.p2align 4
L(ret_off13):
	leaq	13(%rdi), %rax
	ret

	.p2align 4
L(ret_off14):
	leaq	14(%rdi), %rax
	ret

END (__strchr_sse2_no_bsf)
280#endif
281