/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
   For AMD x86-64.
   Copyright (C) 2009-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>

	.text
ENTRY (strchr)
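	/* Broadcast CH into all 16 bytes of %xmm1 and check whether the
	   first 64 bytes of STR lie entirely within the current 4096-byte
	   page; if not, take the cross-page path below.  */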
	movd %esi, %xmm1
	movl %edi, %eax
	andl $4095, %eax
	punpcklbw %xmm1, %xmm1
	cmpl $4032, %eax
	punpcklwd %xmm1, %xmm1
	pshufd $0, %xmm1, %xmm1
	jg L(cross_page)
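	/* Check the first 16 bytes of STR for either CH or the
	   terminating NUL.  */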
	movdqu (%rdi), %xmm0
	pxor %xmm3, %xmm3
	movdqa %xmm0, %xmm4
	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm3, %xmm4
	por %xmm4, %xmm0
	pmovmskb %xmm0, %eax
	test %eax, %eax
	je L(next_48_bytes)
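	/* Found CH or NUL in the first 16 bytes: compute the address of
	   the first such byte.  Plain strchr additionally returns NULL
	   when that byte is not CH.  */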
	bsf %eax, %eax
#ifdef AS_STRCHRNUL
	leaq (%rdi,%rax), %rax
#else
	movl $0, %edx
	leaq (%rdi,%rax), %rax
	cmpb %sil, (%rax)
	cmovne %rdx, %rax
#endif
	ret

	.p2align 3
L(next_48_bytes):
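	/* Check bytes 16..63, accumulating the per-16-byte match masks
	   (CH or NUL) into a single 64-bit mask in %rax.  */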
	movdqu 16(%rdi), %xmm0
	movdqa %xmm0, %xmm4
	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm3, %xmm4
	por %xmm4, %xmm0
	pmovmskb %xmm0, %ecx
	movdqu 32(%rdi), %xmm0
	movdqa %xmm0, %xmm4
	pcmpeqb %xmm1, %xmm0
	salq $16, %rcx
	pcmpeqb %xmm3, %xmm4
	por %xmm4, %xmm0
	pmovmskb %xmm0, %eax
	movdqu 48(%rdi), %xmm0
	pcmpeqb %xmm0, %xmm3
	salq $32, %rax
	pcmpeqb %xmm1, %xmm0
	orq %rcx, %rax
	por %xmm3, %xmm0
	pmovmskb %xmm0, %ecx
	salq $48, %rcx
	orq %rcx, %rax
	testq %rax, %rax
	jne L(return)
L(loop_start):
	/* We use this alignment to force the loop to be aligned to 8 but
	   not 16 bytes.  This gives better scheduling on AMD processors.  */
	.p2align 4
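	/* Round %rdi down to a 64-byte boundary so the main loop can use
	   aligned loads; any bytes this re-examines have already been
	   checked above.  */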
	pxor %xmm6, %xmm6
	andq $-64, %rdi
	.p2align 3
L(loop64):
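	/* Main loop: scan 64 bytes per iteration.  XORing with %xmm1
	   turns bytes equal to CH into zero, and PMINUB with the original
	   data keeps NUL bytes at zero, so one test for a zero byte
	   detects both CH and the end of the string.  */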
	addq $64, %rdi
	movdqa (%rdi), %xmm5
	movdqa 16(%rdi), %xmm2
	movdqa 32(%rdi), %xmm3
	pxor %xmm1, %xmm5
	movdqa 48(%rdi), %xmm4
	pxor %xmm1, %xmm2
	pxor %xmm1, %xmm3
	pminub (%rdi), %xmm5
	pxor %xmm1, %xmm4
	pminub 16(%rdi), %xmm2
	pminub 32(%rdi), %xmm3
	pminub %xmm2, %xmm5
	pminub 48(%rdi), %xmm4
	pminub %xmm3, %xmm5
	pminub %xmm4, %xmm5
	pcmpeqb %xmm6, %xmm5
	pmovmskb %xmm5, %eax

	testl %eax, %eax
	je L(loop64)

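	/* CH or NUL is somewhere in this 64-byte block.  Rebuild the
	   exact per-byte match mask: the first 16 bytes are reloaded and
	   compared directly, the other chunks reuse the PMINUB results
	   still held in %xmm2-%xmm4, and everything is merged into a
	   64-bit mask in %rax.  */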
	movdqa (%rdi), %xmm5
	movdqa %xmm5, %xmm0
	pcmpeqb %xmm1, %xmm5
	pcmpeqb %xmm6, %xmm0
	por %xmm0, %xmm5
	pcmpeqb %xmm6, %xmm2
	pcmpeqb %xmm6, %xmm3
	pcmpeqb %xmm6, %xmm4

	pmovmskb %xmm5, %ecx
	pmovmskb %xmm2, %eax
	salq $16, %rax
	pmovmskb %xmm3, %r8d
	pmovmskb %xmm4, %edx
	salq $32, %r8
	orq %r8, %rax
	orq %rcx, %rax
	salq $48, %rdx
	orq %rdx, %rax
	.p2align 3
L(return):
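	/* The lowest set bit of %rax gives the offset (relative to %rdi)
	   of the first CH or NUL byte.  strchrnul returns a pointer to
	   either; plain strchr returns NULL if the byte found is not CH.  */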
	bsfq %rax, %rax
#ifdef AS_STRCHRNUL
	leaq (%rdi,%rax), %rax
#else
	movl $0, %edx
	leaq (%rdi,%rax), %rax
	cmpb %sil, (%rax)
	cmovne %rdx, %rax
#endif
	ret
	.p2align 4

L(cross_page):
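	/* STR is within 64 bytes of a page boundary.  Scan the whole
	   64-byte aligned block containing STR (aligned loads cannot
	   fault across the page), then shift the resulting mask right to
	   discard the bits for bytes that precede STR.  */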
	movq %rdi, %rdx
	pxor %xmm2, %xmm2
	andq $-64, %rdx
	movdqa %xmm1, %xmm0
	movdqa (%rdx), %xmm3
	movdqa %xmm3, %xmm4
	pcmpeqb %xmm1, %xmm3
	pcmpeqb %xmm2, %xmm4
	por %xmm4, %xmm3
	pmovmskb %xmm3, %r8d
	movdqa 16(%rdx), %xmm3
	movdqa %xmm3, %xmm4
	pcmpeqb %xmm1, %xmm3
	pcmpeqb %xmm2, %xmm4
	por %xmm4, %xmm3
	pmovmskb %xmm3, %eax
	movdqa 32(%rdx), %xmm3
	movdqa %xmm3, %xmm4
	pcmpeqb %xmm1, %xmm3
	salq $16, %rax
	pcmpeqb %xmm2, %xmm4
	por %xmm4, %xmm3
	pmovmskb %xmm3, %r9d
	movdqa 48(%rdx), %xmm3
	pcmpeqb %xmm3, %xmm2
	salq $32, %r9
	pcmpeqb %xmm3, %xmm0
	orq %r9, %rax
	orq %r8, %rax
	por %xmm2, %xmm0
	pmovmskb %xmm0, %ecx
	salq $48, %rcx
	orq %rcx, %rax
	movl %edi, %ecx
	subb %dl, %cl
	shrq %cl, %rax
	testq %rax, %rax
	jne L(return)
	jmp L(loop_start)

END (strchr)

#ifndef AS_STRCHRNUL
weak_alias (strchr, index)
libc_hidden_builtin_def (strchr)
#endif