/* wcschr with SSE2
2 Copyright (C) 2011-2020 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#include <sysdep.h>
21
22 .text
/* wchar_t *wcschr (const wchar_t *s, wchar_t c)

   Return a pointer to the first 32-bit element of S that equals C, or
   NULL when a null element is found before any such element.  When
   C is 0 the address of the null element itself is returned.

   In:	     %rdi = s, %esi = c (only the low 32 bits of %rsi are used)
   Out:	     %rax = address of the match, or 0
   Clobbers: %rcx, %rdx, %rdi, %xmm0-%xmm2, flags

   Register roles:
     %xmm1	C replicated into all four dword lanes.
     %xmm2	Zero before every probe.  pcmpeqd turns it into the
		null-element mask; when a probe finds no null that mask
		is all-zero again, so no reset is needed on the hot path.
     %rdi	Address of the next 16-byte chunk.  It has already been
		advanced past the chunk under test when L(matches) is
		reached.
     %eax/%edx	pmovmskb byte masks of the lanes equal to C / to 0
		(4 mask bits per lane, so bsf yields a byte offset).

   Every mask is at most 16 bits wide, so the 32-bit register forms
   are used for them.  A 32-bit register write zero-extends into the
   full 64-bit register, which keeps the final pointer arithmetic
   exact.  ENTRY, END and L() are macros expected from <sysdep.h>.  */
ENTRY (__wcschr)

	movd	%esi, %xmm1
	pxor	%xmm2, %xmm2
	mov	%edi, %ecx
	pshufd	$0, %xmm1, %xmm1	/* Broadcast C to all four lanes.  */

	/* A 16-byte unaligned load starting more than 48 bytes into a
	   64-byte block would run past the end of that block; take the
	   aligned-load path in that case.  */
	and	$63, %ecx
	cmp	$48, %ecx
	ja	L(cross_cache)

	/* First probe: 16 bytes starting exactly at S.  */
	movdqu	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)

	/* Round down to a 16-byte boundary.  This may re-scan bytes the
	   first probe already covered, which is harmless because they
	   contained neither C nor a null.  */
	and	$-16, %rdi

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)

	jmp	L(loop)

L(cross_cache):
	/* Load the aligned chunk that contains S and shift the lanes that
	   precede S out of both masks.  %ecx = S - chunk base, in 1..15
	   here because the offset within the 64-byte block was > 48.  */
	and	$15, %ecx
	and	$-16, %rdi
	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax

	shr	%cl, %edx
	shr	%cl, %eax
	test	%eax, %eax
	je	L(unaligned_no_match)

	/* C is present at or after S; it only counts if no null element
	   comes before it.  */
	bsf	%eax, %eax
	test	%edx, %edx
	je	L(unaligned_match)
	bsf	%edx, %edx
	cmp	%edx, %eax
	ja	L(return_null)

L(unaligned_match):
	/* Result = chunk base + offset of S in chunk + offset from S.  */
	add	%rdi, %rax
	add	%rcx, %rax
	ret

	.p2align 4
L(unaligned_no_match):
	test	%edx, %edx
	jne	L(return_null)
	/* %xmm2 may still flag null elements located before S; clear it
	   so it is a clean zero vector for the loop.  */
	pxor	%xmm2, %xmm2

	add	$16, %rdi

	.p2align 4
/* Main loop over 16-byte aligned chunks, unrolled four times.  */
L(loop):
	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)
	jmp	L(loop)

	.p2align 4
L(matches):
	/* Reached with %eax = mask of lanes equal to C, %xmm2 = null
	   mask vector, %rdi = address just past the chunk under test.
	   %edx held the OR of both masks, so re-extract the null mask.  */
	pmovmskb %xmm2, %edx
	test	%eax, %eax
	jz	L(return_null)		/* Only a null element was found.  */
	bsf	%eax, %eax
	test	%edx, %edx
	je	L(match)
	bsf	%edx, %ecx
	cmp	%ecx, %eax
	ja	L(return_null)		/* The null precedes the match.  */
L(match):
	lea	-16(%rdi,%rax), %rax	/* chunk base + byte offset  */
	ret

	.p2align 4
L(return_null):
	xor	%eax, %eax
	ret

END (__wcschr)

/* Symbol visibility and aliasing for __wcschr / wcschr; these macros
   are defined outside this file.  */
libc_hidden_def(__wcschr)
weak_alias (__wcschr, wcschr)
libc_hidden_weak (wcschr)
157