/* Multiple versions of strcmp
   Copyright (C) 2009-2017 Free Software Foundation, Inc.
   Contributed by Intel Corporation.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <init-arch.h>

/* Per-variant configuration.  The first macro defined among
   USE_AS_STRNCMP, USE_AS_STRCASECMP_L and USE_AS_STRNCASECMP_L selects
   the variant; when none is defined this file builds plain strcmp and
   defines USE_AS_STRCMP itself.  Every branch provides:

     UPDATE_STRNCMP_COUNTER	byte-count bookkeeping; expands to nothing
				for the variants without a length limit.
     STRCMP_SSE42/SSSE3/SSE2	names of the per-ISA implementations
     (and STRCMP_AVX for	offered by the resolver.
      the case-insensitive
      variants)
     __GI_STRCMP		name of the libc-internal alias.

   Only the plain strcmp branch defines STRCMP (and only if it is not
   defined already); the other variants do not define it here, so it
   presumably comes from the file that includes this one.  */
#ifdef USE_AS_STRNCMP
/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
   if the new counter > the old one or is 0.  */
# define UPDATE_STRNCMP_COUNTER				\
	/* calculate left number to compare */		\
	lea	-16(%rcx, %r11), %r9;			\
	cmp	%r9, %r11;				\
	jb	LABEL(strcmp_exitz);			\
	test	%r9, %r9;				\
	je	LABEL(strcmp_exitz);			\
	mov	%r9, %r11

# define STRCMP_SSE42	__strncmp_sse42
# define STRCMP_SSSE3	__strncmp_ssse3
# define STRCMP_SSE2	__strncmp_sse2
# define __GI_STRCMP	__GI_strncmp
#elif defined USE_AS_STRCASECMP_L
# include "locale-defines.h"

# define UPDATE_STRNCMP_COUNTER

# define STRCMP_AVX	__strcasecmp_l_avx
# define STRCMP_SSE42	__strcasecmp_l_sse42
# define STRCMP_SSSE3	__strcasecmp_l_ssse3
# define STRCMP_SSE2	__strcasecmp_l_sse2
# define __GI_STRCMP	__GI___strcasecmp_l
#elif defined USE_AS_STRNCASECMP_L
# include "locale-defines.h"

/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
   if the new counter > the old one or is 0.  */
# define UPDATE_STRNCMP_COUNTER				\
	/* calculate left number to compare */		\
	lea	-16(%rcx, %r11), %r9;			\
	cmp	%r9, %r11;				\
	jb	LABEL(strcmp_exitz);			\
	test	%r9, %r9;				\
	je	LABEL(strcmp_exitz);			\
	mov	%r9, %r11

# define STRCMP_AVX	__strncasecmp_l_avx
# define STRCMP_SSE42	__strncasecmp_l_sse42
# define STRCMP_SSSE3	__strncasecmp_l_ssse3
# define STRCMP_SSE2	__strncasecmp_l_sse2
# define __GI_STRCMP	__GI___strncasecmp_l
#else
# define USE_AS_STRCMP
# define UPDATE_STRNCMP_COUNTER
# ifndef STRCMP
#  define STRCMP	strcmp
#  define STRCMP_SSE42	__strcmp_sse42
#  define STRCMP_SSSE3	__strcmp_ssse3
#  define STRCMP_SSE2	__strcmp_sse2
#  define __GI_STRCMP	__GI_strcmp
# endif
#endif

/* Define multiple versions only for the definition in libc.  Don't
   define multiple versions for strncmp in static library since we
   need strncmp before the initialization happens.  */
#if (defined SHARED || !defined USE_AS_STRNCMP) && IS_IN (libc)
	.text
/* IFUNC resolver for STRCMP.  Returns in %rax the address of the
   implementation to use.  A candidate is loaded into %rax first and is
   kept when the following feature test leaves ZF clear (jnz taken).
   Candidates are tried in this order:

     plain strcmp:	__strcmp_sse2_unaligned if Fast_Unaligned_Load,
			else STRCMP_SSSE3 if SSSE3, else STRCMP_SSE2.
     other variants:	STRCMP_SSE42 if SSE4_2 and not Slow_SSE4_2,
			else STRCMP_SSSE3 if SSSE3, else STRCMP_SSE2.

   LOAD_RTLD_GLOBAL_RO_RDX and the HAS_*_FEATURE macros are not defined
   in this file; presumably the former sets up %rdx for the latter.
   NOTE(review): the case-insensitive _l variants define STRCMP_AVX but
   this resolver never selects it, unlike the __strcasecmp and
   __strncasecmp resolvers in this file -- confirm this is intended.  */
ENTRY(STRCMP)
	.type	STRCMP, @gnu_indirect_function
	LOAD_RTLD_GLOBAL_RO_RDX
# ifdef USE_AS_STRCMP
	leaq	__strcmp_sse2_unaligned(%rip), %rax
	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
	jnz	3f
# else
	HAS_ARCH_FEATURE (Slow_SSE4_2)
	jnz	2f
	leaq	STRCMP_SSE42(%rip), %rax
	HAS_CPU_FEATURE (SSE4_2)
	jnz	3f
# endif
2:	leaq	STRCMP_SSSE3(%rip), %rax
	HAS_CPU_FEATURE (SSSE3)
	jnz	3f
	leaq	STRCMP_SSE2(%rip), %rax
3:	ret
END(STRCMP)

# ifdef USE_AS_STRCASECMP_L
/* IFUNC resolver for __strcasecmp (weakly aliased as strcasecmp).
   Returns in %rax the address of the implementation to use; a candidate
   already loaded in %rax is kept when the following feature test leaves
   ZF clear (jnz taken).  Order of preference:
     __strcasecmp_avx	if AVX_Usable,
     __strcasecmp_sse42	if SSE4_2 and not Slow_SSE4_2,
     __strcasecmp_ssse3	if SSSE3,
     __strcasecmp_sse2	otherwise.  */
ENTRY(__strcasecmp)
	.type	__strcasecmp, @gnu_indirect_function
	LOAD_RTLD_GLOBAL_RO_RDX
	leaq	__strcasecmp_avx(%rip), %rax
	HAS_ARCH_FEATURE (AVX_Usable)
	jnz	3f
	HAS_ARCH_FEATURE (Slow_SSE4_2)
	jnz	2f
	leaq	__strcasecmp_sse42(%rip), %rax
	HAS_CPU_FEATURE (SSE4_2)
	jnz	3f
2:	leaq	__strcasecmp_ssse3(%rip), %rax
	HAS_CPU_FEATURE (SSSE3)
	jnz	3f
	leaq	__strcasecmp_sse2(%rip), %rax
3:	ret
END(__strcasecmp)
weak_alias (__strcasecmp, strcasecmp)
# endif
# ifdef USE_AS_STRNCASECMP_L
/* IFUNC resolver for __strncasecmp (weakly aliased as strncasecmp).
   Returns in %rax the address of the implementation to use; a candidate
   already loaded in %rax is kept when the following feature test leaves
   ZF clear (jnz taken).  Order of preference:
     __strncasecmp_avx		if AVX_Usable,
     __strncasecmp_sse42	if SSE4_2 and not Slow_SSE4_2,
     __strncasecmp_ssse3	if SSSE3,
     __strncasecmp_sse2		otherwise.  */
ENTRY(__strncasecmp)
	.type	__strncasecmp, @gnu_indirect_function
	LOAD_RTLD_GLOBAL_RO_RDX
	leaq	__strncasecmp_avx(%rip), %rax
	HAS_ARCH_FEATURE (AVX_Usable)
	jnz	3f
	HAS_ARCH_FEATURE (Slow_SSE4_2)
	jnz	2f
	leaq	__strncasecmp_sse42(%rip), %rax
	HAS_CPU_FEATURE (SSE4_2)
	jnz	3f
2:	leaq	__strncasecmp_ssse3(%rip), %rax
	HAS_CPU_FEATURE (SSSE3)
	jnz	3f
	leaq	__strncasecmp_sse2(%rip), %rax
3:	ret
END(__strncasecmp)
weak_alias (__strncasecmp, strncasecmp)
# endif

/* Instantiate the SSE4.2 implementation from strcmp-sse42.S.  LABEL and
   GLABEL append an _sse42 suffix to the names they are given (LABEL also
   prepends .L); SECTION is passed through to the included file, which
   is not visible here.  */
# undef LABEL
# define LABEL(l) .L##l##_sse42
# define GLABEL(l) l##_sse42
# define SECTION sse4.2
# include "strcmp-sse42.S"


/* For the case-insensitive variants, instantiate strcmp-sse42.S a second
   time with USE_AVX set.  Labels get an _avx suffix, and STRCMP_SSE42 is
   redirected to STRCMP_AVX, presumably so the included file emits its
   entry point under the AVX name.  */
# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
/* Drop any definitions left from the SSE4.2 instantiation before
   redefining; #undef is harmless if the included file already removed
   them.  */
#  undef LABEL
#  define LABEL(l) .L##l##_avx
#  undef GLABEL
#  define GLABEL(l) l##_avx
#  define USE_AVX 1
#  undef STRCMP_SSE42
#  define STRCMP_SSE42 STRCMP_AVX
#  undef SECTION
#  define SECTION avx
#  include "strcmp-sse42.S"
# endif


/* Override ENTRY/END for the code that follows.  Both ignore their
   argument: ENTRY always opens a 16-byte aligned, global but hidden
   function named STRCMP_SSE2 (followed by CALL_MCOUNT), and END closes
   it and records its size.  */
# undef ENTRY
# define ENTRY(name) \
	.type STRCMP_SSE2, @function; \
	.align 16; \
	.globl STRCMP_SSE2; \
	.hidden STRCMP_SSE2; \
	STRCMP_SSE2: cfi_startproc; \
	CALL_MCOUNT
# undef END
# define END(name) \
	cfi_endproc; .size STRCMP_SSE2, .-STRCMP_SSE2

/* strcasecmp build: ENTRY2/END2 ignore their argument and bracket a
   16-byte aligned, global but hidden function named __strcasecmp_sse2.
   They are not used in this file; presumably the code included after
   these definitions uses them for the non-_l entry point.  */
# ifdef USE_AS_STRCASECMP_L
#  define ENTRY2(name) \
	.type __strcasecmp_sse2, @function; \
	.align 16; \
	.globl __strcasecmp_sse2; \
	.hidden __strcasecmp_sse2; \
	__strcasecmp_sse2: cfi_startproc; \
	CALL_MCOUNT
#  define END2(name) \
	cfi_endproc; .size __strcasecmp_sse2, .-__strcasecmp_sse2
# endif

/* strncasecmp build: ENTRY2/END2 ignore their argument and bracket a
   16-byte aligned, global but hidden function named __strncasecmp_sse2.
   They are not used in this file; presumably the code included after
   these definitions uses them for the non-_l entry point.  */
# ifdef USE_AS_STRNCASECMP_L
#  define ENTRY2(name) \
	.type __strncasecmp_sse2, @function; \
	.align 16; \
	.globl __strncasecmp_sse2; \
	.hidden __strncasecmp_sse2; \
	__strncasecmp_sse2: cfi_startproc; \
	CALL_MCOUNT
#  define END2(name) \
	cfi_endproc; .size __strncasecmp_sse2, .-__strncasecmp_sse2
# endif

# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal strcmp calls through a PLT.
   The speedup we get from using SSE4.2 instruction is likely eaten away
   by the indirect call in the PLT.  */
/* The override ignores its argument and makes the global symbol
   __GI_STRCMP an alias of STRCMP_SSE2.  */
# define libc_hidden_builtin_def(name) \
	.globl __GI_STRCMP; __GI_STRCMP = STRCMP_SSE2
#endif

/* Pull in the implementation from the parent directory.  It is included
   unconditionally, so when the multiarch block above is compiled out it
   is built with the ENTRY/END and libc_hidden_builtin_def definitions
   already in effect rather than the overrides above.  */
#include "../strcmp.S"