1/* Optimized wcscmp for x86-64 with SSE2.
2 Copyright (C) 2011-2020 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#include <sysdep.h>
21
/* Note: wcscmp uses signed comparison, not unsigned as in the strcmp function.  */
23
24 .text
/* int __wcscmp (const wchar_t *s1, const wchar_t *s2)

   ABI:	 System V AMD64, AT&T syntax.
   In:	 %rdi = s1, %rsi = s2.  Elements are compared as signed 32-bit
	 values.  NOTE(review): both pointers are assumed to be 4-byte
	 aligned; nothing here checks that.
   Out:	 %eax = 0 if the strings are equal, 1 if the first differing
	 element of s1 is greater than that of s2, -1 otherwise.
   Uses: %eax, %ecx, %edx, %xmm0-%xmm2, flags.  No stack is used.

   Strategy: SSE2 is used to compare up to 16 bytes (four elements) at
   a time.  Each pointer is classified by its offset inside its 64-byte
   line: "00" = 16-byte aligned, otherwise "0", "16", "32" or "48" for
   the 16-byte bucket the offset falls into.  A 16-byte load is issued
   only where it cannot straddle a 64-byte line (and hence, presumably
   the point of the exercise, cannot touch the following page); a chunk
   that would straddle is compared one element at a time instead.
   Because both pointers advance together, every pair of buckets has a
   fixed pattern that repeats every 64 bytes.  The L(continue_A_B)
   prologues step forward 16 bytes at a time until the pair reaches one
   of the steady-state loops (..._48, 00_00 or 48_00).

   Label naming: L(continue_00_B) means s1 is 16-byte aligned and
   L(continue_A_00) means s2 is.  When neither pointer is aligned the
   code treats both strings alike, so L(continue_A_B) is named
   (smaller bucket, larger bucket) regardless of which string is which.

   Invariant: %xmm0 is all-zero at every loop head and on entry to
   every vector macro below.  A vector compare writes the null mask
   into %xmm0, but falls through only when no null was seen, i.e. when
   %xmm0 is still zero.  */

/* Compare the element at byte offset OFF of both strings.  Leaves via
   L(nequal) on a mismatch (flags describe s1[i] - s2[i]) or via
   L(equal) when the matching element is the terminating null.  */
	.macro	CMP_DWORD off
	mov	\off(%rsi), %ecx
	cmp	%ecx, \off(%rdi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)
	.endm

/* As CMP_DWORD but without the null test.  Only valid where the caller
   has already shown that one string has no null in this chunk: an
   equal element then cannot be the terminator.  */
	.macro	CMP_DWORD_NONULL off
	mov	\off(%rdi), %eax
	cmp	\off(%rsi), %eax
	jne	L(nequal)
	.endm

/* Common end of the vector compares.  EQ holds the per-element
   equality mask and %xmm0 the per-element null mask.  EQ - null has
   the top bit of a byte set only where the element is equal AND
   non-null, so pmovmskb yields 0xffff exactly when all four elements
   are good.  Branch to TARGET otherwise, with
   %edx = mask - 0xffff for the RESOLVE_CHUNK decoder.  */
	.macro	VEC_FINISH eq, target
	psubb	%xmm0, \eq
	pmovmskb \eq, %edx
	sub	$0xffff, %edx
	jnz	\target
	.endm

/* 16-byte compare at OFF, neither pointer known to be aligned.  */
	.macro	CMP_VEC_UU off, target
	movdqu	\off(%rdi), %xmm1
	movdqu	\off(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* null elements in s1 chunk */
	pcmpeqd	%xmm2, %xmm1		/* equal elements */
	VEC_FINISH %xmm1, \target
	.endm

/* 16-byte compare at OFF, s1 (%rdi) 16-byte aligned so it can be used
   as a memory operand; s2 is loaded unaligned.  */
	.macro	CMP_VEC_S1_ALIGNED off, target
	movdqu	\off(%rsi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* null elements in s2 chunk */
	pcmpeqd	\off(%rdi), %xmm2	/* equal elements */
	VEC_FINISH %xmm2, \target
	.endm

/* 16-byte compare at OFF, s2 (%rsi) 16-byte aligned so it can be used
   as a memory operand; s1 is loaded unaligned.  */
	.macro	CMP_VEC_S2_ALIGNED off, target
	movdqu	\off(%rdi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* null elements in s1 chunk */
	pcmpeqd	\off(%rsi), %xmm1	/* equal elements */
	VEC_FINISH %xmm1, \target
	.endm

/* 16-byte compare at OFF, both pointers 16-byte aligned.  */
	.macro	CMP_VEC_AA off, target
	movdqa	\off(%rdi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* null elements in s1 chunk */
	pcmpeqd	\off(%rsi), %xmm1	/* equal elements */
	VEC_FINISH %xmm1, \target
	.endm

/* Step both strings forward by BYTES.  */
	.macro	ADVANCE bytes
	add	$\bytes, %rsi
	add	$\bytes, %rdi
	.endm

/* Second-level dispatch on s2.  Expects %ch = low byte of s2 and
   %eax = s2 & 63.  Falls through when s2 lies in the last 16-byte
   bucket of its line (offset >= 48, not 16-byte aligned).  */
	.macro	DISPATCH_S2 aligned, lt16, lt32, lt48
	and	$15, %ch
	jz	\aligned
	cmp	$16, %eax
	jb	\lt16
	cmp	$32, %eax
	jb	\lt32
	cmp	$48, %eax
	jb	\lt48
	.endm

/* A vector compare of the chunk at BASE failed: locate the first
   element that is unequal or null and produce the result.
   %edx = mask - 0xffff (see VEC_FINISH).  The mask is made of four
   identical-bit nibbles and a failing mask is never 0xffff, so the
   low 16 bits of %edx equal mask + 1.  Hence %dl == 0 iff elements 0
   and 1 are both good, a zero low nibble of %dl iff element 0 is
   good, and likewise for %dh and elements 2 and 3.  Once the element
   is found: if the two values differ go to L(nequal) with the flags
   of s1[i] - s2[i]; otherwise the element is the common terminator
   and %eax, which holds it, is already the return value 0.  */
	.macro	RESOLVE_CHUNK base
	test	%dl, %dl
	jz	2f			/* elements 0 and 1 are good */
	and	$15, %dl
	jz	1f			/* element 0 is good */
	mov	\base(%rdi), %eax
	cmp	\base(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
1:
	mov	\base+4(%rdi), %eax
	cmp	\base+4(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
2:
	and	$15, %dh
	jz	3f			/* element 2 is good as well */
	mov	\base+8(%rdi), %eax
	cmp	\base+8(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
3:
	mov	\base+12(%rdi), %eax
	cmp	\base+12(%rsi), %eax
	jne	L(nequal)
	ret
	.endm

ENTRY (__wcscmp)
	mov	%esi, %eax
	mov	%edi, %edx
	pxor	%xmm0, %xmm0		/* %xmm0 = 0 for the null checks */
	mov	%al, %ch		/* %ch = low byte of s2 */
	mov	%dl, %cl		/* %cl = low byte of s1 */
	and	$63, %eax		/* s2 offset within its 64-byte line */
	and	$63, %edx		/* s1 offset within its 64-byte line */

	/* First-level dispatch on s1.  */
	and	$15, %cl
	jz	L(continue_00)
	cmp	$16, %edx
	jb	L(continue_0)
	cmp	$32, %edx
	jb	L(continue_16)
	cmp	$48, %edx
	jb	L(continue_32)

L(continue_48):
	DISPATCH_S2 L(continue_48_00), L(continue_0_48), L(continue_16_48), L(continue_32_48)

	/* Both pointers in the last bucket: only the first chunk
	   straddles a line.  */
	.p2align 4
L(continue_48_48):
	CMP_DWORD 0
	CMP_DWORD 4
	CMP_DWORD 8
	CMP_DWORD 12
	CMP_VEC_UU 16, L(less4_double_words_16)
	CMP_VEC_UU 32, L(less4_double_words_32)
	CMP_VEC_UU 48, L(less4_double_words_48)
	ADVANCE 64
	jmp	L(continue_48_48)

L(continue_0):
	DISPATCH_S2 L(continue_0_00), L(continue_0_0), L(continue_0_16), L(continue_0_32)

	/* One pointer in bucket 0, the other in bucket 48: the first and
	   the last chunk each straddle a line for one of the strings.  */
	.p2align 4
L(continue_0_48):
	CMP_DWORD 0
	CMP_DWORD 4
	CMP_DWORD 8
	CMP_DWORD 12
	CMP_VEC_UU 16, L(less4_double_words_16)
	CMP_VEC_UU 32, L(less4_double_words_32)
	CMP_DWORD 48
	CMP_DWORD 52
	CMP_DWORD 56
	CMP_DWORD 60
	ADVANCE 64
	jmp	L(continue_0_48)

	.p2align 4
L(continue_00):
	DISPATCH_S2 L(continue_00_00), L(continue_00_0), L(continue_00_16), L(continue_00_32)

	/* s1 aligned, s2 in bucket 48.  The first s2 chunk straddles a
	   line, so s1's aligned chunk is tested for a null first.  With
	   no null in it the elements can be compared without further
	   null tests; with one, the element-wise path finishes the
	   comparison.  */
	.p2align 4
L(continue_00_48):
	pcmpeqd	(%rdi), %xmm0
	mov	(%rdi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)

	cmp	(%rsi), %eax
	jne	L(nequal)
	CMP_DWORD_NONULL 4
	CMP_DWORD_NONULL 8
	CMP_DWORD_NONULL 12
	CMP_VEC_S1_ALIGNED 16, L(less4_double_words_16)
	CMP_VEC_S1_ALIGNED 32, L(less4_double_words_32)
	CMP_VEC_S1_ALIGNED 48, L(less4_double_words_48)
	ADVANCE 64
	jmp	L(continue_00_48)

	.p2align 4
L(continue_32):
	DISPATCH_S2 L(continue_32_00), L(continue_0_32), L(continue_16_32), L(continue_32_32)

	/* Buckets 32 and 48: the first chunk straddles for one string,
	   the second chunk for the other.  */
	.p2align 4
L(continue_32_48):
	CMP_DWORD 0
	CMP_DWORD 4
	CMP_DWORD 8
	CMP_DWORD 12
	CMP_DWORD 16
	CMP_DWORD 20
	CMP_DWORD 24
	CMP_DWORD 28
	CMP_VEC_UU 32, L(less4_double_words_32)
	CMP_VEC_UU 48, L(less4_double_words_48)
	ADVANCE 64
	jmp	L(continue_32_48)

	.p2align 4
L(continue_16):
	DISPATCH_S2 L(continue_16_00), L(continue_0_16), L(continue_16_16), L(continue_16_32)

	/* Buckets 16 and 48: the first chunk straddles for one string,
	   the third chunk for the other.  */
	.p2align 4
L(continue_16_48):
	CMP_DWORD 0
	CMP_DWORD 4
	CMP_DWORD 8
	CMP_DWORD 12
	CMP_VEC_UU 16, L(less4_double_words_16)
	CMP_DWORD 32
	CMP_DWORD 36
	CMP_DWORD 40
	CMP_DWORD 44
	CMP_VEC_UU 48, L(less4_double_words_48)
	ADVANCE 64
	jmp	L(continue_16_48)

	/* Both pointers 16-byte aligned: plain aligned loop.  */
	.p2align 4
L(continue_00_00):
	CMP_VEC_AA 0, L(less4_double_words)
	CMP_VEC_AA 16, L(less4_double_words_16)
	CMP_VEC_AA 32, L(less4_double_words_32)
	CMP_VEC_AA 48, L(less4_double_words_48)
	ADVANCE 64
	jmp	L(continue_00_00)

	/* s1 aligned: walk s2 up to bucket 48, then join
	   L(continue_00_48).  */
	.p2align 4
L(continue_00_32):
	CMP_VEC_S1_ALIGNED 0, L(less4_double_words)
	ADVANCE 16
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_16):
	CMP_VEC_S1_ALIGNED 0, L(less4_double_words)
	CMP_VEC_S1_ALIGNED 16, L(less4_double_words_16)
	ADVANCE 32
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_0):
	CMP_VEC_S1_ALIGNED 0, L(less4_double_words)
	CMP_VEC_S1_ALIGNED 16, L(less4_double_words_16)
	CMP_VEC_S1_ALIGNED 32, L(less4_double_words_32)
	ADVANCE 48
	jmp	L(continue_00_48)

	/* s2 aligned, s1 in bucket 48: mirror image of
	   L(continue_00_48), with the null pre-check done on s2's
	   aligned chunk.  */
	.p2align 4
L(continue_48_00):
	pcmpeqd	(%rsi), %xmm0
	mov	(%rdi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)

	cmp	(%rsi), %eax
	jne	L(nequal)
	CMP_DWORD_NONULL 4
	CMP_DWORD_NONULL 8
	CMP_DWORD_NONULL 12
	CMP_VEC_S2_ALIGNED 16, L(less4_double_words_16)
	CMP_VEC_S2_ALIGNED 32, L(less4_double_words_32)
	CMP_VEC_S2_ALIGNED 48, L(less4_double_words_48)
	ADVANCE 64
	jmp	L(continue_48_00)

	/* s2 aligned: walk s1 up to bucket 48, then join
	   L(continue_48_00).  */
	.p2align 4
L(continue_32_00):
	CMP_VEC_S2_ALIGNED 0, L(less4_double_words)
	ADVANCE 16
	jmp	L(continue_48_00)

	.p2align 4
L(continue_16_00):
	CMP_VEC_S2_ALIGNED 0, L(less4_double_words)
	CMP_VEC_S2_ALIGNED 16, L(less4_double_words_16)
	ADVANCE 32
	jmp	L(continue_48_00)

	.p2align 4
L(continue_0_00):
	CMP_VEC_S2_ALIGNED 0, L(less4_double_words)
	CMP_VEC_S2_ALIGNED 16, L(less4_double_words_16)
	CMP_VEC_S2_ALIGNED 32, L(less4_double_words_32)
	ADVANCE 48
	jmp	L(continue_48_00)

	/* Neither pointer aligned, same bucket: walk both up to bucket
	   48, then join L(continue_48_48).  */
	.p2align 4
L(continue_32_32):
	CMP_VEC_UU 0, L(less4_double_words)
	ADVANCE 16
	jmp	L(continue_48_48)

	.p2align 4
L(continue_16_16):
	CMP_VEC_UU 0, L(less4_double_words)
	CMP_VEC_UU 16, L(less4_double_words_16)
	ADVANCE 32
	jmp	L(continue_48_48)

	.p2align 4
L(continue_0_0):
	CMP_VEC_UU 0, L(less4_double_words)
	CMP_VEC_UU 16, L(less4_double_words_16)
	CMP_VEC_UU 32, L(less4_double_words_32)
	ADVANCE 48
	jmp	L(continue_48_48)

	/* Neither pointer aligned, different buckets: step until the
	   larger bucket reaches 48, then join the matching loop.  */
	.p2align 4
L(continue_0_16):
	CMP_VEC_UU 0, L(less4_double_words)
	CMP_VEC_UU 16, L(less4_double_words_16)
	ADVANCE 32
	jmp	L(continue_32_48)

	.p2align 4
L(continue_0_32):
	CMP_VEC_UU 0, L(less4_double_words)
	ADVANCE 16
	jmp	L(continue_16_48)

	.p2align 4
L(continue_16_32):
	CMP_VEC_UU 0, L(less4_double_words)
	ADVANCE 16
	jmp	L(continue_32_48)

	/* Reached from L(continue_00_48) / L(continue_48_00) when the
	   aligned string has a null among its next four elements.
	   %eax = s1[0].  Compare element by element; if the first three
	   are equal and non-null, the fourth is either a mismatch or
	   the terminator.  */
	.p2align 4
L(less4_double_words1):
	cmp	(%rsi), %eax
	jne	L(nequal)
	test	%eax, %eax
	jz	L(equal)

	CMP_DWORD 4
	CMP_DWORD 8

	mov	12(%rsi), %ecx
	cmp	%ecx, 12(%rdi)
	jne	L(nequal)
	xor	%eax, %eax
	ret

	/* Exits of the vector compares, one per chunk offset.  */
	.p2align 4
L(less4_double_words):
	RESOLVE_CHUNK 0

	.p2align 4
L(less4_double_words_16):
	RESOLVE_CHUNK 16

	.p2align 4
L(less4_double_words_32):
	RESOLVE_CHUNK 32

	.p2align 4
L(less4_double_words_48):
	RESOLVE_CHUNK 48

	/* Entered with the flags of a signed compare s1[i] - s2[i] whose
	   operands differ.  mov leaves the flags untouched.  */
	.p2align 4
L(nequal):
	mov	$1, %eax
	jg	L(nequal_bigger)
	neg	%eax

L(nequal_bigger):
	ret

	.p2align 4
L(equal):
	xor	%eax, %eax		/* also clears the upper half of %rax */
	ret

END (__wcscmp)
#ifndef __wcscmp
libc_hidden_def (__wcscmp)
weak_alias (__wcscmp, wcscmp)
#endif
953