1/* strcat with SSSE3
2 Copyright (C) 2011-2020 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#if IS_IN (libc)
21
22# include <sysdep.h>
23
/* Symbol name this implementation is assembled under.  A file that defines
   STRCAT before including this one overrides the default.  */
24# ifndef STRCAT
25# define STRCAT __strcat_ssse3
26# endif
27
/* NOTE(review): nothing in this file tests USE_AS_STRCAT; presumably the
   strcpy-ssse3.S body included further down keys off it -- confirm.  */
28# define USE_AS_STRCAT
29
/* STRCAT entry point.
   Register use visible in this file:
     %rdi  destination string; it is scanned for its NUL and every return
	   in this file hands it back through "mov %rdi, %rax".
     %rsi  source string that gets appended.
     %rdx  only read when built with USE_AS_STRNCAT (saved to %r8).  */
30.text
31ENTRY (STRCAT)
32# ifdef USE_AS_STRNCAT
/* Keep the third argument in %r8; %rdx is reused as scratch below.  */
33 mov %rdx, %r8
34# endif
35
36
37/* Inline corresponding strlen file, temporary until new strcpy
38 implementation gets merged. */
39
/* Phase 1: test destination bytes 0..15 one at a time.  %eax starts at
   zero; L(exit_tailN) adds N to it, so %rax ends up as the offset of the
   terminating NUL from %rdi.  */
40 xor %eax, %eax
41 cmpb $0, (%rdi)
42 jz L(exit_tail0)
43 cmpb $0, 1(%rdi)
44 jz L(exit_tail1)
45 cmpb $0, 2(%rdi)
46 jz L(exit_tail2)
47 cmpb $0, 3(%rdi)
48 jz L(exit_tail3)
49
50 cmpb $0, 4(%rdi)
51 jz L(exit_tail4)
52 cmpb $0, 5(%rdi)
53 jz L(exit_tail5)
54 cmpb $0, 6(%rdi)
55 jz L(exit_tail6)
56 cmpb $0, 7(%rdi)
57 jz L(exit_tail7)
58
59 cmpb $0, 8(%rdi)
60 jz L(exit_tail8)
61 cmpb $0, 9(%rdi)
62 jz L(exit_tail9)
63 cmpb $0, 10(%rdi)
64 jz L(exit_tail10)
65 cmpb $0, 11(%rdi)
66 jz L(exit_tail11)
67
68 cmpb $0, 12(%rdi)
69 jz L(exit_tail12)
70 cmpb $0, 13(%rdi)
71 jz L(exit_tail13)
72 cmpb $0, 14(%rdi)
73 jz L(exit_tail14)
74 cmpb $0, 15(%rdi)
75 jz L(exit_tail15)
/* Phase 2: destination bytes 0..15 are all non-zero.  Scan sixteen
   16-byte chunks, starting at the first 16-byte boundary strictly above
   %rdi (that first chunk may overlap bytes already tested).
     %rcx = %rdi + 16 is the bias that L(exit) subtracts again.
     %rax = chunk pointer; it is advanced past the chunk before each
	    branch, so at L(exit) it points 16 bytes beyond the chunk
	    whose mask is in %edx.
   pcmpeqb against an all-zero register marks NUL bytes with 0xff and
   pmovmskb packs those marks into a 16-bit mask in %edx.  A chunk with no
   NUL leaves its xmm register all-zero again, which is why %xmm0-%xmm3
   are reused further down without being cleared a second time.  */
76 pxor %xmm0, %xmm0
77 lea 16(%rdi), %rcx
78 lea 16(%rdi), %rax
79 and $-16, %rax
80
81 pcmpeqb (%rax), %xmm0
82 pmovmskb %xmm0, %edx
83 pxor %xmm1, %xmm1
84 test %edx, %edx
/* lea leaves the flags from test untouched for the jnz.  */
85 lea 16(%rax), %rax
86 jnz L(exit)
87
88 pcmpeqb (%rax), %xmm1
89 pmovmskb %xmm1, %edx
90 pxor %xmm2, %xmm2
91 test %edx, %edx
92 lea 16(%rax), %rax
93 jnz L(exit)
94
95 pcmpeqb (%rax), %xmm2
96 pmovmskb %xmm2, %edx
97 pxor %xmm3, %xmm3
98 test %edx, %edx
99 lea 16(%rax), %rax
100 jnz L(exit)
101
102 pcmpeqb (%rax), %xmm3
103 pmovmskb %xmm3, %edx
104 test %edx, %edx
105 lea 16(%rax), %rax
106 jnz L(exit)
107
108 pcmpeqb (%rax), %xmm0
109 pmovmskb %xmm0, %edx
110 test %edx, %edx
111 lea 16(%rax), %rax
112 jnz L(exit)
113
114 pcmpeqb (%rax), %xmm1
115 pmovmskb %xmm1, %edx
116 test %edx, %edx
117 lea 16(%rax), %rax
118 jnz L(exit)
119
120 pcmpeqb (%rax), %xmm2
121 pmovmskb %xmm2, %edx
122 test %edx, %edx
123 lea 16(%rax), %rax
124 jnz L(exit)
125
126 pcmpeqb (%rax), %xmm3
127 pmovmskb %xmm3, %edx
128 test %edx, %edx
129 lea 16(%rax), %rax
130 jnz L(exit)
131
132 pcmpeqb (%rax), %xmm0
133 pmovmskb %xmm0, %edx
134 test %edx, %edx
135 lea 16(%rax), %rax
136 jnz L(exit)
137
138 pcmpeqb (%rax), %xmm1
139 pmovmskb %xmm1, %edx
140 test %edx, %edx
141 lea 16(%rax), %rax
142 jnz L(exit)
143
144 pcmpeqb (%rax), %xmm2
145 pmovmskb %xmm2, %edx
146 test %edx, %edx
147 lea 16(%rax), %rax
148 jnz L(exit)
149
150 pcmpeqb (%rax), %xmm3
151 pmovmskb %xmm3, %edx
152 test %edx, %edx
153 lea 16(%rax), %rax
154 jnz L(exit)
155
156 pcmpeqb (%rax), %xmm0
157 pmovmskb %xmm0, %edx
158 test %edx, %edx
159 lea 16(%rax), %rax
160 jnz L(exit)
161
162 pcmpeqb (%rax), %xmm1
163 pmovmskb %xmm1, %edx
164 test %edx, %edx
165 lea 16(%rax), %rax
166 jnz L(exit)
167
168 pcmpeqb (%rax), %xmm2
169 pmovmskb %xmm2, %edx
170 test %edx, %edx
171 lea 16(%rax), %rax
172 jnz L(exit)
173
174 pcmpeqb (%rax), %xmm3
175 pmovmskb %xmm3, %edx
176 test %edx, %edx
177 lea 16(%rax), %rax
178 jnz L(exit)
179
/* Round the scan pointer down to a 64-byte boundary for the
   L(aligned_64) loop; this can re-read bytes already known non-zero.  */
180 and $-0x40, %rax
181
/* Phase 3: main strlen loop, four 16-byte chunks (64 bytes) per pass.
   On entry %rax is 64-byte aligned and %xmm0-%xmm3 are all-zero.  The four
   NUL masks land in %edx, %r11d, %r10d and %r9d; %r9d then accumulates the
   OR of all four.  lea does not modify flags, so jz still sees the result
   of the last "or" and loops while no chunk contained a NUL.  */
182 .p2align 4
183L(aligned_64):
184 pcmpeqb (%rax), %xmm0
185 pcmpeqb 16(%rax), %xmm1
186 pcmpeqb 32(%rax), %xmm2
187 pcmpeqb 48(%rax), %xmm3
188 pmovmskb %xmm0, %edx
189 pmovmskb %xmm1, %r11d
190 pmovmskb %xmm2, %r10d
191 pmovmskb %xmm3, %r9d
192 or %edx, %r9d
193 or %r11d, %r9d
194 or %r10d, %r9d
195 lea 64(%rax), %rax
196 jz L(aligned_64)
197
/* A NUL was seen; pick the first chunk that has one.  Each exit below
   establishes what L(exit) expects: %edx = mask of that chunk and %rax =
   16 bytes past that chunk (%rax is currently 64 past the first one).  */
198 test %edx, %edx
199 jnz L(aligned_64_exit_16)
200 test %r11d, %r11d
201 jnz L(aligned_64_exit_32)
202 test %r10d, %r10d
203 jnz L(aligned_64_exit_48)
204
/* Fourth chunk: %r9d was merged with the other masks, so reload the
   mask from %xmm3.  %rax already points 16 past this chunk.  */
205L(aligned_64_exit_64):
206 pmovmskb %xmm3, %edx
207 jmp L(exit)
208
/* Third chunk.  */
209L(aligned_64_exit_48):
210 lea -16(%rax), %rax
211 mov %r10d, %edx
212 jmp L(exit)
213
/* Second chunk.  */
214L(aligned_64_exit_32):
215 lea -32(%rax), %rax
216 mov %r11d, %edx
217 jmp L(exit)
218
/* First chunk: %edx already holds its mask; execution continues
   straight into L(exit).  */
219L(aligned_64_exit_16):
220 lea -48(%rax), %rax
221
/* L(exit): convert a chunk's NUL mask into the offset of the destination's
   terminating NUL.
   In:   %edx = 16-bit NUL mask of one 16-byte chunk (non-zero),
	 %rax = address 16 bytes past that chunk,
	 %rcx = %rdi + 16.
   Out:  %rax = offset of the first NUL from %rdi; control continues at
	 L(StartStrcpyPart).
   The L(exit_tailN) stubs are also entered directly from the byte-wise
   scan at function entry, with %rax = 0.

   Every addition uses the full 64-bit %rax.  A 32-bit "add $N, %eax"
   clears bits 63:32 of the result, which truncated the offset (and made
   the append land at the wrong address) once the destination string
   reached 4 GiB; only the exit_tail0 path, which adds nothing, was
   unaffected.  */
222L(exit):
/* %rax = start of the matching chunk, relative to %rdi.  */
223 sub %rcx, %rax
/* No NUL in chunk bytes 0..7: decode the high mask byte instead.  */
224 test %dl, %dl
225 jz L(exit_high)
226 test $0x01, %dl
227 jnz L(exit_tail0)
228
229 test $0x02, %dl
230 jnz L(exit_tail1)
231
232 test $0x04, %dl
233 jnz L(exit_tail2)
234
235 test $0x08, %dl
236 jnz L(exit_tail3)
237
238 test $0x10, %dl
239 jnz L(exit_tail4)
240
241 test $0x20, %dl
242 jnz L(exit_tail5)
243
244 test $0x40, %dl
245 jnz L(exit_tail6)
/* Only bit 7 can be left.  */
246 add $7, %rax
247L(exit_tail0):
248 jmp L(StartStrcpyPart)
249
250 .p2align 4
251L(exit_high):
/* The NUL is in chunk bytes 8..15; account for the first eight and reuse
   the tail stubs for the remainder.  */
252 add $8, %rax
253 test $0x01, %dh
254 jnz L(exit_tail0)
255
256 test $0x02, %dh
257 jnz L(exit_tail1)
258
259 test $0x04, %dh
260 jnz L(exit_tail2)
261
262 test $0x08, %dh
263 jnz L(exit_tail3)
264
265 test $0x10, %dh
266 jnz L(exit_tail4)
267
268 test $0x20, %dh
269 jnz L(exit_tail5)
270
271 test $0x40, %dh
272 jnz L(exit_tail6)
273 add $7, %rax
274 jmp L(StartStrcpyPart)
275
/* L(exit_tailN): %rax += N, then start the copy.  */
276 .p2align 4
277L(exit_tail1):
278 add $1, %rax
279 jmp L(StartStrcpyPart)
280
281 .p2align 4
282L(exit_tail2):
283 add $2, %rax
284 jmp L(StartStrcpyPart)
285
286 .p2align 4
287L(exit_tail3):
288 add $3, %rax
289 jmp L(StartStrcpyPart)
290
291 .p2align 4
292L(exit_tail4):
293 add $4, %rax
294 jmp L(StartStrcpyPart)
295
296 .p2align 4
297L(exit_tail5):
298 add $5, %rax
299 jmp L(StartStrcpyPart)
300
301 .p2align 4
302L(exit_tail6):
303 add $6, %rax
304 jmp L(StartStrcpyPart)
305
306 .p2align 4
307L(exit_tail7):
308 add $7, %rax
309 jmp L(StartStrcpyPart)
310
311 .p2align 4
312L(exit_tail8):
313 add $8, %rax
314 jmp L(StartStrcpyPart)
315
316 .p2align 4
317L(exit_tail9):
318 add $9, %rax
319 jmp L(StartStrcpyPart)
320
321 .p2align 4
322L(exit_tail10):
323 add $10, %rax
324 jmp L(StartStrcpyPart)
325
326 .p2align 4
327L(exit_tail11):
328 add $11, %rax
329 jmp L(StartStrcpyPart)
330
331 .p2align 4
332L(exit_tail12):
333 add $12, %rax
334 jmp L(StartStrcpyPart)
335
336 .p2align 4
337L(exit_tail13):
338 add $13, %rax
339 jmp L(StartStrcpyPart)
340
341 .p2align 4
342L(exit_tail14):
343 add $14, %rax
344 jmp L(StartStrcpyPart)
345
/* No jump here: execution runs on into L(StartStrcpyPart).  */
346 .p2align 4
347L(exit_tail15):
348 add $15, %rax
349
/* L(StartStrcpyPart): begin the append.
   In:   %rax = offset of the destination's terminating NUL from %rdi,
	 %rsi = source, %r8 = length limit (USE_AS_STRNCAT builds only).
   Sets: %rcx = source pointer, %rdx = address of the destination's NUL,
	 i.e. where the first source byte is written.
   Sources whose NUL lies within their first 16 bytes are finished here:
   L(ExitK) copies exactly K bytes from (%rcx) to (%rdx), the last of which
   is the NUL just found, and returns.  */
350 .p2align 4
351L(StartStrcpyPart):
352 mov %rsi, %rcx
353 lea (%rdi, %rax), %rdx
354# ifdef USE_AS_STRNCAT
/* Limit of zero: nothing to append.  Limits 1..8 take a dedicated path
   that checks the limit before each source byte.  */
355 test %r8, %r8
356 jz L(StrncatExit0)
357 cmp $8, %r8
358 jbe L(StrncatExit8Bytes)
359# endif
360 cmpb $0, (%rcx)
361 jz L(Exit1)
362 cmpb $0, 1(%rcx)
363 jz L(Exit2)
364 cmpb $0, 2(%rcx)
365 jz L(Exit3)
366 cmpb $0, 3(%rcx)
367 jz L(Exit4)
368 cmpb $0, 4(%rcx)
369 jz L(Exit5)
370 cmpb $0, 5(%rcx)
371 jz L(Exit6)
372 cmpb $0, 6(%rcx)
373 jz L(Exit7)
374 cmpb $0, 7(%rcx)
375 jz L(Exit8)
376 cmpb $0, 8(%rcx)
377 jz L(Exit9)
378# ifdef USE_AS_STRNCAT
/* Source bytes 0..8 are non-zero and the limit is above 8; limits below
   16 continue on the path that interleaves limit and byte checks.  */
379 cmp $16, %r8
380 jb L(StrncatExit15Bytes)
381# endif
382 cmpb $0, 9(%rcx)
383 jz L(Exit10)
384 cmpb $0, 10(%rcx)
385 jz L(Exit11)
386 cmpb $0, 11(%rcx)
387 jz L(Exit12)
388 cmpb $0, 12(%rcx)
389 jz L(Exit13)
390 cmpb $0, 13(%rcx)
391 jz L(Exit14)
392 cmpb $0, 14(%rcx)
393 jz L(Exit15)
394 cmpb $0, 15(%rcx)
395 jz L(Exit16)
396# ifdef USE_AS_STRNCAT
/* Sixteen non-zero source bytes and a limit of exactly 16: copy them and
   store the terminator.  */
397 cmp $16, %r8
398 je L(StrncatExit16)
/* USE_AS_STRNCPY also enables the length-limited helpers that this file
   guards with it further down.  */
399# define USE_AS_STRNCPY
400# endif
401
/* Sources with no NUL in their first 16 bytes are handled by the included
   file, whose code is not visible here.  NOTE(review): presumably it
   finishes by branching to the L(CopyFrom1To16Bytes*) labels defined after
   this include -- confirm against strcpy-ssse3.S.  */
402# include "strcpy-ssse3.S"
403
/* L(CopyFrom1To16Bytes): copy the final 1..16 source bytes, ending with
   the NUL, and return %rdi.
   NOTE(review): this label is not referenced anywhere in this file;
   presumably the included strcpy-ssse3.S jumps here with %rsi = byte
   offset to add to both pointers, %rcx / %rdx = source / destination
   bases and %eax = pmovmskb NUL mask of the 16 source bytes -- confirm
   against that file.
   Mask bit K set selects L(Exit(K+1)), which copies K+1 bytes.  */
404 .p2align 4
405L(CopyFrom1To16Bytes):
406 add %rsi, %rdx
407 add %rsi, %rcx
408
/* Low mask byte empty: the first set bit is in bits 8..15.  */
409 test %al, %al
410 jz L(ExitHigh)
411 test $0x01, %al
412 jnz L(Exit1)
413 test $0x02, %al
414 jnz L(Exit2)
415 test $0x04, %al
416 jnz L(Exit3)
417 test $0x08, %al
418 jnz L(Exit4)
419 test $0x10, %al
420 jnz L(Exit5)
421 test $0x20, %al
422 jnz L(Exit6)
423 test $0x40, %al
424 jnz L(Exit7)
/* Only bit 7 is left: copy 8 bytes with one 64-bit move.  */
425 movlpd (%rcx), %xmm0
426 movlpd %xmm0, (%rdx)
427 mov %rdi, %rax
428 ret
429
430 .p2align 4
431L(ExitHigh):
432 test $0x01, %ah
433 jnz L(Exit9)
434 test $0x02, %ah
435 jnz L(Exit10)
436 test $0x04, %ah
437 jnz L(Exit11)
438 test $0x08, %ah
439 jnz L(Exit12)
440 test $0x10, %ah
441 jnz L(Exit13)
442 test $0x20, %ah
443 jnz L(Exit14)
444 test $0x40, %ah
445 jnz L(Exit15)
/* Bits 8..14 clear: copy all 16 bytes as two 64-bit moves.  */
446 movlpd (%rcx), %xmm0
447 movlpd 8(%rcx), %xmm1
448 movlpd %xmm0, (%rdx)
449 movlpd %xmm1, 8(%rdx)
450 mov %rdi, %rax
451 ret
452
/* Fixed-size copy stubs, N = 1..16.
     L(ExitN)         copy exactly N bytes from (%rcx) to (%rdx), then
		      return %rdi in %rax.  Callers that found the source
		      NUL at index N-1 get the terminator copied as part
		      of those N bytes.
     L(StrncatExitN)  first store a NUL at N(%rdx), then run straight into
		      L(ExitN).  In this file these labels are only
		      branched to from code under USE_AS_STRNCAT or
		      USE_AS_STRNCPY.
   %ah is zeroed only to provide the NUL byte; %eax / %xmm0 / %xmm1 are
   scratch.  Sizes that are not a power of two use two moves that overlap
   in the middle instead of three or more smaller ones.  */
453 .p2align 4
454L(StrncatExit1):
455 xor %ah, %ah
456 movb %ah, 1(%rdx)
457L(Exit1):
458 movb (%rcx), %al
459 movb %al, (%rdx)
460 mov %rdi, %rax
461 ret
462
463 .p2align 4
464L(StrncatExit2):
465 xor %ah, %ah
466 movb %ah, 2(%rdx)
467L(Exit2):
468 movw (%rcx), %ax
469 movw %ax, (%rdx)
470 mov %rdi, %rax
471 ret
472
473 .p2align 4
474L(StrncatExit3):
475 xor %ah, %ah
476 movb %ah, 3(%rdx)
477L(Exit3):
478 movw (%rcx), %ax
479 movw %ax, (%rdx)
480 movb 2(%rcx), %al
481 movb %al, 2(%rdx)
482 mov %rdi, %rax
483 ret
484
485 .p2align 4
486L(StrncatExit4):
487 xor %ah, %ah
488 movb %ah, 4(%rdx)
489L(Exit4):
490 mov (%rcx), %eax
491 mov %eax, (%rdx)
492 mov %rdi, %rax
493 ret
494
495 .p2align 4
496L(StrncatExit5):
497 xor %ah, %ah
498 movb %ah, 5(%rdx)
499L(Exit5):
500 mov (%rcx), %eax
501 mov %eax, (%rdx)
502 movb 4(%rcx), %al
503 movb %al, 4(%rdx)
504 mov %rdi, %rax
505 ret
506
507 .p2align 4
508L(StrncatExit6):
509 xor %ah, %ah
510 movb %ah, 6(%rdx)
511L(Exit6):
512 mov (%rcx), %eax
513 mov %eax, (%rdx)
514 movw 4(%rcx), %ax
515 movw %ax, 4(%rdx)
516 mov %rdi, %rax
517 ret
518
519 .p2align 4
520L(StrncatExit7):
521 xor %ah, %ah
522 movb %ah, 7(%rdx)
523L(Exit7):
/* Bytes 0..3, then bytes 3..6 (byte 3 is written twice).  */
524 mov (%rcx), %eax
525 mov %eax, (%rdx)
526 mov 3(%rcx), %eax
527 mov %eax, 3(%rdx)
528 mov %rdi, %rax
529 ret
530
531 .p2align 4
532L(StrncatExit8):
533 xor %ah, %ah
534 movb %ah, 8(%rdx)
535L(Exit8):
536 movlpd (%rcx), %xmm0
537 movlpd %xmm0, (%rdx)
538 mov %rdi, %rax
539 ret
540
541 .p2align 4
542L(StrncatExit9):
543 xor %ah, %ah
544 movb %ah, 9(%rdx)
545L(Exit9):
546 movlpd (%rcx), %xmm0
547 movlpd %xmm0, (%rdx)
548 movb 8(%rcx), %al
549 movb %al, 8(%rdx)
550 mov %rdi, %rax
551 ret
552
553 .p2align 4
554L(StrncatExit10):
555 xor %ah, %ah
556 movb %ah, 10(%rdx)
557L(Exit10):
558 movlpd (%rcx), %xmm0
559 movlpd %xmm0, (%rdx)
560 movw 8(%rcx), %ax
561 movw %ax, 8(%rdx)
562 mov %rdi, %rax
563 ret
564
565 .p2align 4
566L(StrncatExit11):
567 xor %ah, %ah
568 movb %ah, 11(%rdx)
569L(Exit11):
/* Bytes 0..7, then bytes 7..10 (byte 7 is written twice).  */
570 movlpd (%rcx), %xmm0
571 movlpd %xmm0, (%rdx)
572 mov 7(%rcx), %eax
573 mov %eax, 7(%rdx)
574 mov %rdi, %rax
575 ret
576
577 .p2align 4
578L(StrncatExit12):
579 xor %ah, %ah
580 movb %ah, 12(%rdx)
581L(Exit12):
582 movlpd (%rcx), %xmm0
583 movlpd %xmm0, (%rdx)
584 mov 8(%rcx), %eax
585 mov %eax, 8(%rdx)
586 mov %rdi, %rax
587 ret
588
589 .p2align 4
590L(StrncatExit13):
591 xor %ah, %ah
592 movb %ah, 13(%rdx)
593L(Exit13):
/* Bytes 0..7, then bytes 5..12.  */
594 movlpd (%rcx), %xmm0
595 movlpd %xmm0, (%rdx)
596 movlpd 5(%rcx), %xmm1
597 movlpd %xmm1, 5(%rdx)
598 mov %rdi, %rax
599 ret
600
601 .p2align 4
602L(StrncatExit14):
603 xor %ah, %ah
604 movb %ah, 14(%rdx)
605L(Exit14):
/* Bytes 0..7, then bytes 6..13.  */
606 movlpd (%rcx), %xmm0
607 movlpd %xmm0, (%rdx)
608 movlpd 6(%rcx), %xmm1
609 movlpd %xmm1, 6(%rdx)
610 mov %rdi, %rax
611 ret
612
613 .p2align 4
614L(StrncatExit15):
615 xor %ah, %ah
616 movb %ah, 15(%rdx)
617L(Exit15):
/* Bytes 0..7, then bytes 7..14.  */
618 movlpd (%rcx), %xmm0
619 movlpd %xmm0, (%rdx)
620 movlpd 7(%rcx), %xmm1
621 movlpd %xmm1, 7(%rdx)
622 mov %rdi, %rax
623 ret
624
625 .p2align 4
626L(StrncatExit16):
627 xor %ah, %ah
628 movb %ah, 16(%rdx)
629L(Exit16):
630 movlpd (%rcx), %xmm0
631 movlpd 8(%rcx), %xmm1
632 movlpd %xmm0, (%rdx)
633 movlpd %xmm1, 8(%rdx)
634 mov %rdi, %rax
635 ret
636
/* Length-limited tails.  In this file USE_AS_STRNCPY is defined only
   inside the USE_AS_STRNCAT section ahead of the strcpy-ssse3.S
   include.  */
637# ifdef USE_AS_STRNCPY
638
/* L(CopyFrom1To16BytesCase2): the last 16-byte source chunk contains a NUL
   (mask in %eax is non-zero) and the length limit may also end inside it;
   whichever comes first decides how much is copied.
   NOTE(review): the meaning of the incoming %r8, %rsi, %rcx and %rdx is
   set up by the included file; presumably %r8 arrives biased by -16 (hence
   the add), %rsi is a byte offset and %rcx / %rdx are the source /
   destination bases -- confirm against strcpy-ssse3.S.  */
639 .p2align 4
640L(CopyFrom1To16BytesCase2):
641 add $16, %r8
642 add %rsi, %rcx
643 lea (%rsi, %rdx), %rsi
/* Choose between the low (bytes 0..7) and high (bytes 8..15) dispatch.
   %dh = (bit 15 of %r8 - 9) | %al, so it is zero only when the mask has no
   bit in 0..7 and bit 15 of %r8 - 9 is clear; for limits 1..8 the
   difference is negative and the bit is set.  %rsi holds the destination
   address meanwhile; the second lea puts it back in %rdx without touching
   the flags that jz tests.  */
644 lea -9(%r8), %rdx
645 and $1<<7, %dh
646 or %al, %dh
647 test %dh, %dh
648 lea (%rsi), %rdx
649 jz L(ExitHighCase2)
650
/* For K = 1..7 in turn: NUL at index K-1 means copy K bytes including it;
   otherwise a limit of exactly K means copy K bytes and add the NUL.  */
651 test $0x01, %al
652 jnz L(Exit1)
653 cmp $1, %r8
654 je L(StrncatExit1)
655 test $0x02, %al
656 jnz L(Exit2)
657 cmp $2, %r8
658 je L(StrncatExit2)
659 test $0x04, %al
660 jnz L(Exit3)
661 cmp $3, %r8
662 je L(StrncatExit3)
663 test $0x08, %al
664 jnz L(Exit4)
665 cmp $4, %r8
666 je L(StrncatExit4)
667 test $0x10, %al
668 jnz L(Exit5)
669 cmp $5, %r8
670 je L(StrncatExit5)
671 test $0x20, %al
672 jnz L(Exit6)
673 cmp $6, %r8
674 je L(StrncatExit6)
675 test $0x40, %al
676 jnz L(Exit7)
677 cmp $7, %r8
678 je L(StrncatExit7)
/* Copy 8 bytes, then store a NUL right after the last non-NUL byte:
   cmpb $1 sets the carry flag exactly when the copied byte 7 is zero, and
   "sbb $-1" adds 1 minus carry, so %rax ends at byte 7 if that byte is the
   NUL and at byte 8 otherwise.  */
679 movlpd (%rcx), %xmm0
680 movlpd %xmm0, (%rdx)
681 lea 7(%rdx), %rax
682 cmpb $1, (%rax)
683 sbb $-1, %rax
684 xor %cl, %cl
685 movb %cl, (%rax)
686 mov %rdi, %rax
687 ret
688
/* L(ExitHighCase2): same interleaved dispatch for K = 9..15, using the
   high mask byte.  */
689 .p2align 4
690L(ExitHighCase2):
691 test $0x01, %ah
692 jnz L(Exit9)
693 cmp $9, %r8
694 je L(StrncatExit9)
695 test $0x02, %ah
696 jnz L(Exit10)
697 cmp $10, %r8
698 je L(StrncatExit10)
699 test $0x04, %ah
700 jnz L(Exit11)
701 cmp $11, %r8
702 je L(StrncatExit11)
703 test $0x8, %ah
704 jnz L(Exit12)
705 cmp $12, %r8
706 je L(StrncatExit12)
707 test $0x10, %ah
708 jnz L(Exit13)
709 cmp $13, %r8
710 je L(StrncatExit13)
711 test $0x20, %ah
712 jnz L(Exit14)
713 cmp $14, %r8
714 je L(StrncatExit14)
715 test $0x40, %ah
716 jnz L(Exit15)
717 cmp $15, %r8
718 je L(StrncatExit15)
/* Copy all 16 bytes; no separate terminator is stored on this path.
   NOTE(review): presumably the only mask bit left is bit 15, so byte 15
   is the NUL -- confirm.  */
719 movlpd (%rcx), %xmm0
720 movlpd %xmm0, (%rdx)
721 movlpd 8(%rcx), %xmm1
722 movlpd %xmm1, 8(%rdx)
723 mov %rdi, %rax
724 ret
725
/* L(CopyFrom1To16BytesCase2OrCase3): a non-zero NUL mask in %rax selects
   Case2; otherwise execution runs on into Case3.  */
726L(CopyFrom1To16BytesCase2OrCase3):
727 test %rax, %rax
728 jnz L(CopyFrom1To16BytesCase2)
729
/* L(CopyFrom1To16BytesCase3): no NUL mask is consulted here; the number of
   bytes copied is decided by %r8 alone, and a terminating NUL is stored
   after them.
   NOTE(review): as in Case2, the incoming register roles come from the
   included file; presumably %r8 arrives biased by -16 and %rsi is a byte
   offset added to both pointers -- confirm against strcpy-ssse3.S.  */
730 .p2align 4
731L(CopyFrom1To16BytesCase3):
732 add $16, %r8
733 add %rsi, %rdx
734 add %rsi, %rcx
735
736 cmp $8, %r8
737 ja L(ExitHighCase3)
738 cmp $1, %r8
739 je L(StrncatExit1)
740 cmp $2, %r8
741 je L(StrncatExit2)
742 cmp $3, %r8
743 je L(StrncatExit3)
744 cmp $4, %r8
745 je L(StrncatExit4)
746 cmp $5, %r8
747 je L(StrncatExit5)
748 cmp $6, %r8
749 je L(StrncatExit6)
750 cmp $7, %r8
751 je L(StrncatExit7)
/* None of 1..7 matched: copy 8 bytes and terminate at index 8.  */
752 movlpd (%rcx), %xmm0
753 movlpd %xmm0, (%rdx)
754 xor %ah, %ah
755 movb %ah, 8(%rdx)
756 mov %rdi, %rax
757 ret
758
759 .p2align 4
760L(ExitHighCase3):
761 cmp $9, %r8
762 je L(StrncatExit9)
763 cmp $10, %r8
764 je L(StrncatExit10)
765 cmp $11, %r8
766 je L(StrncatExit11)
767 cmp $12, %r8
768 je L(StrncatExit12)
769 cmp $13, %r8
770 je L(StrncatExit13)
771 cmp $14, %r8
772 je L(StrncatExit14)
773 cmp $15, %r8
774 je L(StrncatExit15)
/* None of 9..15 matched: copy 16 bytes and terminate at index 16.  */
775 movlpd (%rcx), %xmm0
776 movlpd %xmm0, (%rdx)
777 movlpd 8(%rcx), %xmm1
778 movlpd %xmm1, 8(%rdx)
779 xor %ah, %ah
780 movb %ah, 16(%rdx)
781 mov %rdi, %rax
782 ret
783
/* L(StrncatExit0): reached from L(StartStrcpyPart) when %r8 is zero.
   Nothing is written; the destination pointer is returned as is.  */
784 .p2align 4
785L(StrncatExit0):
786 mov %rdi, %rax
787 ret
788
/* L(StrncatExit15Bytes): reached from L(StartStrcpyPart) once source bytes
   0..8 have tested non-zero and %r8 is below 16 (limits up to 8 were
   diverted earlier, so 9 <= %r8 <= 15 here).
   For K = 9..14 in turn: a limit of exactly K copies K bytes and adds the
   NUL; otherwise a NUL at source index K copies K+1 bytes including it.  */
789 .p2align 4
790L(StrncatExit15Bytes):
791 cmp $9, %r8
792 je L(StrncatExit9)
793 cmpb $0, 9(%rcx)
794 jz L(Exit10)
795 cmp $10, %r8
796 je L(StrncatExit10)
797 cmpb $0, 10(%rcx)
798 jz L(Exit11)
799 cmp $11, %r8
800 je L(StrncatExit11)
801 cmpb $0, 11(%rcx)
802 jz L(Exit12)
803 cmp $12, %r8
804 je L(StrncatExit12)
805 cmpb $0, 12(%rcx)
806 jz L(Exit13)
807 cmp $13, %r8
808 je L(StrncatExit13)
809 cmpb $0, 13(%rcx)
810 jz L(Exit14)
811 cmp $14, %r8
812 je L(StrncatExit14)
/* Remaining case: copy 15 bytes as two overlapping 64-bit moves (0..7
   and 7..14).  Source byte 14 was not tested above; instead the copied
   byte is examined: cmpb $1 sets the carry flag exactly when it is zero
   and "sbb $-1" adds 1 minus carry, so the NUL is stored at index 14 if
   that byte is already the terminator and at index 15 otherwise.  */
813 movlpd (%rcx), %xmm0
814 movlpd %xmm0, (%rdx)
815 movlpd 7(%rcx), %xmm1
816 movlpd %xmm1, 7(%rdx)
817 lea 14(%rdx), %rax
818 cmpb $1, (%rax)
819 sbb $-1, %rax
820 xor %cl, %cl
821 movb %cl, (%rax)
822 mov %rdi, %rax
823 ret
824
/* L(StrncatExit8Bytes): reached from L(StartStrcpyPart) when
   1 <= %r8 <= 8, before any source byte has been examined.
   For K = 1..7 in turn: a NUL at source index K-1 copies K bytes including
   it; otherwise a limit of exactly K copies K bytes and adds the NUL.  */
825 .p2align 4
826L(StrncatExit8Bytes):
827 cmpb $0, (%rcx)
828 jz L(Exit1)
829 cmp $1, %r8
830 je L(StrncatExit1)
831 cmpb $0, 1(%rcx)
832 jz L(Exit2)
833 cmp $2, %r8
834 je L(StrncatExit2)
835 cmpb $0, 2(%rcx)
836 jz L(Exit3)
837 cmp $3, %r8
838 je L(StrncatExit3)
839 cmpb $0, 3(%rcx)
840 jz L(Exit4)
841 cmp $4, %r8
842 je L(StrncatExit4)
843 cmpb $0, 4(%rcx)
844 jz L(Exit5)
845 cmp $5, %r8
846 je L(StrncatExit5)
847 cmpb $0, 5(%rcx)
848 jz L(Exit6)
849 cmp $6, %r8
850 je L(StrncatExit6)
851 cmpb $0, 6(%rcx)
852 jz L(Exit7)
853 cmp $7, %r8
854 je L(StrncatExit7)
/* Remaining case: copy 8 bytes.  Source byte 7 was not tested above;
   cmpb $1 sets the carry flag exactly when the copied byte 7 is zero and
   "sbb $-1" adds 1 minus carry, so the NUL is stored at index 7 if that
   byte is already the terminator and at index 8 otherwise.  */
855 movlpd (%rcx), %xmm0
856 movlpd %xmm0, (%rdx)
857 lea 7(%rdx), %rax
858 cmpb $1, (%rax)
859 sbb $-1, %rax
860 xor %cl, %cl
861 movb %cl, (%rax)
862 mov %rdi, %rax
863 ret
864
/* Closes the USE_AS_STRNCPY section.  */
865# endif
866END (STRCAT)
/* Closes the IS_IN (libc) guard at the top of the file.  */
867#endif
868