1/* strcpy with SSE2 and unaligned load
2 Copyright (C) 2011-2020 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#if IS_IN (libc)
21
22# ifndef USE_AS_STRCAT
23# include <sysdep.h>
24
25# ifndef STRCPY
26# define STRCPY __strcpy_sse2_unaligned
27# endif
28
29# endif
30
/* JMPTBL (I, B): a jump-table entry, expressed as the offset of label I
   from base B.
   BRANCH_TO_JMPTBL_ENTRY (TABLE, INDEX, SCALE): load the address of TABLE
   RIP-relatively into %r11, sign-extend the 32-bit entry found at
   TABLE + INDEX * SCALE into %rcx, add the table address back and jump
   to the result.  Clobbers %r11 and %rcx.  */
31# define JMPTBL(I, B) I - B
32# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
33 lea TABLE(%rip), %r11; \
34 movslq (%r11, INDEX, SCALE), %rcx; \
35 lea (%r11, %rcx), %rcx; \
36 _CET_NOTRACK jmp *%rcx
37
/* Entry prologue and probe of the first 32 source bytes.
   The prologue (L41-L50) is only assembled when USE_AS_STRCAT is not
   defined; presumably the strcat variants supply their own prologue
   before including this file -- confirm against the includer.
   Registers as used below:
     %rdi  destination pointer
     %rsi  source pointer
     %r8   byte limit (USE_AS_STRNCPY only, copied from %rdx)
     %rax  return value (destination) unless USE_AS_STPCPY
     %rcx  source offset (first within 64 bytes, then within 16 bytes)
     %rdx  NUL bit mask of the 16-byte chunk being examined.  */
38# ifndef USE_AS_STRCAT
39
40.text
41ENTRY (STRCPY)
42# ifdef USE_AS_STRNCPY
43 mov %RDX_LP, %R8_LP
44 test %R8_LP, %R8_LP
45 jz L(ExitZero)
46# endif
47 mov %rsi, %rcx
48# ifndef USE_AS_STPCPY
49 mov %rdi, %rax /* save result */
50# endif
51
52# endif
53
/* If the source starts at most 32 bytes into its 64-byte block, take the
   path that probes it with two unaligned 16-byte loads.  */
54 and $63, %rcx
55 cmp $32, %rcx
56 jbe L(SourceStringAlignmentLess32)
57
/* Otherwise round %rsi down to 16 bytes and keep the misalignment in
   %rcx.  Both scratch vectors start out as all-zero comparands.  */
58 and $-16, %rsi
59 and $15, %rcx
60 pxor %xmm0, %xmm0
61 pxor %xmm1, %xmm1
62
/* NUL mask of the aligned chunk; the shift by %cl discards the bits of
   the bytes that lie before the real start of the string.  */
63 pcmpeqb (%rsi), %xmm1
64 pmovmskb %xmm1, %rdx
65 shr %cl, %rdx
66
/* strncpy variants: %r10 = 16 - %rcx (or 17 - %rcx), i.e. derived from the
   number of string bytes in this first chunk.  If the limit in %r8 does
   not exceed it, the limit is handled by the Case2/Case3 code.  */
67# ifdef USE_AS_STRNCPY
68# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
69 mov $16, %r10
70 sub %rcx, %r10
71 cmp %r10, %r8
72# else
73 mov $17, %r10
74 sub %rcx, %r10
75 cmp %r10, %r8
76# endif
77 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
78# endif
79 test %rdx, %rdx
80 jnz L(CopyFrom1To16BytesTail)
81
/* Same probe for the next aligned 16 bytes, using %xmm0 as the zero
   comparand.  */
82 pcmpeqb 16(%rsi), %xmm0
83 pmovmskb %xmm0, %rdx
84
85# ifdef USE_AS_STRNCPY
86 add $16, %r10
87 cmp %r10, %r8
88 jbe L(CopyFrom1To32BytesCase2OrCase3)
89# endif
90 test %rdx, %rdx
91 jnz L(CopyFrom1To32Bytes)
92
/* No NUL seen so far: copy the first 16 bytes from the original
   (unaligned) source address %rsi + %rcx, then fall through into
   L(Unalign16Both).  */
93 movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
94 movdqu %xmm1, (%rdi)
95
96/* If source address alignment != destination address alignment */
/* L(Unalign16Both): unrolled copy of 16-byte chunks using aligned source
   loads and unaligned destination stores.
   On entry %rsi is 16-byte aligned and %rcx holds the source
   misalignment.  %rdi is biased by -%rcx so that the same index %rcx
   addresses matching bytes in both buffers.
   Each step loads the next aligned chunk, stores the previously loaded
   one, and tests the new chunk for a NUL before it is stored.
   %xmm0 is the zero comparand: pcmpeqb overwrites it with the match
   mask, which is all-zero again whenever no NUL was found, so it can be
   reused on the fall-through path.
   After six chunks the code rounds %rsi down to a 64-byte boundary,
   shifts %rdi by the same amount and falls into L(Unaligned64Loop).  */
97 .p2align 4
98L(Unalign16Both):
99 sub %rcx, %rdi
100# ifdef USE_AS_STRNCPY
/* %r8 += %rcx, saturating to all-ones if the addition carries
   (sbb yields -1 on carry, which the or then spreads over %r8).  */
101 add %rcx, %r8
102 sbb %rcx, %rcx
103 or %rcx, %r8
104# endif
105 mov $16, %rcx
106 movdqa (%rsi, %rcx), %xmm1
107 movaps 16(%rsi, %rcx), %xmm2
108 movdqu %xmm1, (%rdi, %rcx)
109 pcmpeqb %xmm2, %xmm0
110 pmovmskb %xmm0, %rdx
111 add $16, %rcx
112# ifdef USE_AS_STRNCPY
113 sub $48, %r8
114 jbe L(CopyFrom1To16BytesCase2OrCase3)
115# endif
116 test %rdx, %rdx
117# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
118 jnz L(CopyFrom1To16BytesUnalignedXmm2)
119# else
120 jnz L(CopyFrom1To16Bytes)
121# endif
122
/* Step 2: pending chunk in %xmm2, new chunk in %xmm3.  */
123 movaps 16(%rsi, %rcx), %xmm3
124 movdqu %xmm2, (%rdi, %rcx)
125 pcmpeqb %xmm3, %xmm0
126 pmovmskb %xmm0, %rdx
127 add $16, %rcx
128# ifdef USE_AS_STRNCPY
129 sub $16, %r8
130 jbe L(CopyFrom1To16BytesCase2OrCase3)
131# endif
132 test %rdx, %rdx
133# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
134 jnz L(CopyFrom1To16BytesUnalignedXmm3)
135# else
136 jnz L(CopyFrom1To16Bytes)
137# endif
138
/* Step 3: pending chunk in %xmm3, new chunk in %xmm4.  */
139 movaps 16(%rsi, %rcx), %xmm4
140 movdqu %xmm3, (%rdi, %rcx)
141 pcmpeqb %xmm4, %xmm0
142 pmovmskb %xmm0, %rdx
143 add $16, %rcx
144# ifdef USE_AS_STRNCPY
145 sub $16, %r8
146 jbe L(CopyFrom1To16BytesCase2OrCase3)
147# endif
148 test %rdx, %rdx
149# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
150 jnz L(CopyFrom1To16BytesUnalignedXmm4)
151# else
152 jnz L(CopyFrom1To16Bytes)
153# endif
154
/* Step 4: pending chunk in %xmm4, new chunk in %xmm1.  */
155 movaps 16(%rsi, %rcx), %xmm1
156 movdqu %xmm4, (%rdi, %rcx)
157 pcmpeqb %xmm1, %xmm0
158 pmovmskb %xmm0, %rdx
159 add $16, %rcx
160# ifdef USE_AS_STRNCPY
161 sub $16, %r8
162 jbe L(CopyFrom1To16BytesCase2OrCase3)
163# endif
164 test %rdx, %rdx
165# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
166 jnz L(CopyFrom1To16BytesUnalignedXmm1)
167# else
168 jnz L(CopyFrom1To16Bytes)
169# endif
170
/* Step 5: pending chunk in %xmm1, new chunk in %xmm2.  */
171 movaps 16(%rsi, %rcx), %xmm2
172 movdqu %xmm1, (%rdi, %rcx)
173 pcmpeqb %xmm2, %xmm0
174 pmovmskb %xmm0, %rdx
175 add $16, %rcx
176# ifdef USE_AS_STRNCPY
177 sub $16, %r8
178 jbe L(CopyFrom1To16BytesCase2OrCase3)
179# endif
180 test %rdx, %rdx
181# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
182 jnz L(CopyFrom1To16BytesUnalignedXmm2)
183# else
184 jnz L(CopyFrom1To16Bytes)
185# endif
186
/* Step 6: pending chunk in %xmm2, new chunk in %xmm3.  */
187 movaps 16(%rsi, %rcx), %xmm3
188 movdqu %xmm2, (%rdi, %rcx)
189 pcmpeqb %xmm3, %xmm0
190 pmovmskb %xmm0, %rdx
191 add $16, %rcx
192# ifdef USE_AS_STRNCPY
193 sub $16, %r8
194 jbe L(CopyFrom1To16BytesCase2OrCase3)
195# endif
196 test %rdx, %rdx
197# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
198 jnz L(CopyFrom1To16BytesUnalignedXmm3)
199# else
200 jnz L(CopyFrom1To16Bytes)
201# endif
202
/* Store the last pending chunk, then switch to 64-byte granularity:
   %rsi = (%rsi + %rcx + 16) rounded down to 64, %rdx = old %rsi - new
   %rsi, and %rdi is moved by the same distance.  The strncpy limit is
   re-based with the same delta plus 128.  */
203 movdqu %xmm3, (%rdi, %rcx)
204 mov %rsi, %rdx
205 lea 16(%rsi, %rcx), %rsi
206 and $-0x40, %rsi
207 sub %rsi, %rdx
208 sub %rdx, %rdi
209# ifdef USE_AS_STRNCPY
210 lea 128(%r8, %rdx), %r8
211# endif
/* L(Unaligned64Loop): load the first 64-byte source block (%rsi was
   rounded down to a 64-byte boundary just before this label) and test
   it for a NUL without storing anything yet.
   The four chunks are kept in %xmm4, %xmm5, %xmm6 and %xmm7.  pminub
   folds them into one byte-wise minimum, which contains a zero byte iff
   any of the 64 bytes is zero; comparing that against the zero vector
   %xmm0 gives the mask in %rdx.  */
212L(Unaligned64Loop):
213 movaps (%rsi), %xmm2
214 movaps %xmm2, %xmm4
215 movaps 16(%rsi), %xmm5
216 movaps 32(%rsi), %xmm3
217 movaps %xmm3, %xmm6
218 movaps 48(%rsi), %xmm7
219 pminub %xmm5, %xmm2
220 pminub %xmm7, %xmm3
221 pminub %xmm2, %xmm3
222 pcmpeqb %xmm0, %xmm3
223 pmovmskb %xmm3, %rdx
224# ifdef USE_AS_STRNCPY
/* Limit reached within this block: handled outside this view.  */
225 sub $64, %r8
226 jbe L(UnalignedLeaveCase2OrCase3)
227# endif
228 test %rdx, %rdx
229 jnz L(Unaligned64Leave)
230
/* L(Unaligned64Loop_start): steady-state 64-byte loop.
   Advances both pointers, stores the previous block held in
   %xmm4..%xmm7 (hence the negative destination offsets) and, interleaved
   with those stores, loads and NUL-tests the next block exactly as the
   loop head does.  Leaves the loop as soon as the new block contains a
   NUL or, for strncpy, the limit in %r8 is exhausted.  */
231L(Unaligned64Loop_start):
232 add $64, %rdi
233 add $64, %rsi
234 movdqu %xmm4, -64(%rdi)
235 movaps (%rsi), %xmm2
236 movdqa %xmm2, %xmm4
237 movdqu %xmm5, -48(%rdi)
238 movaps 16(%rsi), %xmm5
239 pminub %xmm5, %xmm2
240 movaps 32(%rsi), %xmm3
241 movdqu %xmm6, -32(%rdi)
242 movaps %xmm3, %xmm6
243 movdqu %xmm7, -16(%rdi)
244 movaps 48(%rsi), %xmm7
245 pminub %xmm7, %xmm3
246 pminub %xmm2, %xmm3
247 pcmpeqb %xmm0, %xmm3
248 pmovmskb %xmm3, %rdx
249# ifdef USE_AS_STRNCPY
250 sub $64, %r8
251 jbe L(UnalignedLeaveCase2OrCase3)
252# endif
253 test %rdx, %rdx
254 jz L(Unaligned64Loop_start)
255
/* L(Unaligned64Leave): the current 64-byte block (%xmm4..%xmm7, not yet
   stored) contains a NUL; find which 16-byte chunk holds it.
   Chunks 0 and 1 are tested first, then chunks 2 and 3.  %xmm0 and
   %xmm1 serve as zero comparands; each is still all-zero when reused
   because the preceding compare found no match.  */
256L(Unaligned64Leave):
257 pxor %xmm1, %xmm1
258
259 pcmpeqb %xmm4, %xmm0
260 pcmpeqb %xmm5, %xmm1
261 pmovmskb %xmm0, %rdx
262 pmovmskb %xmm1, %rcx
263 test %rdx, %rdx
264 jnz L(CopyFrom1To16BytesUnaligned_0)
265 test %rcx, %rcx
266 jnz L(CopyFrom1To16BytesUnaligned_16)
267
268 pcmpeqb %xmm6, %xmm0
269 pcmpeqb %xmm7, %xmm1
270 pmovmskb %xmm0, %rdx
271 pmovmskb %xmm1, %rcx
272 test %rdx, %rdx
273 jnz L(CopyFrom1To16BytesUnaligned_32)
274
/* The NUL is in the last chunk: %rdx = its index within that chunk.
   Store the three NUL-free chunks.  */
275 bsf %rcx, %rdx
276 movdqu %xmm4, (%rdi)
277 movdqu %xmm5, 16(%rdi)
278 movdqu %xmm6, 32(%rdi)
279# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
280# ifdef USE_AS_STPCPY
281 lea 48(%rdi, %rdx), %rax
282# endif
/* strncpy: store the whole last chunk, set %r8 = %r8 + 15 - %rdx and
   point %rdi just past the NUL before entering the zero-fill code.  */
283 movdqu %xmm7, 48(%rdi)
284 add $15, %r8
285 sub %rdx, %r8
286 lea 49(%rdi, %rdx), %rdi
287 jmp L(StrncpyFillTailWithZero)
288# else
/* Otherwise advance both pointers to the last chunk and dispatch on
   the NUL index.  */
289 add $48, %rsi
290 add $48, %rdi
291 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
292# endif
293
294/* Source offset within its 64-byte block is at most 32, so the first 32 source bytes can be read with two unaligned loads without leaving that block */
295
/* L(SourceStringAlignmentLess32): taken from the entry code when
   (source address & 63) is at most 32.
   Loads the first 32 source bytes with two unaligned loads and tests
   each 16-byte half for a NUL (and, for strncpy, against the limit in
   %r8).  If neither half ends the copy, the first half has already been
   stored; %rsi is rounded down to 16 bytes, %rcx is reduced to the
   16-byte misalignment and control joins L(Unalign16Both).  */
296L(SourceStringAlignmentLess32):
297 pxor %xmm0, %xmm0
298 movdqu (%rsi), %xmm1
299 movdqu 16(%rsi), %xmm2
300 pcmpeqb %xmm1, %xmm0
301 pmovmskb %xmm0, %rdx
302
303# ifdef USE_AS_STRNCPY
304# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
305 cmp $16, %r8
306# else
307 cmp $17, %r8
308# endif
309 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
310# endif
311 test %rdx, %rdx
312 jnz L(CopyFrom1To16BytesTail1)
313
/* %xmm0 is all-zero again here (no NUL in the first half), so it can be
   reused as the comparand for the second half.  */
314 pcmpeqb %xmm2, %xmm0
315 movdqu %xmm1, (%rdi)
316 pmovmskb %xmm0, %rdx
317
318# ifdef USE_AS_STRNCPY
319# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
320 cmp $32, %r8
321# else
322 cmp $33, %r8
323# endif
324 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
325# endif
326 test %rdx, %rdx
327 jnz L(CopyFrom1To32Bytes1)
328
329 and $-16, %rsi
330 and $15, %rcx
331 jmp L(Unalign16Both)
332
333/*------End of main part with loops---------------------*/
334
335/* Case1 */
336
/* L(CopyFrom1To16Bytes): only assembled when USE_AS_STRNCPY is not
   defined or USE_AS_STRCAT is defined.  Advances both pointers by the
   chunk offset in %rcx, turns the NUL mask in %rdx into the index of
   its lowest set bit and dispatches through L(ExitTable).  */
337# if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
338 .p2align 4
339L(CopyFrom1To16Bytes):
340 add %rcx, %rdi
341 add %rcx, %rsi
342 bsf %rdx, %rdx
343 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
344# endif
/* L(CopyFrom1To16BytesTail): NUL found in the first aligned chunk.
   Restores the unaligned source pointer (%rsi += %rcx; %rdi is left
   untouched), converts the mask in %rdx to the NUL index and dispatches
   through L(ExitTable).  */
345 .p2align 4
346L(CopyFrom1To16BytesTail):
347 add %rcx, %rsi
348 bsf %rdx, %rdx
349 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
350
/* L(CopyFrom1To32Bytes1): NUL found in the second 16 bytes of the
   unaligned probe; skip the 16 bytes already stored (and charge them to
   the strncpy limit in %r8), then fall into the shared tail.
   L(CopyFrom1To16BytesTail1): convert the mask in %rdx to the NUL index
   and dispatch through L(ExitTable).  */
351 .p2align 4
352L(CopyFrom1To32Bytes1):
353 add $16, %rsi
354 add $16, %rdi
355# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
356 sub $16, %r8
357# endif
358L(CopyFrom1To16BytesTail1):
359 bsf %rdx, %rdx
360 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
361
/* L(CopyFrom1To32Bytes): NUL found in the second aligned chunk.
   Restores the unaligned source pointer and computes the NUL index
   relative to it as bsf (%rdx) + 16 - %rcx, then dispatches through
   L(ExitTable).  */
362 .p2align 4
363L(CopyFrom1To32Bytes):
364 bsf %rdx, %rdx
365 add %rcx, %rsi
366 add $16, %rdx
367 sub %rcx, %rdx
368 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
369
/* L(CopyFrom1To16BytesUnaligned_0): the NUL is in chunk 0 (%xmm4) of
   the current 64-byte block; %rdx becomes its index.
   strncpy (non-strcat): store the whole chunk, set
   %r8 = %r8 + 63 - %rdx, point %rdi just past the NUL and zero-fill.
   Otherwise dispatch through L(ExitTable) with the pointers unchanged.  */
370 .p2align 4
371L(CopyFrom1To16BytesUnaligned_0):
372 bsf %rdx, %rdx
373# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
374# ifdef USE_AS_STPCPY
375 lea (%rdi, %rdx), %rax
376# endif
377 movdqu %xmm4, (%rdi)
378 add $63, %r8
379 sub %rdx, %r8
380 lea 1(%rdi, %rdx), %rdi
381 jmp L(StrncpyFillTailWithZero)
382# else
383 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
384# endif
385
/* L(CopyFrom1To16BytesUnaligned_16): the NUL is in chunk 1 (%xmm5); its
   mask arrives in %rcx and its index is placed in %rdx.  Chunk 0 is
   stored first.
   strncpy (non-strcat): store chunk 1 as well, set
   %r8 = %r8 + 47 - %rdx, point %rdi just past the NUL and zero-fill.
   Otherwise step both pointers over chunk 0 and dispatch through
   L(ExitTable).  */
386 .p2align 4
387L(CopyFrom1To16BytesUnaligned_16):
388 bsf %rcx, %rdx
389 movdqu %xmm4, (%rdi)
390# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
391# ifdef USE_AS_STPCPY
392 lea 16(%rdi, %rdx), %rax
393# endif
394 movdqu %xmm5, 16(%rdi)
395 add $47, %r8
396 sub %rdx, %r8
397 lea 17(%rdi, %rdx), %rdi
398 jmp L(StrncpyFillTailWithZero)
399# else
400 add $16, %rsi
401 add $16, %rdi
402 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
403# endif
404
/* L(CopyFrom1To16BytesUnaligned_32): the NUL is in chunk 2 (%xmm6);
   %rdx becomes its index.  Chunks 0 and 1 are stored first.
   strncpy (non-strcat): store chunk 2 as well, set
   %r8 = %r8 + 31 - %rdx, point %rdi just past the NUL and zero-fill.
   Otherwise step both pointers over chunks 0 and 1 and dispatch through
   L(ExitTable).  */
405 .p2align 4
406L(CopyFrom1To16BytesUnaligned_32):
407 bsf %rdx, %rdx
408 movdqu %xmm4, (%rdi)
409 movdqu %xmm5, 16(%rdi)
410# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
411# ifdef USE_AS_STPCPY
412 lea 32(%rdi, %rdx), %rax
413# endif
414 movdqu %xmm6, 32(%rdi)
415 add $31, %r8
416 sub %rdx, %r8
417 lea 33(%rdi, %rdx), %rdi
418 jmp L(StrncpyFillTailWithZero)
419# else
420 add $32, %rsi
421 add $32, %rdi
422 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
423# endif
424
425# ifdef USE_AS_STRNCPY
426# ifndef USE_AS_STRCAT
/* L(CopyFrom1To16BytesUnalignedXmm{6,5,4,3,1}): strncpy-only stubs.
   Each stores the named vector register at %rdi + %rcx and then jumps
   to the common L(CopyFrom1To16BytesXmmExit), which is defined outside
   this view.  L(Unalign16Both) branches to the Xmm4, Xmm3 and Xmm1
   stubs when the chunk it has just loaded contains a NUL.  */
427 .p2align 4
428L(CopyFrom1To16BytesUnalignedXmm6):
429 movdqu %xmm6, (%rdi, %rcx)
430 jmp L(CopyFrom1To16BytesXmmExit)
431
432 .p2align 4
433L(CopyFrom1To16BytesUnalignedXmm5):
434 movdqu %xmm5, (%rdi, %rcx)
435 jmp L(CopyFrom1To16BytesXmmExit)
436
437 .p2align 4
438L(CopyFrom1To16BytesUnalignedXmm4):
439 movdqu %xmm4, (%rdi, %rcx)
440 jmp L(CopyFrom1To16BytesXmmExit)
441
442 .p2align 4
443L(CopyFrom1To16BytesUnalignedXmm3):
444 movdqu %xmm3, (%rdi, %rcx)
445 jmp L(CopyFrom1To16BytesXmmExit)
446
447 .p2align 4
448L(CopyFrom1To16BytesUnalignedXmm1):
449 movdqu %xmm1, (%rdi, %rcx)
450 jmp L(CopyFrom1To16BytesXmmExit)
451# endif
452
/* L(CopyFrom1To16BytesExit): shared landing pad for the Case2 handlers
   when the NUL comes before the length limit; dispatches through
   L(ExitTable) on the NUL index already in %rdx.  */
453 .p2align 4
454L(CopyFrom1To16BytesExit):
455 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
456
457/* Case2 */
458
/* L(CopyFrom1To16BytesCase2): the current chunk contains a NUL and the
   strncpy limit also ends no later than this chunk.
   Adds 16 back to %r8, advances both pointers by %rcx and compares the
   NUL index with %r8: if the NUL comes first take the normal exit
   table, otherwise copy exactly %r8 bytes via L(ExitStrncpyTable).  */
459 .p2align 4
460L(CopyFrom1To16BytesCase2):
461 add $16, %r8
462 add %rcx, %rdi
463 add %rcx, %rsi
464 bsf %rdx, %rdx
465 cmp %r8, %rdx
466 jb L(CopyFrom1To16BytesExit)
467 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
468
/* L(CopyFrom1To32BytesCase2): as L(CopyFrom1To16BytesCase2), but for a
   NUL in the second aligned chunk of the initial probe.  The NUL index
   is re-based to the unaligned source start (bsf + 16 - %rcx) before it
   is compared with the limit in %r8.  */
469 .p2align 4
470L(CopyFrom1To32BytesCase2):
471 add %rcx, %rsi
472 bsf %rdx, %rdx
473 add $16, %rdx
474 sub %rcx, %rdx
475 cmp %r8, %rdx
476 jb L(CopyFrom1To16BytesExit)
477 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
478
/* L(CopyFrom1To16BytesTailCase2): NUL and limit both fall in the first
   aligned chunk.  Restores the unaligned source pointer, then picks the
   normal exit if the NUL index is below %r8, else the length-limited
   exit for %r8 bytes.  */
479L(CopyFrom1To16BytesTailCase2):
480 add %rcx, %rsi
481 bsf %rdx, %rdx
482 cmp %r8, %rdx
483 jb L(CopyFrom1To16BytesExit)
484 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
485
/* L(CopyFrom1To16BytesTail1Case2): same decision as the other Case2
   handlers for the unaligned probe, where the pointers need no
   adjustment: normal exit if the NUL index is below %r8, otherwise the
   length-limited exit for %r8 bytes.  */
486L(CopyFrom1To16BytesTail1Case2):
487 bsf %rdx, %rdx
488 cmp %r8, %rdx
489 jb L(CopyFrom1To16BytesExit)
490 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
491
492/* Case2 or Case3, Case3 */
493
/* L(CopyFrom1To16BytesCase2OrCase3): the strncpy limit ends in the
   current chunk.  A non-zero NUL mask in %rdx means Case2 (a NUL is
   present too).  Otherwise fall into Case3.
   L(CopyFrom1To16BytesCase3): no NUL; add 16 back to %r8, advance both
   pointers by %rcx and copy exactly %r8 bytes via
   L(ExitStrncpyTable).  */
494 .p2align 4
495L(CopyFrom1To16BytesCase2OrCase3):
496 test %rdx, %rdx
497 jnz L(CopyFrom1To16BytesCase2)
498L(CopyFrom1To16BytesCase3):
499 add $16, %r8
500 add %rcx, %rdi
501 add %rcx, %rsi
502 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
503
/* L(CopyFrom1To32BytesCase2OrCase3): limit ends within the second
   aligned chunk of the initial probe.  With a NUL present go to the
   Case2 handler; otherwise restore the unaligned source pointer and
   copy exactly %r8 bytes via L(ExitStrncpyTable).  */
504 .p2align 4
505L(CopyFrom1To32BytesCase2OrCase3):
506 test %rdx, %rdx
507 jnz L(CopyFrom1To32BytesCase2)
508 add %rcx, %rsi
509 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
510
/* L(CopyFrom1To16BytesTailCase2OrCase3): limit ends within the first
   aligned chunk.  With a NUL present go to the Case2 handler; otherwise
   restore the unaligned source pointer and copy exactly %r8 bytes via
   L(ExitStrncpyTable).  */
511 .p2align 4
512L(CopyFrom1To16BytesTailCase2OrCase3):
513 test %rdx, %rdx
514 jnz L(CopyFrom1To16BytesTailCase2)
515 add %rcx, %rsi
516 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
517
/* L(CopyFrom1To32Bytes1Case2OrCase3): limit ends within the second half
   of the unaligned 32-byte probe; skip the 16 bytes already stored and
   charge them to %r8, then share the code below.
   L(CopyFrom1To16BytesTail1Case2OrCase3): with a NUL present go to the
   Case2 handler; otherwise copy exactly %r8 bytes via
   L(ExitStrncpyTable).  */
518 .p2align 4
519L(CopyFrom1To32Bytes1Case2OrCase3):
520 add $16, %rdi
521 add $16, %rsi
522 sub $16, %r8
523L(CopyFrom1To16BytesTail1Case2OrCase3):
524 test %rdx, %rdx
525 jnz L(CopyFrom1To16BytesTail1Case2)
526 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
527
528# endif
529
530/*------------End of labels for copying 1-16 bytes and 1-32 bytes----*/
531
/* L(Exit1): store one byte, taken from %dh, at the destination.
   %dh is used as the zero byte: on the visible paths into L(ExitTable)
   %rdx holds a small byte index, so bits 8-15 are clear (the table
   itself is outside this view).
   STPCPY returns %rdi.  strncpy (non-strcat) takes 1 off %r8, steps
   %rdi past the byte and zero-fills if anything is left; lea does not
   touch the flags set by sub.  */
532 .p2align 4
533L(Exit1):
534 mov %dh, (%rdi)
535# ifdef USE_AS_STPCPY
536 lea (%rdi), %rax
537# endif
538# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
539 sub $1, %r8
540 lea 1(%rdi), %rdi
541 jnz L(StrncpyFillTailWithZero)
542# endif
543 ret
544
/* L(Exit2): copy 2 bytes (presumably 1 character plus the NUL -- the
   dispatch table is outside this view) as one word.
   STPCPY returns %rdi + 1.  strncpy (non-strcat) takes 2 off %r8 and
   zero-fills the remainder if any.  */
545 .p2align 4
546L(Exit2):
547 mov (%rsi), %dx
548 mov %dx, (%rdi)
549# ifdef USE_AS_STPCPY
550 lea 1(%rdi), %rax
551# endif
552# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
553 sub $2, %r8
554 lea 2(%rdi), %rdi
555 jnz L(StrncpyFillTailWithZero)
556# endif
557 ret
558
/* L(Exit3): 3 bytes total -- copy bytes 0-1 as one word and store %dh
   at offset 2 (%dh is presumably zero here, see how %rdx is computed
   before the L(ExitTable) dispatch).
   STPCPY returns %rdi + 2.  strncpy (non-strcat) takes 3 off %r8 and
   zero-fills the remainder if any.  */
559 .p2align 4
560L(Exit3):
561 mov (%rsi), %cx
562 mov %cx, (%rdi)
563 mov %dh, 2(%rdi)
564# ifdef USE_AS_STPCPY
565 lea 2(%rdi), %rax
566# endif
567# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
568 sub $3, %r8
569 lea 3(%rdi), %rdi
570 jnz L(StrncpyFillTailWithZero)
571# endif
572 ret
573
/* L(Exit4): copy 4 bytes (presumably including the NUL) as one dword.
   STPCPY returns %rdi + 3.  strncpy (non-strcat) takes 4 off %r8 and
   zero-fills the remainder if any.  */
574 .p2align 4
575L(Exit4):
576 mov (%rsi), %edx
577 mov %edx, (%rdi)
578# ifdef USE_AS_STPCPY
579 lea 3(%rdi), %rax
580# endif
581# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
582 sub $4, %r8
583 lea 4(%rdi), %rdi
584 jnz L(StrncpyFillTailWithZero)
585# endif
586 ret
587
/* L(Exit5): 5 bytes total -- copy bytes 0-3 as one dword and store %dh
   (presumably zero here) at offset 4.
   STPCPY returns %rdi + 4.  strncpy (non-strcat) takes 5 off %r8 and
   zero-fills the remainder if any.  */
588 .p2align 4
589L(Exit5):
590 mov (%rsi), %ecx
591 mov %dh, 4(%rdi)
592 mov %ecx, (%rdi)
593# ifdef USE_AS_STPCPY
594 lea 4(%rdi), %rax
595# endif
596# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
597 sub $5, %r8
598 lea 5(%rdi), %rdi
599 jnz L(StrncpyFillTailWithZero)
600# endif
601 ret
602
/* L(Exit6): copy 6 bytes (presumably including the NUL) as a dword at
   offset 0 plus a word at offset 4.
   STPCPY returns %rdi + 5.  strncpy (non-strcat) takes 6 off %r8 and
   zero-fills the remainder if any.  */
603 .p2align 4
604L(Exit6):
605 mov (%rsi), %ecx
606 mov 4(%rsi), %dx
607 mov %ecx, (%rdi)
608 mov %dx, 4(%rdi)
609# ifdef USE_AS_STPCPY
610 lea 5(%rdi), %rax
611# endif
612# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
613 sub $6, %r8
614 lea 6(%rdi), %rdi
615 jnz L(StrncpyFillTailWithZero)
616# endif
617 ret
618
/* L(Exit7): copy 7 bytes (presumably including the NUL) as two dwords
   at offsets 0 and 3; they overlap by one byte.
   STPCPY returns %rdi + 6.  strncpy (non-strcat) takes 7 off %r8 and
   zero-fills the remainder if any.  */
619 .p2align 4
620L(Exit7):
621 mov (%rsi), %ecx
622 mov 3(%rsi), %edx
623 mov %ecx, (%rdi)
624 mov %edx, 3(%rdi)
625# ifdef USE_AS_STPCPY
626 lea 6(%rdi), %rax
627# endif
628# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
629 sub $7, %r8
630 lea 7(%rdi), %rdi
631 jnz L(StrncpyFillTailWithZero)
632# endif
633 ret
634
/* L(Exit8): copy 8 bytes (presumably including the NUL) as one qword.
   STPCPY returns %rdi + 7.  strncpy (non-strcat) takes 8 off %r8 and
   zero-fills the remainder if any.  */
635 .p2align 4
636L(Exit8):
637 mov (%rsi), %rdx
638 mov %rdx, (%rdi)
639# ifdef USE_AS_STPCPY
640 lea 7(%rdi), %rax
641# endif
642# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
643 sub $8, %r8
644 lea 8(%rdi), %rdi
645 jnz L(StrncpyFillTailWithZero)
646# endif
647 ret
648
/* L(Exit9): 9 bytes total -- copy bytes 0-7 as one qword and store %dh
   (presumably zero here) at offset 8.
   STPCPY returns %rdi + 8.  strncpy (non-strcat) takes 9 off %r8 and
   zero-fills the remainder if any.  */
649 .p2align 4
650L(Exit9):
651 mov (%rsi), %rcx
652 mov %dh, 8(%rdi)
653 mov %rcx, (%rdi)
654# ifdef USE_AS_STPCPY
655 lea 8(%rdi), %rax
656# endif
657# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
658 sub $9, %r8
659 lea 9(%rdi), %rdi
660 jnz L(StrncpyFillTailWithZero)
661# endif
662 ret
663
/* L(Exit10): copy 10 bytes (presumably including the NUL) as a qword at
   offset 0 plus a word at offset 8.
   STPCPY returns %rdi + 9.  strncpy (non-strcat) takes 10 off %r8 and
   zero-fills the remainder if any.  */
664 .p2align 4
665L(Exit10):
666 mov (%rsi), %rcx
667 mov 8(%rsi), %dx
668 mov %rcx, (%rdi)
669 mov %dx, 8(%rdi)
670# ifdef USE_AS_STPCPY
671 lea 9(%rdi), %rax
672# endif
673# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
674 sub $10, %r8
675 lea 10(%rdi), %rdi
676 jnz L(StrncpyFillTailWithZero)
677# endif
678 ret
679
/* L(Exit11): copy 11 bytes (presumably including the NUL) as a qword at
   offset 0 plus a dword at offset 7; they overlap by one byte.
   STPCPY returns %rdi + 10.  strncpy (non-strcat) takes 11 off %r8 and
   zero-fills the remainder if any.  */
680 .p2align 4
681L(Exit11):
682 mov (%rsi), %rcx
683 mov 7(%rsi), %edx
684 mov %rcx, (%rdi)
685 mov %edx, 7(%rdi)
686# ifdef USE_AS_STPCPY
687 lea 10(%rdi), %rax
688# endif
689# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
690 sub $11, %r8
691 lea 11(%rdi), %rdi
692 jnz L(StrncpyFillTailWithZero)
693# endif
694 ret
695
/* L(Exit12): copy 12 bytes (presumably including the NUL) as a qword at
   offset 0 plus a dword at offset 8.
   STPCPY returns %rdi + 11.  strncpy (non-strcat) takes 12 off %r8 and
   zero-fills the remainder if any.  */
696 .p2align 4
697L(Exit12):
698 mov (%rsi), %rcx
699 mov 8(%rsi), %edx
700 mov %rcx, (%rdi)
701 mov %edx, 8(%rdi)
702# ifdef USE_AS_STPCPY
703 lea 11(%rdi), %rax
704# endif
705# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
706 sub $12, %r8
707 lea 12(%rdi), %rdi
708 jnz L(StrncpyFillTailWithZero)
709# endif
710 ret
711
/* L(Exit13): copy 13 bytes (presumably including the NUL) as two
   overlapping qwords at offsets 0 and 5.
   STPCPY returns %rdi + 12.  strncpy (non-strcat) takes 13 off %r8 and
   zero-fills the remainder if any.  */
712 .p2align 4
713L(Exit13):
714 mov (%rsi), %rcx
715 mov 5(%rsi), %rdx
716 mov %rcx, (%rdi)
717 mov %rdx, 5(%rdi)
718# ifdef USE_AS_STPCPY
719 lea 12(%rdi), %rax
720# endif
721# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
722 sub $13, %r8
723 lea 13(%rdi), %rdi
724 jnz L(StrncpyFillTailWithZero)
725# endif
726 ret
727
/* L(Exit14): copy 14 bytes (presumably including the NUL) as two
   overlapping qwords at offsets 0 and 6.
   STPCPY returns %rdi + 13.  strncpy (non-strcat) takes 14 off %r8 and
   zero-fills the remainder if any.  */
728 .p2align 4
729L(Exit14):
730 mov (%rsi), %rcx
731 mov 6(%rsi), %rdx
732 mov %rcx, (%rdi)
733 mov %rdx, 6(%rdi)
734# ifdef USE_AS_STPCPY
735 lea 13(%rdi), %rax
736# endif
737# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
738 sub $14, %r8
739 lea 14(%rdi), %rdi
740 jnz L(StrncpyFillTailWithZero)
741# endif
742 ret
743
/* L(Exit15): copy 15 bytes (presumably including the NUL) as two
   overlapping qwords at offsets 0 and 7.
   STPCPY returns %rdi + 14.  strncpy (non-strcat) takes 15 off %r8 and
   zero-fills the remainder if any.  */
744 .p2align 4
745L(Exit15):
746 mov (%rsi), %rcx
747 mov 7(%rsi), %rdx
748 mov %rcx, (%rdi)
749 mov %rdx, 7(%rdi)
750# ifdef USE_AS_STPCPY
751 lea 14(%rdi), %rax
752# endif
753# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
754 sub $15, %r8
755 lea 15(%rdi), %rdi
756 jnz L(StrncpyFillTailWithZero)
757# endif
758 ret
759
/* L(Exit16): copy 16 bytes (presumably including the NUL) with one
   unaligned vector load/store.
   STPCPY returns %rdi + 15.  strncpy (non-strcat) takes 16 off %r8 and
   zero-fills the remainder if any.  */
760 .p2align 4
761L(Exit16):
762 movdqu (%rsi), %xmm0
763 movdqu %xmm0, (%rdi)
764# ifdef USE_AS_STPCPY
765 lea 15(%rdi), %rax
766# endif
767# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
768 sub $16, %r8
769 lea 16(%rdi), %rdi
770 jnz L(StrncpyFillTailWithZero)
771# endif
772 ret
773
/* L(Exit17): 17 bytes total -- copy bytes 0-15 as one vector and store
   %dh (presumably zero here) at offset 16.
   STPCPY returns %rdi + 16.  strncpy (non-strcat) takes 17 off %r8 and
   zero-fills the remainder if any.  */
774 .p2align 4
775L(Exit17):
776 movdqu (%rsi), %xmm0
777 movdqu %xmm0, (%rdi)
778 mov %dh, 16(%rdi)
779# ifdef USE_AS_STPCPY
780 lea 16(%rdi), %rax
781# endif
782# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
783 sub $17, %r8
784 lea 17(%rdi), %rdi
785 jnz L(StrncpyFillTailWithZero)
786# endif
787 ret
788
/* L(Exit18): copy 18 bytes (presumably including the NUL) as a vector
   at offset 0 plus a word at offset 16.
   STPCPY returns %rdi + 17.  strncpy (non-strcat) takes 18 off %r8 and
   zero-fills the remainder if any.  */
789 .p2align 4
790L(Exit18):
791 movdqu (%rsi), %xmm0
792 mov 16(%rsi), %cx
793 movdqu %xmm0, (%rdi)
794 mov %cx, 16(%rdi)
795# ifdef USE_AS_STPCPY
796 lea 17(%rdi), %rax
797# endif
798# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
799 sub $18, %r8
800 lea 18(%rdi), %rdi
801 jnz L(StrncpyFillTailWithZero)
802# endif
803 ret
804
/* L(Exit19): copy 19 bytes (presumably including the NUL) as a vector
   at offset 0 plus a dword at offset 15; they overlap by one byte.
   STPCPY returns %rdi + 18.  strncpy (non-strcat) takes 19 off %r8 and
   zero-fills the remainder if any.  */
805 .p2align 4
806L(Exit19):
807 movdqu (%rsi), %xmm0
808 mov 15(%rsi), %ecx
809 movdqu %xmm0, (%rdi)
810 mov %ecx, 15(%rdi)
811# ifdef USE_AS_STPCPY
812 lea 18(%rdi), %rax
813# endif
814# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
815 sub $19, %r8
816 lea 19(%rdi), %rdi
817 jnz L(StrncpyFillTailWithZero)
818# endif
819 ret
820
/* L(Exit20): copy 20 bytes (presumably including the NUL) as a vector
   at offset 0 plus a dword at offset 16.
   STPCPY returns %rdi + 19.  strncpy (non-strcat) takes 20 off %r8 and
   zero-fills the remainder if any.  */
821 .p2align 4
822L(Exit20):
823 movdqu (%rsi), %xmm0
824 mov 16(%rsi), %ecx
825 movdqu %xmm0, (%rdi)
826 mov %ecx, 16(%rdi)
827# ifdef USE_AS_STPCPY
828 lea 19(%rdi), %rax
829# endif
830# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
831 sub $20, %r8
832 lea 20(%rdi), %rdi
833 jnz L(StrncpyFillTailWithZero)
834# endif
835 ret
836
/* L(Exit21): 21 bytes total -- copy a vector at offset 0 and a dword at
   offset 16, then store %dh (presumably zero here) at offset 20.
   STPCPY returns %rdi + 20.  strncpy (non-strcat) takes 21 off %r8 and
   zero-fills the remainder if any.  */
837 .p2align 4
838L(Exit21):
839 movdqu (%rsi), %xmm0
840 mov 16(%rsi), %ecx
841 movdqu %xmm0, (%rdi)
842 mov %ecx, 16(%rdi)
843 mov %dh, 20(%rdi)
844# ifdef USE_AS_STPCPY
845 lea 20(%rdi), %rax
846# endif
847# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
848 sub $21, %r8
849 lea 21(%rdi), %rdi
850 jnz L(StrncpyFillTailWithZero)
851# endif
852 ret
853
/* L(Exit22): copy 22 bytes (presumably including the NUL) as a vector
   at offset 0 plus a qword at offset 14; they overlap by two bytes.
   STPCPY returns %rdi + 21.  strncpy (non-strcat) takes 22 off %r8 and
   zero-fills the remainder if any.  */
854 .p2align 4
855L(Exit22):
856 movdqu (%rsi), %xmm0
857 mov 14(%rsi), %rcx
858 movdqu %xmm0, (%rdi)
859 mov %rcx, 14(%rdi)
860# ifdef USE_AS_STPCPY
861 lea 21(%rdi), %rax
862# endif
863# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
864 sub $22, %r8
865 lea 22(%rdi), %rdi
866 jnz L(StrncpyFillTailWithZero)
867# endif
868 ret
869
/* L(Exit23): copy 23 bytes (presumably including the NUL) as a vector
   at offset 0 plus a qword at offset 15; they overlap by one byte.
   STPCPY returns %rdi + 22.  strncpy (non-strcat) takes 23 off %r8 and
   zero-fills the remainder if any.  */
870 .p2align 4
871L(Exit23):
872 movdqu (%rsi), %xmm0
873 mov 15(%rsi), %rcx
874 movdqu %xmm0, (%rdi)
875 mov %rcx, 15(%rdi)
876# ifdef USE_AS_STPCPY
877 lea 22(%rdi), %rax
878# endif
879# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
880 sub $23, %r8
881 lea 23(%rdi), %rdi
882 jnz L(StrncpyFillTailWithZero)
883# endif
884 ret
885
/* L(Exit24): copy 24 bytes (presumably including the NUL) as a vector
   at offset 0 plus a qword at offset 16.
   STPCPY returns %rdi + 23.  strncpy (non-strcat) takes 24 off %r8 and
   zero-fills the remainder if any.  */
886 .p2align 4
887L(Exit24):
888 movdqu (%rsi), %xmm0
889 mov 16(%rsi), %rcx
890 movdqu %xmm0, (%rdi)
891 mov %rcx, 16(%rdi)
892# ifdef USE_AS_STPCPY
893 lea 23(%rdi), %rax
894# endif
895# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
896 sub $24, %r8
897 lea 24(%rdi), %rdi
898 jnz L(StrncpyFillTailWithZero)
899# endif
900 ret
901
/* L(Exit25): 25 bytes total -- copy a vector at offset 0 and a qword at
   offset 16, then store %dh (presumably zero here) at offset 24.
   STPCPY returns %rdi + 24.  strncpy (non-strcat) takes 25 off %r8 and
   zero-fills the remainder if any.  */
902 .p2align 4
903L(Exit25):
904 movdqu (%rsi), %xmm0
905 mov 16(%rsi), %rcx
906 movdqu %xmm0, (%rdi)
907 mov %rcx, 16(%rdi)
908 mov %dh, 24(%rdi)
909# ifdef USE_AS_STPCPY
910 lea 24(%rdi), %rax
911# endif
912# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
913 sub $25, %r8
914 lea 25(%rdi), %rdi
915 jnz L(StrncpyFillTailWithZero)
916# endif
917 ret
918
/* L(Exit26): copy 26 bytes (presumably including the NUL) as a vector
   at offset 0, a qword at offset 16 and a word at offset 24.
   STPCPY returns %rdi + 25.  strncpy (non-strcat) takes 26 off %r8 and
   zero-fills the remainder if any.  */
919 .p2align 4
920L(Exit26):
921 movdqu (%rsi), %xmm0
922 mov 16(%rsi), %rdx
923 mov 24(%rsi), %cx
924 movdqu %xmm0, (%rdi)
925 mov %rdx, 16(%rdi)
926 mov %cx, 24(%rdi)
927# ifdef USE_AS_STPCPY
928 lea 25(%rdi), %rax
929# endif
930# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
931 sub $26, %r8
932 lea 26(%rdi), %rdi
933 jnz L(StrncpyFillTailWithZero)
934# endif
935 ret
936
/* L(Exit27): copy 27 bytes (presumably including the NUL) as a vector
   at offset 0, a qword at offset 16 and a dword at offset 23 that
   overlaps the qword by one byte.
   STPCPY returns %rdi + 26.  strncpy (non-strcat) takes 27 off %r8 and
   zero-fills the remainder if any.  */
937 .p2align 4
938L(Exit27):
939 movdqu (%rsi), %xmm0
940 mov 16(%rsi), %rdx
941 mov 23(%rsi), %ecx
942 movdqu %xmm0, (%rdi)
943 mov %rdx, 16(%rdi)
944 mov %ecx, 23(%rdi)
945# ifdef USE_AS_STPCPY
946 lea 26(%rdi), %rax
947# endif
948# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
949 sub $27, %r8
950 lea 27(%rdi), %rdi
951 jnz L(StrncpyFillTailWithZero)
952# endif
953 ret
954
/* L(Exit28): copy 28 bytes (presumably including the NUL) as a vector
   at offset 0, a qword at offset 16 and a dword at offset 24.
   STPCPY returns %rdi + 27.  strncpy (non-strcat) takes 28 off %r8 and
   zero-fills the remainder if any.  */
955 .p2align 4
956L(Exit28):
957 movdqu (%rsi), %xmm0
958 mov 16(%rsi), %rdx
959 mov 24(%rsi), %ecx
960 movdqu %xmm0, (%rdi)
961 mov %rdx, 16(%rdi)
962 mov %ecx, 24(%rdi)
963# ifdef USE_AS_STPCPY
964 lea 27(%rdi), %rax
965# endif
966# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
967 sub $28, %r8
968 lea 28(%rdi), %rdi
969 jnz L(StrncpyFillTailWithZero)
970# endif
971 ret
972
/* L(Exit29): copy 29 bytes (presumably including the NUL) as two
   overlapping vectors at offsets 0 and 13.
   STPCPY returns %rdi + 28.  strncpy (non-strcat) takes 29 off %r8 and
   zero-fills the remainder if any.  */
973 .p2align 4
974L(Exit29):
975 movdqu (%rsi), %xmm0
976 movdqu 13(%rsi), %xmm2
977 movdqu %xmm0, (%rdi)
978 movdqu %xmm2, 13(%rdi)
979# ifdef USE_AS_STPCPY
980 lea 28(%rdi), %rax
981# endif
982# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
983 sub $29, %r8
984 lea 29(%rdi), %rdi
985 jnz L(StrncpyFillTailWithZero)
986# endif
987 ret
988
/* L(Exit30): copy 30 bytes (presumably including the NUL) as two
   overlapping vectors at offsets 0 and 14.
   STPCPY returns %rdi + 29.  strncpy (non-strcat) takes 30 off %r8 and
   zero-fills the remainder if any.  */
989 .p2align 4
990L(Exit30):
991 movdqu (%rsi), %xmm0
992 movdqu 14(%rsi), %xmm2
993 movdqu %xmm0, (%rdi)
994 movdqu %xmm2, 14(%rdi)
995# ifdef USE_AS_STPCPY
996 lea 29(%rdi), %rax
997# endif
998# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
999 sub $30, %r8
1000 lea 30(%rdi), %rdi
1001 jnz L(StrncpyFillTailWithZero)
1002# endif
1003 ret
1004
/* L(Exit31): copy 31 bytes (presumably including the NUL) as two
   overlapping vectors at offsets 0 and 15.
   STPCPY returns %rdi + 30.  strncpy (non-strcat) takes 31 off %r8 and
   zero-fills the remainder if any.  */
1005 .p2align 4
1006L(Exit31):
1007 movdqu (%rsi), %xmm0
1008 movdqu 15(%rsi), %xmm2
1009 movdqu %xmm0, (%rdi)
1010 movdqu %xmm2, 15(%rdi)
1011# ifdef USE_AS_STPCPY
1012 lea 30(%rdi), %rax
1013# endif
1014# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1015 sub $31, %r8
1016 lea 31(%rdi), %rdi
1017 jnz L(StrncpyFillTailWithZero)
1018# endif
1019 ret
1020
/* L(Exit32): copy 32 bytes (presumably including the NUL) as two
   vectors at offsets 0 and 16.
   STPCPY returns %rdi + 31.  strncpy (non-strcat) takes 32 off %r8 and
   zero-fills the remainder if any.  */
1021 .p2align 4
1022L(Exit32):
1023 movdqu (%rsi), %xmm0
1024 movdqu 16(%rsi), %xmm2
1025 movdqu %xmm0, (%rdi)
1026 movdqu %xmm2, 16(%rdi)
1027# ifdef USE_AS_STPCPY
1028 lea 31(%rdi), %rax
1029# endif
1030# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1031 sub $32, %r8
1032 lea 32(%rdi), %rdi
1033 jnz L(StrncpyFillTailWithZero)
1034# endif
1035 ret
1036
1037# ifdef USE_AS_STRNCPY
1038
/* L(StrncpyExit0): length-limited exit that copies no bytes.
   STPCPY returns %rdi.  The strcat variant clears %ch and stores it as
   the terminating NUL at offset 0.  */
1039 .p2align 4
1040L(StrncpyExit0):
1041# ifdef USE_AS_STPCPY
1042 mov %rdi, %rax
1043# endif
1044# ifdef USE_AS_STRCAT
1045 xor %ch, %ch
1046 movb %ch, (%rdi)
1047# endif
1048 ret
1049
/* L(StrncpyExit1): length-limited exit; copy exactly 1 byte.
   STPCPY returns %rdi + 1.  The strcat variant clears %ch and stores it
   as the terminating NUL at offset 1.  */
1050 .p2align 4
1051L(StrncpyExit1):
1052 mov (%rsi), %dl
1053 mov %dl, (%rdi)
1054# ifdef USE_AS_STPCPY
1055 lea 1(%rdi), %rax
1056# endif
1057# ifdef USE_AS_STRCAT
1058 xor %ch, %ch
1059 movb %ch, 1(%rdi)
1060# endif
1061 ret
1062
/* L(StrncpyExit2): length-limited exit; copy exactly 2 bytes as one
   word.  STPCPY returns %rdi + 2.  The strcat variant clears %ch and
   stores it as the terminating NUL at offset 2.  */
1063 .p2align 4
1064L(StrncpyExit2):
1065 mov (%rsi), %dx
1066 mov %dx, (%rdi)
1067# ifdef USE_AS_STPCPY
1068 lea 2(%rdi), %rax
1069# endif
1070# ifdef USE_AS_STRCAT
1071 xor %ch, %ch
1072 movb %ch, 2(%rdi)
1073# endif
1074 ret
1075
/* L(StrncpyExit3): length-limited exit; copy exactly 3 bytes as a word
   at offset 0 plus a byte at offset 2.  STPCPY returns %rdi + 3.  The
   strcat variant clears %ch and stores it as the NUL at offset 3.  */
1076 .p2align 4
1077L(StrncpyExit3):
1078 mov (%rsi), %cx
1079 mov 2(%rsi), %dl
1080 mov %cx, (%rdi)
1081 mov %dl, 2(%rdi)
1082# ifdef USE_AS_STPCPY
1083 lea 3(%rdi), %rax
1084# endif
1085# ifdef USE_AS_STRCAT
1086 xor %ch, %ch
1087 movb %ch, 3(%rdi)
1088# endif
1089 ret
1090
/* L(StrncpyExit4): length-limited exit; copy exactly 4 bytes as one
   dword.  STPCPY returns %rdi + 4.  The strcat variant clears %ch and
   stores it as the terminating NUL at offset 4.  */
1091 .p2align 4
1092L(StrncpyExit4):
1093 mov (%rsi), %edx
1094 mov %edx, (%rdi)
1095# ifdef USE_AS_STPCPY
1096 lea 4(%rdi), %rax
1097# endif
1098# ifdef USE_AS_STRCAT
1099 xor %ch, %ch
1100 movb %ch, 4(%rdi)
1101# endif
1102 ret
1103
/* L(StrncpyExit5): length-limited exit; copy exactly 5 bytes as a dword
   at offset 0 plus a byte at offset 4.  STPCPY returns %rdi + 5.  The
   strcat variant clears %ch and stores it as the NUL at offset 5.  */
1104 .p2align 4
1105L(StrncpyExit5):
1106 mov (%rsi), %ecx
1107 mov 4(%rsi), %dl
1108 mov %ecx, (%rdi)
1109 mov %dl, 4(%rdi)
1110# ifdef USE_AS_STPCPY
1111 lea 5(%rdi), %rax
1112# endif
1113# ifdef USE_AS_STRCAT
1114 xor %ch, %ch
1115 movb %ch, 5(%rdi)
1116# endif
1117 ret
1118
/* L(StrncpyExit6): length-limited exit; copy exactly 6 bytes as a dword
   at offset 0 plus a word at offset 4.  STPCPY returns %rdi + 6.  The
   strcat variant clears %ch and stores it as the NUL at offset 6.  */
1119 .p2align 4
1120L(StrncpyExit6):
1121 mov (%rsi), %ecx
1122 mov 4(%rsi), %dx
1123 mov %ecx, (%rdi)
1124 mov %dx, 4(%rdi)
1125# ifdef USE_AS_STPCPY
1126 lea 6(%rdi), %rax
1127# endif
1128# ifdef USE_AS_STRCAT
1129 xor %ch, %ch
1130 movb %ch, 6(%rdi)
1131# endif
1132 ret
1133
/* L(StrncpyExit7): length-limited exit; copy exactly 7 bytes as two
   dwords at offsets 0 and 3 (overlapping by one byte).  STPCPY returns
   %rdi + 7.  The strcat variant clears %ch and stores it as the NUL at
   offset 7.  */
1134 .p2align 4
1135L(StrncpyExit7):
1136 mov (%rsi), %ecx
1137 mov 3(%rsi), %edx
1138 mov %ecx, (%rdi)
1139 mov %edx, 3(%rdi)
1140# ifdef USE_AS_STPCPY
1141 lea 7(%rdi), %rax
1142# endif
1143# ifdef USE_AS_STRCAT
1144 xor %ch, %ch
1145 movb %ch, 7(%rdi)
1146# endif
1147 ret
1148
/* L(StrncpyExit8): length-limited exit; copy exactly 8 bytes as one
   qword.  STPCPY returns %rdi + 8.  The strcat variant clears %ch and
   stores it as the terminating NUL at offset 8.  */
1149 .p2align 4
1150L(StrncpyExit8):
1151 mov (%rsi), %rdx
1152 mov %rdx, (%rdi)
1153# ifdef USE_AS_STPCPY
1154 lea 8(%rdi), %rax
1155# endif
1156# ifdef USE_AS_STRCAT
1157 xor %ch, %ch
1158 movb %ch, 8(%rdi)
1159# endif
1160 ret
1161
/* L(StrncpyExit9): length-limited exit; copy exactly 9 bytes as a qword
   at offset 0 plus a byte at offset 8.  STPCPY returns %rdi + 9.  The
   strcat variant clears %ch and stores it as the NUL at offset 9.  */
1162 .p2align 4
1163L(StrncpyExit9):
1164 mov (%rsi), %rcx
1165 mov 8(%rsi), %dl
1166 mov %rcx, (%rdi)
1167 mov %dl, 8(%rdi)
1168# ifdef USE_AS_STPCPY
1169 lea 9(%rdi), %rax
1170# endif
1171# ifdef USE_AS_STRCAT
1172 xor %ch, %ch
1173 movb %ch, 9(%rdi)
1174# endif
1175 ret
1176
/* L(StrncpyExit10): length-limited exit; copy exactly 10 bytes as a
   qword at offset 0 plus a word at offset 8.  STPCPY returns %rdi + 10.
   The strcat variant clears %ch and stores it as the NUL at offset 10.  */
1177 .p2align 4
1178L(StrncpyExit10):
1179 mov (%rsi), %rcx
1180 mov 8(%rsi), %dx
1181 mov %rcx, (%rdi)
1182 mov %dx, 8(%rdi)
1183# ifdef USE_AS_STPCPY
1184 lea 10(%rdi), %rax
1185# endif
1186# ifdef USE_AS_STRCAT
1187 xor %ch, %ch
1188 movb %ch, 10(%rdi)
1189# endif
1190 ret
1191
/* L(StrncpyExit11): length-limited exit; copy exactly 11 bytes as a
   qword at offset 0 plus a dword at offset 7 (overlapping by one byte).
   STPCPY returns %rdi + 11.  The strcat variant clears %ch and stores
   it as the NUL at offset 11.  */
1192 .p2align 4
1193L(StrncpyExit11):
1194 mov (%rsi), %rcx
1195 mov 7(%rsi), %edx
1196 mov %rcx, (%rdi)
1197 mov %edx, 7(%rdi)
1198# ifdef USE_AS_STPCPY
1199 lea 11(%rdi), %rax
1200# endif
1201# ifdef USE_AS_STRCAT
1202 xor %ch, %ch
1203 movb %ch, 11(%rdi)
1204# endif
1205 ret
1206
/* L(StrncpyExit12): length-limited exit; copy exactly 12 bytes as a
   qword at offset 0 plus a dword at offset 8.  STPCPY returns
   %rdi + 12.  The strcat variant clears %ch and stores it as the NUL at
   offset 12.  */
1207 .p2align 4
1208L(StrncpyExit12):
1209 mov (%rsi), %rcx
1210 mov 8(%rsi), %edx
1211 mov %rcx, (%rdi)
1212 mov %edx, 8(%rdi)
1213# ifdef USE_AS_STPCPY
1214 lea 12(%rdi), %rax
1215# endif
1216# ifdef USE_AS_STRCAT
1217 xor %ch, %ch
1218 movb %ch, 12(%rdi)
1219# endif
1220 ret
1221
/* L(StrncpyExit13): length-limited exit; copy exactly 13 bytes as two
   overlapping qwords at offsets 0 and 5.  STPCPY returns %rdi + 13.
   The strcat variant clears %ch and stores it as the NUL at offset 13.  */
1222 .p2align 4
1223L(StrncpyExit13):
1224 mov (%rsi), %rcx
1225 mov 5(%rsi), %rdx
1226 mov %rcx, (%rdi)
1227 mov %rdx, 5(%rdi)
1228# ifdef USE_AS_STPCPY
1229 lea 13(%rdi), %rax
1230# endif
1231# ifdef USE_AS_STRCAT
1232 xor %ch, %ch
1233 movb %ch, 13(%rdi)
1234# endif
1235 ret
1236
/* L(StrncpyExit14): length-limited exit; copy exactly 14 bytes as two
   overlapping qwords at offsets 0 and 6.  STPCPY returns %rdi + 14.
   The strcat variant clears %ch and stores it as the NUL at offset 14.  */
1237 .p2align 4
1238L(StrncpyExit14):
1239 mov (%rsi), %rcx
1240 mov 6(%rsi), %rdx
1241 mov %rcx, (%rdi)
1242 mov %rdx, 6(%rdi)
1243# ifdef USE_AS_STPCPY
1244 lea 14(%rdi), %rax
1245# endif
1246# ifdef USE_AS_STRCAT
1247 xor %ch, %ch
1248 movb %ch, 14(%rdi)
1249# endif
1250 ret
1251
/* L(StrncpyExit15): length-limited exit; copy exactly 15 bytes as two
   overlapping qwords at offsets 0 and 7.  STPCPY returns %rdi + 15.
   The strcat variant clears %ch and stores it as the NUL at offset 15.  */
1252 .p2align 4
1253L(StrncpyExit15):
1254 mov (%rsi), %rcx
1255 mov 7(%rsi), %rdx
1256 mov %rcx, (%rdi)
1257 mov %rdx, 7(%rdi)
1258# ifdef USE_AS_STPCPY
1259 lea 15(%rdi), %rax
1260# endif
1261# ifdef USE_AS_STRCAT
1262 xor %ch, %ch
1263 movb %ch, 15(%rdi)
1264# endif
1265 ret
1266
/* L(StrncpyExit16): length-limited exit; copy exactly 16 bytes with one
   unaligned vector load/store.  STPCPY returns %rdi + 16.  The strcat
   variant clears %ch and stores it as the NUL at offset 16.  */
1267 .p2align 4
1268L(StrncpyExit16):
1269 movdqu (%rsi), %xmm0
1270 movdqu %xmm0, (%rdi)
1271# ifdef USE_AS_STPCPY
1272 lea 16(%rdi), %rax
1273# endif
1274# ifdef USE_AS_STRCAT
1275 xor %ch, %ch
1276 movb %ch, 16(%rdi)
1277# endif
1278 ret
1279
/* L(StrncpyExit17): length-limited exit; copy exactly 17 bytes as a
   vector at offset 0 plus a byte at offset 16.  STPCPY returns
   %rdi + 17.  The strcat variant clears %ch and stores it as the NUL at
   offset 17.  */
1280 .p2align 4
1281L(StrncpyExit17):
1282 movdqu (%rsi), %xmm0
1283 mov 16(%rsi), %cl
1284 movdqu %xmm0, (%rdi)
1285 mov %cl, 16(%rdi)
1286# ifdef USE_AS_STPCPY
1287 lea 17(%rdi), %rax
1288# endif
1289# ifdef USE_AS_STRCAT
1290 xor %ch, %ch
1291 movb %ch, 17(%rdi)
1292# endif
1293 ret
1294
/* L(StrncpyExit18): length-limited exit; copy exactly 18 bytes as a
   vector at offset 0 plus a word at offset 16.  STPCPY returns
   %rdi + 18.  The strcat variant clears %ch and stores it as the NUL at
   offset 18.  */
1295 .p2align 4
1296L(StrncpyExit18):
1297 movdqu (%rsi), %xmm0
1298 mov 16(%rsi), %cx
1299 movdqu %xmm0, (%rdi)
1300 mov %cx, 16(%rdi)
1301# ifdef USE_AS_STPCPY
1302 lea 18(%rdi), %rax
1303# endif
1304# ifdef USE_AS_STRCAT
1305 xor %ch, %ch
1306 movb %ch, 18(%rdi)
1307# endif
1308 ret
1309
/* L(StrncpyExit19): length-limited exit; copy exactly 19 bytes as a
   vector at offset 0 plus a dword at offset 15 (overlapping by one
   byte).  STPCPY returns %rdi + 19.  The strcat variant clears %ch and
   stores it as the NUL at offset 19.  */
1310 .p2align 4
1311L(StrncpyExit19):
1312 movdqu (%rsi), %xmm0
1313 mov 15(%rsi), %ecx
1314 movdqu %xmm0, (%rdi)
1315 mov %ecx, 15(%rdi)
1316# ifdef USE_AS_STPCPY
1317 lea 19(%rdi), %rax
1318# endif
1319# ifdef USE_AS_STRCAT
1320 xor %ch, %ch
1321 movb %ch, 19(%rdi)
1322# endif
1323 ret
1324
/* L(StrncpyExit20): length-limited exit; copy exactly 20 bytes as a
   vector at offset 0 plus a dword at offset 16.  STPCPY returns
   %rdi + 20.  The strcat variant clears %ch and stores it as the NUL at
   offset 20.  */
1325 .p2align 4
1326L(StrncpyExit20):
1327 movdqu (%rsi), %xmm0
1328 mov 16(%rsi), %ecx
1329 movdqu %xmm0, (%rdi)
1330 mov %ecx, 16(%rdi)
1331# ifdef USE_AS_STPCPY
1332 lea 20(%rdi), %rax
1333# endif
1334# ifdef USE_AS_STRCAT
1335 xor %ch, %ch
1336 movb %ch, 20(%rdi)
1337# endif
1338 ret
1339
/* L(StrncpyExit21): length-limited exit; copy exactly 21 bytes as a
   vector at offset 0, a dword at offset 16 and a byte at offset 20.
   STPCPY returns %rdi + 21.  The strcat variant clears %ch and stores
   it as the NUL at offset 21.  */
1340 .p2align 4
1341L(StrncpyExit21):
1342 movdqu (%rsi), %xmm0
1343 mov 16(%rsi), %ecx
1344 mov 20(%rsi), %dl
1345 movdqu %xmm0, (%rdi)
1346 mov %ecx, 16(%rdi)
1347 mov %dl, 20(%rdi)
1348# ifdef USE_AS_STPCPY
1349 lea 21(%rdi), %rax
1350# endif
1351# ifdef USE_AS_STRCAT
1352 xor %ch, %ch
1353 movb %ch, 21(%rdi)
1354# endif
1355 ret
1356
/* L(StrncpyExit22): length-limited exit; copy exactly 22 bytes as a
   vector at offset 0 plus a qword at offset 14 (overlapping by two
   bytes).  STPCPY returns %rdi + 22.  The strcat variant clears %ch and
   stores it as the NUL at offset 22.  */
1357 .p2align 4
1358L(StrncpyExit22):
1359 movdqu (%rsi), %xmm0
1360 mov 14(%rsi), %rcx
1361 movdqu %xmm0, (%rdi)
1362 mov %rcx, 14(%rdi)
1363# ifdef USE_AS_STPCPY
1364 lea 22(%rdi), %rax
1365# endif
1366# ifdef USE_AS_STRCAT
1367 xor %ch, %ch
1368 movb %ch, 22(%rdi)
1369# endif
1370 ret
1371
/* L(StrncpyExit23): length-limited exit; copy exactly 23 bytes as a
   vector at offset 0 plus a qword at offset 15 (overlapping by one
   byte).  STPCPY returns %rdi + 23.  The strcat variant clears %ch and
   stores it as the NUL at offset 23.  */
1372 .p2align 4
1373L(StrncpyExit23):
1374 movdqu (%rsi), %xmm0
1375 mov 15(%rsi), %rcx
1376 movdqu %xmm0, (%rdi)
1377 mov %rcx, 15(%rdi)
1378# ifdef USE_AS_STPCPY
1379 lea 23(%rdi), %rax
1380# endif
1381# ifdef USE_AS_STRCAT
1382 xor %ch, %ch
1383 movb %ch, 23(%rdi)
1384# endif
1385 ret
1386
/* L(StrncpyExit24): length-limited exit; copy exactly 24 bytes as a
   vector at offset 0 plus a qword at offset 16.  STPCPY returns
   %rdi + 24.  The strcat variant clears %ch and stores it as the NUL at
   offset 24.  */
1387 .p2align 4
1388L(StrncpyExit24):
1389 movdqu (%rsi), %xmm0
1390 mov 16(%rsi), %rcx
1391 movdqu %xmm0, (%rdi)
1392 mov %rcx, 16(%rdi)
1393# ifdef USE_AS_STPCPY
1394 lea 24(%rdi), %rax
1395# endif
1396# ifdef USE_AS_STRCAT
1397 xor %ch, %ch
1398 movb %ch, 24(%rdi)
1399# endif
1400 ret
1401
/* L(StrncpyExit25): length-limited exit; copy exactly 25 bytes as a
   vector at offset 0, a qword at offset 16 and a byte at offset 24.
   STPCPY returns %rdi + 25.  The strcat variant clears %ch and stores
   it as the NUL at offset 25.  */
1402 .p2align 4
1403L(StrncpyExit25):
1404 movdqu (%rsi), %xmm0
1405 mov 16(%rsi), %rdx
1406 mov 24(%rsi), %cl
1407 movdqu %xmm0, (%rdi)
1408 mov %rdx, 16(%rdi)
1409 mov %cl, 24(%rdi)
1410# ifdef USE_AS_STPCPY
1411 lea 25(%rdi), %rax
1412# endif
1413# ifdef USE_AS_STRCAT
1414 xor %ch, %ch
1415 movb %ch, 25(%rdi)
1416# endif
1417 ret
1418
/* L(StrncpyExit26): length-limited exit; copy exactly 26 bytes as a
   vector at offset 0, a qword at offset 16 and a word at offset 24.
   STPCPY returns %rdi + 26.  The strcat variant clears %ch and stores
   it as the NUL at offset 26.  */
1419 .p2align 4
1420L(StrncpyExit26):
1421 movdqu (%rsi), %xmm0
1422 mov 16(%rsi), %rdx
1423 mov 24(%rsi), %cx
1424 movdqu %xmm0, (%rdi)
1425 mov %rdx, 16(%rdi)
1426 mov %cx, 24(%rdi)
1427# ifdef USE_AS_STPCPY
1428 lea 26(%rdi), %rax
1429# endif
1430# ifdef USE_AS_STRCAT
1431 xor %ch, %ch
1432 movb %ch, 26(%rdi)
1433# endif
1434 ret
1435
/* L(StrncpyExit27): length-limited exit; copy exactly 27 bytes as a
   vector at offset 0, a qword at offset 16 and a dword at offset 23
   (overlapping the qword by one byte).  STPCPY returns %rdi + 27.  The
   strcat variant clears %ch and stores it as the NUL at offset 27.  */
1436 .p2align 4
1437L(StrncpyExit27):
1438 movdqu (%rsi), %xmm0
1439 mov 16(%rsi), %rdx
1440 mov 23(%rsi), %ecx
1441 movdqu %xmm0, (%rdi)
1442 mov %rdx, 16(%rdi)
1443 mov %ecx, 23(%rdi)
1444# ifdef USE_AS_STPCPY
1445 lea 27(%rdi), %rax
1446# endif
1447# ifdef USE_AS_STRCAT
1448 xor %ch, %ch
1449 movb %ch, 27(%rdi)
1450# endif
1451 ret
1452
/* L(StrncpyExit28): length-limited exit; copy exactly 28 bytes as a
   vector at offset 0, a qword at offset 16 and a dword at offset 24.
   STPCPY returns %rdi + 28.  The strcat variant clears %ch and stores
   it as the NUL at offset 28.  */
1453 .p2align 4
1454L(StrncpyExit28):
1455 movdqu (%rsi), %xmm0
1456 mov 16(%rsi), %rdx
1457 mov 24(%rsi), %ecx
1458 movdqu %xmm0, (%rdi)
1459 mov %rdx, 16(%rdi)
1460 mov %ecx, 24(%rdi)
1461# ifdef USE_AS_STPCPY
1462 lea 28(%rdi), %rax
1463# endif
1464# ifdef USE_AS_STRCAT
1465 xor %ch, %ch
1466 movb %ch, 28(%rdi)
1467# endif
1468 ret
1469
/* L(StrncpyExit29): length-limited exit; copy exactly 29 bytes as two
   overlapping vectors at offsets 0 and 13.  STPCPY returns %rdi + 29.
   The strcat variant clears %ch and stores it as the NUL at offset 29.  */
1470 .p2align 4
1471L(StrncpyExit29):
1472 movdqu (%rsi), %xmm0
1473 movdqu 13(%rsi), %xmm2
1474 movdqu %xmm0, (%rdi)
1475 movdqu %xmm2, 13(%rdi)
1476# ifdef USE_AS_STPCPY
1477 lea 29(%rdi), %rax
1478# endif
1479# ifdef USE_AS_STRCAT
1480 xor %ch, %ch
1481 movb %ch, 29(%rdi)
1482# endif
1483 ret
1484
1485 .p2align 4
/* Jump-table target (entry 30 of L(ExitStrncpyTable)): copy exactly 30
   bytes from (%rsi) to (%rdi) with two unaligned 16-byte moves.  The second
   move starts at offset 14, so it overlaps bytes 14..15 of the first and
   ends at byte 29.  Clobbers %xmm0 and %xmm2.  */
1486L(StrncpyExit30):
1487 movdqu (%rsi), %xmm0 /* source bytes 0..15 */
1488 movdqu 14(%rsi), %xmm2 /* source bytes 14..29 */
1489 movdqu %xmm0, (%rdi)
1490 movdqu %xmm2, 14(%rdi)
1491# ifdef USE_AS_STPCPY
1492 lea 30(%rdi), %rax /* %rax = destination + 30 (one past the last byte copied) */
1493# endif
1494# ifdef USE_AS_STRCAT
1495 xor %ch, %ch /* %ch = 0 */
1496 movb %ch, 30(%rdi) /* write a NUL byte right after the 30 copied bytes */
1497# endif
1498 ret
1499
1500 .p2align 4
/* Jump-table target (entry 31 of L(ExitStrncpyTable)): copy exactly 31
   bytes from (%rsi) to (%rdi) with two unaligned 16-byte moves.  The second
   move starts at offset 15, so it overlaps byte 15 of the first and ends at
   byte 30.  Clobbers %xmm0 and %xmm2.  */
1501L(StrncpyExit31):
1502 movdqu (%rsi), %xmm0 /* source bytes 0..15 */
1503 movdqu 15(%rsi), %xmm2 /* source bytes 15..30 */
1504 movdqu %xmm0, (%rdi)
1505 movdqu %xmm2, 15(%rdi)
1506# ifdef USE_AS_STPCPY
1507 lea 31(%rdi), %rax /* %rax = destination + 31 (one past the last byte copied) */
1508# endif
1509# ifdef USE_AS_STRCAT
1510 xor %ch, %ch /* %ch = 0 */
1511 movb %ch, 31(%rdi) /* write a NUL byte right after the 31 copied bytes */
1512# endif
1513 ret
1514
1515 .p2align 4
/* Jump-table target (entry 32 of L(ExitStrncpyTable)): copy exactly 32
   bytes from (%rsi) to (%rdi) with two back-to-back unaligned 16-byte
   moves.  Clobbers %xmm0 and %xmm2.  */
1516L(StrncpyExit32):
1517 movdqu (%rsi), %xmm0 /* source bytes 0..15 */
1518 movdqu 16(%rsi), %xmm2 /* source bytes 16..31 */
1519 movdqu %xmm0, (%rdi)
1520 movdqu %xmm2, 16(%rdi)
1521# ifdef USE_AS_STPCPY
1522 lea 32(%rdi), %rax /* %rax = destination + 32 (one past the last byte copied) */
1523# endif
1524# ifdef USE_AS_STRCAT
1525 xor %ch, %ch /* %ch = 0 */
1526 movb %ch, 32(%rdi) /* write a NUL byte right after the 32 copied bytes */
1527# endif
1528 ret
1529
1530 .p2align 4
/* Jump-table target (entry 33, the last one, of L(ExitStrncpyTable)): copy
   exactly 33 bytes from (%rsi) to (%rdi) as 16 + 16 + 1 byte moves.
   Clobbers %xmm0, %xmm2 and %cl.
   Unlike L(StrncpyExit24)..L(StrncpyExit32) there is no USE_AS_STPCPY block
   here, so %rax is left untouched.
   NOTE(review): presumably this entry is never selected in stpncpy builds
   -- confirm against the length thresholds used at the dispatch sites.  */
1531L(StrncpyExit33):
1532 movdqu (%rsi), %xmm0 /* source bytes 0..15 */
1533 movdqu 16(%rsi), %xmm2 /* source bytes 16..31 */
1534 mov 32(%rsi), %cl /* source byte 32 */
1535 movdqu %xmm0, (%rdi)
1536 movdqu %xmm2, 16(%rdi)
1537 mov %cl, 32(%rdi)
1538# ifdef USE_AS_STRCAT
1539 xor %ch, %ch /* %ch = 0; %cl has already been stored */
1540 movb %ch, 33(%rdi) /* write a NUL byte right after the 33 copied bytes */
1541# endif
1542 ret
1543
1544# ifndef USE_AS_STRCAT
1545
1546 .p2align 4
/* L(FillTable) entry 0: nothing is left to zero-fill, so just return.  */
1547L(Fill0):
1548 ret
1549
1550 .p2align 4
/* L(FillTable) entry 1: zero 1 byte at (%rdi).  The dispatch path through
   L(StrncpyFillTailWithZero) clears %rdx before jumping here, so %dl is 0.  */
1551L(Fill1):
1552 mov %dl, (%rdi)
1553 ret
1554
1555 .p2align 4
/* L(FillTable) entry 2: zero 2 bytes at (%rdi).  The dispatch path through
   L(StrncpyFillTailWithZero) clears %rdx before jumping here, so %dx is 0.  */
1556L(Fill2):
1557 mov %dx, (%rdi)
1558 ret
1559
1560 .p2align 4
/* L(FillTable) entry 3: zero bytes 0..2 at (%rdi) with a single 4-byte
   store that starts one byte early, at -1(%rdi).  %rdx was cleared by the
   dispatch path through L(StrncpyFillTailWithZero).
   NOTE(review): the byte at -1(%rdi) is rewritten with zero; presumably it
   is already zero (terminator or earlier fill) -- confirm at the callers.  */
1561L(Fill3):
1562 mov %edx, -1(%rdi)
1563 ret
1564
1565 .p2align 4
/* L(FillTable) entry 4: zero 4 bytes at (%rdi).  The dispatch path through
   L(StrncpyFillTailWithZero) clears %rdx before jumping here, so %edx is 0.  */
1566L(Fill4):
1567 mov %edx, (%rdi)
1568 ret
1569
1570 .p2align 4
/* L(FillTable) entry 5: zero 5 bytes at (%rdi) as a 4-byte plus a 1-byte
   store.  %rdx was cleared by the dispatch path through
   L(StrncpyFillTailWithZero).  */
1571L(Fill5):
1572 mov %edx, (%rdi) /* bytes 0..3 */
1573 mov %dl, 4(%rdi) /* byte 4 */
1574 ret
1575
1576 .p2align 4
/* L(FillTable) entry 6: zero 6 bytes at (%rdi) as a 4-byte plus a 2-byte
   store.  %rdx was cleared by the dispatch path through
   L(StrncpyFillTailWithZero).  */
1577L(Fill6):
1578 mov %edx, (%rdi) /* bytes 0..3 */
1579 mov %dx, 4(%rdi) /* bytes 4..5 */
1580 ret
1581
1582 .p2align 4
/* L(FillTable) entry 7: zero bytes 0..6 at (%rdi) with a single 8-byte
   store that starts one byte early, at -1(%rdi).  %rdx was cleared by the
   dispatch path through L(StrncpyFillTailWithZero).
   NOTE(review): the byte at -1(%rdi) is rewritten with zero; presumably it
   is already zero (terminator or earlier fill) -- confirm at the callers.  */
1583L(Fill7):
1584 mov %rdx, -1(%rdi)
1585 ret
1586
1587 .p2align 4
/* L(FillTable) entry 8: zero 8 bytes at (%rdi).  The dispatch path through
   L(StrncpyFillTailWithZero) clears %rdx before jumping here.  */
1588L(Fill8):
1589 mov %rdx, (%rdi)
1590 ret
1591
1592 .p2align 4
/* L(FillTable) entry 9: zero 9 bytes at (%rdi) as an 8-byte plus a 1-byte
   store.  %rdx was cleared by the dispatch path through
   L(StrncpyFillTailWithZero).  */
1593L(Fill9):
1594 mov %rdx, (%rdi) /* bytes 0..7 */
1595 mov %dl, 8(%rdi) /* byte 8 */
1596 ret
1597
1598 .p2align 4
/* L(FillTable) entry 10: zero 10 bytes at (%rdi) as an 8-byte plus a 2-byte
   store.  %rdx was cleared by the dispatch path through
   L(StrncpyFillTailWithZero).  */
1599L(Fill10):
1600 mov %rdx, (%rdi) /* bytes 0..7 */
1601 mov %dx, 8(%rdi) /* bytes 8..9 */
1602 ret
1603
1604 .p2align 4
/* L(FillTable) entry 11: zero 11 bytes at (%rdi).  An 8-byte store covers
   bytes 0..7 and a 4-byte store at offset 7 covers bytes 7..10 (the two
   overlap on byte 7).  %rdx was cleared by the dispatch path through
   L(StrncpyFillTailWithZero).  */
1605L(Fill11):
1606 mov %rdx, (%rdi)
1607 mov %edx, 7(%rdi)
1608 ret
1609
1610 .p2align 4
/* L(FillTable) entry 12: zero 12 bytes at (%rdi) as an 8-byte plus a 4-byte
   store.  %rdx was cleared by the dispatch path through
   L(StrncpyFillTailWithZero).  */
1611L(Fill12):
1612 mov %rdx, (%rdi) /* bytes 0..7 */
1613 mov %edx, 8(%rdi) /* bytes 8..11 */
1614 ret
1615
1616 .p2align 4
/* L(FillTable) entry 13: zero 13 bytes at (%rdi) with two overlapping
   8-byte stores: bytes 0..7 and bytes 5..12.  %rdx was cleared by the
   dispatch path through L(StrncpyFillTailWithZero).  */
1617L(Fill13):
1618 mov %rdx, (%rdi)
1619 mov %rdx, 5(%rdi)
1620 ret
1621
1622 .p2align 4
/* L(FillTable) entry 14: zero 14 bytes at (%rdi) with two overlapping
   8-byte stores: bytes 0..7 and bytes 6..13.  %rdx was cleared by the
   dispatch path through L(StrncpyFillTailWithZero).  */
1623L(Fill14):
1624 mov %rdx, (%rdi)
1625 mov %rdx, 6(%rdi)
1626 ret
1627
1628 .p2align 4
/* L(FillTable) entry 15: zero bytes 0..14 at (%rdi) with a single unaligned
   16-byte store that starts one byte early, at -1(%rdi).  %xmm0 was cleared
   by the dispatch path through L(StrncpyFillTailWithZero).
   NOTE(review): the byte at -1(%rdi) is rewritten with zero; presumably it
   is already zero (terminator or earlier fill) -- confirm at the callers.  */
1629L(Fill15):
1630 movdqu %xmm0, -1(%rdi)
1631 ret
1632
1633 .p2align 4
/* L(FillTable) entry 16: zero 16 bytes at (%rdi) with one unaligned 16-byte
   store.  %xmm0 was cleared by the dispatch path through
   L(StrncpyFillTailWithZero).  */
1634L(Fill16):
1635 movdqu %xmm0, (%rdi)
1636 ret
1637
1638 .p2align 4
/* Store the 16 bytes held in %xmm2 at %rdi + %rcx (unaligned store).  There
   is no ret or jump: execution falls through, across the alignment padding
   that follows, into L(CopyFrom1To16BytesXmmExit).  */
1639L(CopyFrom1To16BytesUnalignedXmm2):
1640 movdqu %xmm2, (%rdi, %rcx)
1641
1642 .p2align 4
/* Convert the bit mask in %rdx into a byte index, then set up %rdi and %r8
   for the zero-fill code that follows.
   In:  %rdx = bit mask (NOTE(review): presumably the non-zero pmovmskb
               NUL mask of the block just stored -- confirm at jump sites),
        %rcx = byte offset of that block from %rdi,
        %r8  = length counter.
   Out: %rdi = address one byte past the indexed byte,
        %r8  = %r8 + 15 - index,
        %rax = address of the indexed byte (USE_AS_STPCPY only).
   Falls through into L(StrncpyFillTailWithZero), which starts by
   subtracting 16 from %r8.  */
1643L(CopyFrom1To16BytesXmmExit):
1644 bsf %rdx, %rdx /* %rdx = index of the lowest set bit of the mask */
1645 add $15, %r8
1646 add %rcx, %rdi /* advance %rdi by the block offset */
1647# ifdef USE_AS_STPCPY
1648 lea (%rdi, %rdx), %rax /* %rax = address of the byte selected by the mask */
1649# endif
1650 sub %rdx, %r8 /* %r8 = %r8 + 15 - index */
1651 lea 1(%rdi, %rdx), %rdi /* %rdi = first byte after the indexed one */
1652
1653 .p2align 4
/* Zero-fill the destination tail.
   In:  %rdi = first byte to zero, %r8 = number of bytes to zero (this is
        how the code below consumes it: counts <= 16 go straight to the
        L(FillTable) stub for that count).
   Clears %xmm0 and %rdx, which the L(FillN) stubs store as the zero
   pattern.  Uses %rsi as scratch.
   For counts above 16: write 16 zero bytes unaligned, round %rdi down to a
   16-byte boundary so the following stores can use movdqa, add the bytes
   that will be covered twice back to %r8, and pre-subtract 64 for the
   64-bytes-per-iteration loop.  */
1654L(StrncpyFillTailWithZero):
1655 pxor %xmm0, %xmm0 /* %xmm0 = 0 */
1656 xor %rdx, %rdx /* %rdx = 0 */
1657 sub $16, %r8
1658 jbe L(StrncpyFillExit) /* count was <= 16: let a Fill stub finish */
1659
1660 movdqu %xmm0, (%rdi) /* first 16 zero bytes, destination may be unaligned */
1661 add $16, %rdi
1662
1663 mov %rdi, %rsi
1664 and $0xf, %rsi /* %rsi = misalignment of %rdi within 16 bytes */
1665 sub %rsi, %rdi /* round %rdi down to a 16-byte boundary */
1666 add %rsi, %r8 /* account for the bytes that get rewritten */
1667 sub $64, %r8 /* bias: %r8 = remaining - 64 */
1668 jb L(StrncpyFillLess64) /* fewer than 64 bytes remain */
1669
1670L(StrncpyFillLoopMovdqa):
/* Main zero-fill loop: each iteration writes 64 zero bytes with four
   aligned 16-byte stores (%rdi was rounded down to a 16-byte boundary in
   L(StrncpyFillTailWithZero)).  %r8 holds (bytes remaining - 64); the loop
   repeats while the subtraction does not borrow, i.e. while at least
   another 64 bytes remain.  Falls through into L(StrncpyFillLess64).  */
1671 movdqa %xmm0, (%rdi)
1672 movdqa %xmm0, 16(%rdi)
1673 movdqa %xmm0, 32(%rdi)
1674 movdqa %xmm0, 48(%rdi)
1675 add $64, %rdi
1676 sub $64, %r8
1677 jae L(StrncpyFillLoopMovdqa)
1678
1679L(StrncpyFillLess64):
/* Fewer than 64 bytes remain; on entry %r8 = (bytes remaining - 64), a
   negative value.  Write 32 and then 16 more zero bytes with aligned
   stores while enough remains, then finish the last 0..15 bytes (or
   0..16 if reached via L(StrncpyFillExit)) through L(FillTable).  */
1680 add $32, %r8 /* %r8 = remaining - 32 */
1681 jl L(StrncpyFillLess32) /* fewer than 32 bytes remain */
1682 movdqa %xmm0, (%rdi)
1683 movdqa %xmm0, 16(%rdi)
1684 add $32, %rdi
1685 sub $16, %r8 /* %r8 = (remaining after the 32 bytes) - 16 */
1686 jl L(StrncpyFillExit) /* fewer than 16 left: Fill stub does the rest */
1687 movdqa %xmm0, (%rdi)
1688 add $16, %rdi
1689 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) /* %r8 = 0..15 bytes still to zero */
1690
1691L(StrncpyFillLess32):
/* Fewer than 32 bytes remain; on entry %r8 = (bytes remaining - 32), a
   negative value.  Write one more aligned 16-byte block if at least 16
   bytes remain, then finish through L(FillTable).  */
1692 add $16, %r8 /* %r8 = remaining - 16 */
1693 jl L(StrncpyFillExit) /* fewer than 16 left: Fill stub does the rest */
1694 movdqa %xmm0, (%rdi)
1695 add $16, %rdi
1696 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) /* %r8 = 0..15 bytes still to zero */
1697
1698L(StrncpyFillExit):
/* Common tail of the zero-fill code.  Every jump here comes right after
   %r8 was left at (bytes remaining - 16); adding 16 back turns it into the
   L(FillTable) index, i.e. the number of bytes the selected L(FillN) stub
   must still zero at (%rdi).  The macro clobbers %r11 and %rcx.  */
1699 add $16, %r8
1700 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1701
1702/* end of ifndef USE_AS_STRCAT */
1703# endif
1704
1705 .p2align 4
/* Exit path for a pending 64-byte group held in %xmm4..%xmm7.
   %rdx != 0 selects L(Unaligned64LeaveCase2); %rdx == 0 falls through into
   L(Unaligned64LeaveCase3).
   NOTE(review): %rdx is presumably the combined NUL mask of the four
   blocks and %r8 a biased remaining-length counter, both set up by code
   before this chunk -- confirm there.  */
1706L(UnalignedLeaveCase2OrCase3):
1707 test %rdx, %rdx
1708 jnz L(Unaligned64LeaveCase2)
/* Case 3 (%rdx == 0): store %xmm4..%xmm7 to the destination one block at a
   time, stepping %r8 down by 16 per block, and hand over to
   L(CopyFrom1To16BytesCase3) (defined outside this chunk) as soon as %r8
   runs out.  If all four blocks fit, 64 bytes have been written at (%rdi).  */
1709L(Unaligned64LeaveCase3):
1710 lea 64(%r8), %rcx
1711 and $-16, %rcx /* %rcx = (%r8 + 64) rounded down to a multiple of 16;
                       NOTE(review): presumably consumed as a block offset by
                       L(CopyFrom1To16BytesCase3) -- confirm */
1712 add $48, %r8
1713 jl L(CopyFrom1To16BytesCase3) /* signed: sum is negative */
1714 movdqu %xmm4, (%rdi)
1715 sub $16, %r8
1716 jb L(CopyFrom1To16BytesCase3) /* subtraction borrowed */
1717 movdqu %xmm5, 16(%rdi)
1718 sub $16, %r8
1719 jb L(CopyFrom1To16BytesCase3)
1720 movdqu %xmm6, 32(%rdi)
1721 sub $16, %r8
1722 jb L(CopyFrom1To16BytesCase3)
1723 movdqu %xmm7, 48(%rdi)
1724# ifdef USE_AS_STPCPY
1725 lea 64(%rdi), %rax /* %rax = destination + 64 (one past the last byte stored) */
1726# endif
1727# ifdef USE_AS_STRCAT
1728 xor %ch, %ch /* %ch = 0 */
1729 movb %ch, 64(%rdi) /* write a NUL byte right after the 64 stored bytes */
1730# endif
1731 ret
1732
1733 .p2align 4
/* Reached from L(UnalignedLeaveCase2OrCase3) when %rdx != 0.  Re-examines
   %xmm4..%xmm7 one 16-byte block at a time: each block is compared
   byte-wise against %xmm0 and the match mask is placed in %rdx, %r8 is
   stepped down by 16 per block, and %rcx tracks the byte offset of the
   block being examined.  A block is stored to the destination only after
   it was found match-free and the length check passed.
   NOTE(review): %xmm0 is presumably all-zero on entry (so the mask marks
   NUL bytes) and %r8 is presumably a biased remaining-length counter --
   confirm against the code before this chunk.  The jump targets other than
   L(ExitStrncpyTable) are defined outside this chunk.  */
1734L(Unaligned64LeaveCase2):
1735 xor %rcx, %rcx /* block offset = 0 */
1736 pcmpeqb %xmm4, %xmm0
1737 pmovmskb %xmm0, %rdx /* %rdx = match mask for block 0 (%xmm4) */
1738 add $48, %r8
1739 jle L(CopyFrom1To16BytesCase2OrCase3) /* uses the flags of the add above */
1740 test %rdx, %rdx
1741# ifndef USE_AS_STRCAT
1742 jnz L(CopyFrom1To16BytesUnalignedXmm4)
1743# else
1744 jnz L(CopyFrom1To16Bytes)
1745# endif
1746 pcmpeqb %xmm5, %xmm0 /* %xmm0 is all-zero here: the previous compare had no match */
1747 pmovmskb %xmm0, %rdx /* %rdx = match mask for block 1 (%xmm5) */
1748 movdqu %xmm4, (%rdi) /* block 0 is match-free: store it */
1749 add $16, %rcx
1750 sub $16, %r8
1751 jbe L(CopyFrom1To16BytesCase2OrCase3)
1752 test %rdx, %rdx
1753# ifndef USE_AS_STRCAT
1754 jnz L(CopyFrom1To16BytesUnalignedXmm5)
1755# else
1756 jnz L(CopyFrom1To16Bytes)
1757# endif
1758
1759 pcmpeqb %xmm6, %xmm0 /* %xmm0 is all-zero again for the same reason */
1760 pmovmskb %xmm0, %rdx /* %rdx = match mask for block 2 (%xmm6) */
1761 movdqu %xmm5, 16(%rdi) /* block 1 is match-free: store it */
1762 add $16, %rcx
1763 sub $16, %r8
1764 jbe L(CopyFrom1To16BytesCase2OrCase3)
1765 test %rdx, %rdx
1766# ifndef USE_AS_STRCAT
1767 jnz L(CopyFrom1To16BytesUnalignedXmm6)
1768# else
1769 jnz L(CopyFrom1To16Bytes)
1770# endif
1771
1772 pcmpeqb %xmm7, %xmm0
1773 pmovmskb %xmm0, %rdx /* %rdx = match mask for block 3 (%xmm7) */
1774 movdqu %xmm6, 32(%rdi) /* block 2 is match-free: store it */
1775 lea 16(%rdi, %rcx), %rdi /* %rcx is 32 here, so %rdi advances by 48 */
1776 lea 16(%rsi, %rcx), %rsi /* %rsi advances by 48 as well */
1777 bsf %rdx, %rdx /* %rdx = index of the first match in block 3;
                       NOTE(review): bsf leaves %rdx undefined for a zero
                       mask -- presumably a match in %xmm7 is guaranteed
                       here because the combined mask was non-zero; confirm */
1778 cmp %r8, %rdx
1779 jb L(CopyFrom1To16BytesExit) /* match index < %r8 (unsigned) */
1780 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) /* otherwise copy exactly %r8 bytes */
1781
1782 .p2align 4
/* Return without copying anything.  This is the target of the zero-length
   check at function entry in USE_AS_STRNCPY builds.  Outside the strcat
   variants the destination pointer in %rdi is returned in %rax; in the
   strcat variants %rax is left as it is.  */
1783L(ExitZero):
1784# ifndef USE_AS_STRCAT
1785 mov %rdi, %rax
1786# endif
1787 ret
1788
1789# endif
1790
1791# ifndef USE_AS_STRCAT
1792END (STRCPY)
1793# else
1794END (STRCAT)
1795# endif
1796 .p2align 4
1797 .section .rodata
/* Jump table for the L(Exit1)..L(Exit32) stubs.  Each entry is a 32-bit
   offset of the stub relative to the start of the table (see JMPTBL);
   BRANCH_TO_JMPTBL_ENTRY sign-extends the selected entry and adds the table
   address back.  Entry i (zero-based) refers to L(Exit<i+1>).
   NOTE(review): the .p2align above is issued before the switch to .rodata,
   so it pads the previous section rather than aligning this table.  The
   table is only read with movslq, so this looks harmless -- confirm it is
   intentional.  */
1798L(ExitTable):
1799 .int JMPTBL(L(Exit1), L(ExitTable))
1800 .int JMPTBL(L(Exit2), L(ExitTable))
1801 .int JMPTBL(L(Exit3), L(ExitTable))
1802 .int JMPTBL(L(Exit4), L(ExitTable))
1803 .int JMPTBL(L(Exit5), L(ExitTable))
1804 .int JMPTBL(L(Exit6), L(ExitTable))
1805 .int JMPTBL(L(Exit7), L(ExitTable))
1806 .int JMPTBL(L(Exit8), L(ExitTable))
1807 .int JMPTBL(L(Exit9), L(ExitTable))
1808 .int JMPTBL(L(Exit10), L(ExitTable))
1809 .int JMPTBL(L(Exit11), L(ExitTable))
1810 .int JMPTBL(L(Exit12), L(ExitTable))
1811 .int JMPTBL(L(Exit13), L(ExitTable))
1812 .int JMPTBL(L(Exit14), L(ExitTable))
1813 .int JMPTBL(L(Exit15), L(ExitTable))
1814 .int JMPTBL(L(Exit16), L(ExitTable))
1815 .int JMPTBL(L(Exit17), L(ExitTable))
1816 .int JMPTBL(L(Exit18), L(ExitTable))
1817 .int JMPTBL(L(Exit19), L(ExitTable))
1818 .int JMPTBL(L(Exit20), L(ExitTable))
1819 .int JMPTBL(L(Exit21), L(ExitTable))
1820 .int JMPTBL(L(Exit22), L(ExitTable))
1821 .int JMPTBL(L(Exit23), L(ExitTable))
1822 .int JMPTBL(L(Exit24), L(ExitTable))
1823 .int JMPTBL(L(Exit25), L(ExitTable))
1824 .int JMPTBL(L(Exit26), L(ExitTable))
1825 .int JMPTBL(L(Exit27), L(ExitTable))
1826 .int JMPTBL(L(Exit28), L(ExitTable))
1827 .int JMPTBL(L(Exit29), L(ExitTable))
1828 .int JMPTBL(L(Exit30), L(ExitTable))
1829 .int JMPTBL(L(Exit31), L(ExitTable))
1830 .int JMPTBL(L(Exit32), L(ExitTable))
1831# ifdef USE_AS_STRNCPY
1832L(ExitStrncpyTable):
/* Jump table for the length-limited exit stubs (USE_AS_STRNCPY builds
   only).  Entry N is the 32-bit offset of L(StrncpyExitN) relative to this
   table, for N = 0..33, so the table is indexed directly by the number of
   bytes to copy (e.g. by %r8 in L(Unaligned64LeaveCase2)).  */
1833 .int JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable))
1834 .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
1835 .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
1836 .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
1837 .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
1838 .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
1839 .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
1840 .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
1841 .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
1842 .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
1843 .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
1844 .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
1845 .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
1846 .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
1847 .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
1848 .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
1849 .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
1850 .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
1851 .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
1852 .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
1853 .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
1854 .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
1855 .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
1856 .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
1857 .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
1858 .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
1859 .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
1860 .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
1861 .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
1862 .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
1863 .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
1864 .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
1865 .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
1866 .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
1867# ifndef USE_AS_STRCAT
1868 .p2align 4
/* Jump table for the zero-fill stubs (strncpy-style builds that are not
   strcat variants).  Entry N is the 32-bit offset of L(FillN) relative to
   this table, for N = 0..16; the StrncpyFill* code indexes it with the
   number of bytes still to be zeroed, held in %r8.  */
1869L(FillTable):
1870 .int JMPTBL(L(Fill0), L(FillTable))
1871 .int JMPTBL(L(Fill1), L(FillTable))
1872 .int JMPTBL(L(Fill2), L(FillTable))
1873 .int JMPTBL(L(Fill3), L(FillTable))
1874 .int JMPTBL(L(Fill4), L(FillTable))
1875 .int JMPTBL(L(Fill5), L(FillTable))
1876 .int JMPTBL(L(Fill6), L(FillTable))
1877 .int JMPTBL(L(Fill7), L(FillTable))
1878 .int JMPTBL(L(Fill8), L(FillTable))
1879 .int JMPTBL(L(Fill9), L(FillTable))
1880 .int JMPTBL(L(Fill10), L(FillTable))
1881 .int JMPTBL(L(Fill11), L(FillTable))
1882 .int JMPTBL(L(Fill12), L(FillTable))
1883 .int JMPTBL(L(Fill13), L(FillTable))
1884 .int JMPTBL(L(Fill14), L(FillTable))
1885 .int JMPTBL(L(Fill15), L(FillTable))
1886 .int JMPTBL(L(Fill16), L(FillTable))
1887# endif
1888# endif
1889#endif
1890